o
    䯪g                     @   sp  d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Zd dl
mZ d dlmZ d dlmZmZmZ zd dlmZ d dlmZmZ W n eyW   dZY nw zd dlZd dlmZ d d	lmZ d d
lm Z  W n ey}   d ZZY nw zd dl!Z"W n ey   dZ"Y nw e	j#jZ$dd Z%dd Z&e	j#jdd Z'e	j#jdd Z(e	j#jdd Z)e	j#jdd Z*e	j#jdd Z+dd Z,dd Z-e	j#j.dd Z/e	j#jdd  Z0e	j#jd!d" Z1d#d$ Z2e	j#jd%d& Z3d'd( Z4e	j#5d)de6 ge	j#5d*d+d,d- Z7d.d/ Z8d0d1 Z9d2d3 Z:d4d5 Z;d6d7 Z<d8d9 Z=d:d; Z>d<d= Z?e	j#jd>d? Z@e	j#jd@dA ZAe	j#jdBdC ZBdDdE ZCdFdG ZDe	j#jdHdI ZEdJdK ZFe	j#je	j#jGe	j#HdLe	j#HdMdNdO ZIe	j#5dPdQdR dSdR dTdR dUdR ge	j#5dVdWdXgdYdZ ZJd[d\ ZKd]d^ ZLd_d` ZMdadb ZNdcdd ZOdedf ZPe	j#jQdgdh ZRe	j#jQdidj ZSdS )k    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_dataframe)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 C   s   t dg di}tjtdd t|| d dd W d    n1 s$w   Y  tjtdd t|| d dd	 W d    d S 1 sEw   Y  d S )
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr    r   \/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_version;   s   
"r    c                  C   sH   t g dd } t jj| gdgd}ddg}|D ]}t||d qd S )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizer   r   r   test_set_data_page_sizeE   s   r,   c                  C   s,   t d} tjj| dd}t|dddd d S )Nd   Fpreserve_index
   r   2.4)r$   write_batch_sizer   )r	   r   r&   from_pandasr   dfr   r   r   r   test_set_write_batch_sizeO   s
   
r6   c                  C   sh   t d} tjj| dd}t|dddd tt t|dddd W d    d S 1 s-w   Y  d S )	Nr-   Fr.   r   r0   r1   )dictionary_pagesize_limitr$   r   r   )r	   r   r&   r3   r   r   r   	TypeErrorr4   r   r   r   "test_set_dictionary_pagesize_limitY   s   "r9   c               	   C   s   g } t jtdd}| t j|gd  t \}}t j|}| t j|gd  dD ]}dD ]}| D ]
}t|d||d q8q4q0d S )Nr0   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchr3   r   appendr&   from_batchesr   r   )tablesbatchr5   _r   r=   r   r   r   r   test_chunked_table_writef   s"   
rD   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|dd
}|
|sDJ d S )Nr0   r:   
memory_mapTr<   read_table_kwargsr   tmp_filewbr   )rE   r   r   r&   r3   r   stropenr   pqread_pandasequalsr   r5   r   filenamef
table_readr   r   r   test_memory_mapy      

rT   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|d
d}|
|sDJ d S )Nr0   r:   buffer_sizei  r<   rF   rH   rI   r   i   )rV   rJ   rP   r   r   r   test_enable_buffered_stream   rU   rW   c                 C   sj   t jt dggdg}d}| | }| rJ t|t| | s&J tt|}||s3J d S )N*   intsz	foo # bar)	r   r&   r'   r%   existsr   rK   r
   rO   )r   r   rQ   pathrS   r   r   r   test_special_chars_filename   s   r\   c                   C   sv   t jtdd td  W d    n1 sw   Y  t jtdd td  W d    d S 1 s4w   Y  d S )NNoner   )r   r   r8   rM   
read_tableParquetFiler   r   r   r   test_invalid_source   s   "r`   c                  C   s*   t jttdgdgd} t| dd d S )Ni@  r!   r"   r   )row_group_size)r   r   listranger   )r)   r   r   r   (test_file_with_over_int16_max_row_groups   s   rd   c                  C   s   t dd} tj| }tjjdd | D |jjd}|jdj	t
 ks)J |jdj	tt
 ks9J t|dd	 d S )
Nr0   r:   c                 S   s   g | ]}| d dd  qS )r   N)chunk).0colr   r   r   
<listcomp>   s    z.test_empty_table_roundtrip.<locals>.<listcomp>r"   null	null_listr<   r   )r   r   r&   r3   r'   itercolumnsschemar#   fieldtyperi   list_r   r4   r   r   r   test_empty_table_roundtrip   s   
 
rp   c                  C   s$   t  } tjj| dd}t| d S )NFr.   )pd	DataFramer   r&   r3   r   )r5   emptyr   r   r   test_empty_table_no_columns   s   rt   c                     sp   t t tt d g t dddgg}  fdd| D } fdd|D }tj|t }t| d S )N)int32list_stringr   )Gc                    s$   g | ]}t j|t  d  qS )rn   )r   r%   structflattenrf   rB   colsr   r   rh      s    zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>c                    s"   g | ]}t jj|t  d qS ))rl   )r   r>   r'   rl   r{   r|   r   r   rh      s    )	r   r   ru   ro   stringr&   r@   rl   r   )data	my_arrays
my_batchestblr   r|   r   1test_write_nested_zero_length_array_chunk_failure   s   

r   c                 C   s   | d }t dtjdtjdi}t|| t|}| }t	|| t
| d }t dtjdtjdi}t|| t|}| }t	|| d S )Nzzzz.parquetxr0   dtype)rq   rr   nparangeint64r   r
   	to_pandastmassert_frame_equalrK   )r   r[   r5   rS   df_readr   r   r   test_multiple_path_types   s   

r   c                 C   s   | d }t dg di}t|| t|}t|}||s"J tt	 t|t
 d W d    d S 1 s;w   Y  d S )Ntest.parquetr   r   
filesystem)r   r   r   r   FSProtocolClassr
   rO   r   r   r8   r   
FileSystem)r   r[   r   fs_protocol_objresultr   r   r   test_fspath   s   

"r   r   name)data.parquetu   例.parquetc                 C   s   t dg di}| | }t|t| t|  tj||d}W d    n1 s,w   Y  ||s8J |	  |
 rBJ t|  tj|||d W d    n1 sZw   Y  t|}||skJ d S )Nr   r   r   )r   r   rM   write_tablerK   r   
change_cwdr^   rO   unlinkrZ   )r   r   r   r   r[   r   r   r   r   test_relative_paths  s   
r   c                   C   s:   t t td W d    d S 1 sw   Y  d S )Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrM   r^   r   r   r   r   test_read_non_existing_file&  s   "r   c                  C   sT   G dd dt j} tjtdd t| d W d    d S 1 s#w   Y  d S )Nc                   @   s   e Zd Zdd Zdd ZdS )z3test_file_error_python_exception.<locals>.BogusFilec                 W      t dNzorglubZeroDivisionErrorselfargsr   r   r   read.     z8test_file_error_python_exception.<locals>.BogusFile.readc                 W   r   r   r   r   r   r   r   seek1  r   z8test_file_error_python_exception.<locals>.BogusFile.seekN)__name__
__module____qualname__r   r   r   r   r   r   	BogusFile-  s    r   r   r       )ioBytesIOr   r   r   rM   r^   )r   r   r   r    test_file_error_python_exception,  s   "r   c                 C   s   t dg di}t|t| d  tt| d d}t|}W d    n1 s,w   Y  ||s8J tt| d d}tt |}W d    n1 sTw   Y  ||s`J d S )Nr   r   r   rb)	r   r   rM   r   rK   rL   r^   rO   
PythonFile)r   r   rR   r   r   r   r   test_parquet_read_from_buffer9  s   r   c                  C   s,  t ttttd} t ttttd}t ddgd }| | g}t jj|ddgd}t	||dddd	 t	||ddgdgd	 t	||dddgddgd	 t jj| | ||gg d
d}t	||ddgddgd t jj|gdgd}t
jtdd t	||ddd W d    d S 1 sw   Y  d S )Nr-   TF2   r   br"   gzip)expectedcompressionr=   use_byte_stream_splitr   r   cdr   r   )r   r=   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   r=   )r   r%   rb   mapfloatrc   intr&   r'   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tabler   r   r   test_byte_stream_splitG  s:   "r   c              	   C   sX  t jttttdt ddd}t jttttdt ddd}t jttttdt ddd}t dd	gd
 }|||g}t jj|g dd}t	||dd	dd t
j| d}tj||dd	dd t|}|jd}	|jd}
|	jdksJ |
jdksJ t	||dd	ddddd t jj||||gg dd}t	||d	dd d S )Nr-      r   rx      	      TFr   r   r   r   r"   r   )r   r   r=   store_decimal_as_integerr   )r   r=   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   r=   r   column_encodingr   )r   r=   r   )r   r%   rb   r   r   rc   
decimal128r&   r'   r   osr[   joinrM   r   r_   rl   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   r   r   r   test_store_decimal_as_integerm  s^   






r   c               
   C   s  t ttttd} t ttttd}t jdd tdD t  d}t jdd tdD t dd}t g dd }t jj	| ||||gg d	d
}t
||ddddddd t
||ddd t
||dddddd t
||dddddd t
||ddddddd t
||dddid tjtdd t
||dddddd W d    n1 sw   Y  tjtdd t
||dddddd W d    n1 sw   Y  tjtdd t
||ddd W d    n1 sw   Y  tjtdd t
||dddid W d    n	1 sw   Y  tt t
||dgddid W d    n	1 s8w   Y  tt t
||ddid  W d    n	1 sWw   Y  tt t
||ddgddddd! W d    n	1 s{w   Y  tt t
||dd"ddddd! W d    n	1 sw   Y  tt t
||dd"d W d    d S 1 sw   Y  d S )#Nr-   c                 S   s   g | ]}t |qS r   )rK   rf   r   r   r   r   rh     s    z(test_column_encoding.<locals>.<listcomp>rx   c                 S   s   g | ]	}t |d qS )r0   )rK   zfillr   r   r   r   rh     s    r0   )FTFF   )r   r   r   r   er"   FBYTE_STREAM_SPLITPLAINr   )r   r=   r   r   r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr   RLEr   r   )r   r   r   z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r   )r   r=   r   r   T)r   r%   rb   r   r   rc   r   binaryr&   r'   r   r   r   r   OSErrorr   r8   )r   r   arr_binarr_flbar   r   r   r   r   test_column_encoding  s    


$r   c               	   C   s   t ttttd} | | g}t jj|ddgd}t||ddd t||ddd t||dd	d
d t||dddd
d t||ddd t||ddd g d}t	
 }|D ]#\}}tttf t||||d W d    n1 sww   Y  qYd S )N  r   r   r"   r   r   )r   r   compression_levelr   snappyr   )r   r   r   r   lz4r   ))r      )r   i)r]   i  )lzo   )r   r   )r   r%   rb   r   r   rc   r&   r'   r   r   r   r   r   r   r   r   )r(   r   r   invalid_combinationsbufcodeclevelr   r   r   test_compression_level#  s>   	r   c                  C   sP   t g d} d}t j| g|g}t|ddid}d}|jd j|ks&J d S )N)r   r   r   r   r   zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r%   r&   r'   r   rl   r   )a0r   r   r   expected_namer   r   r    test_sanitized_spark_field_namesP  s   r  c                  C   sl   t dd} tj| }t }t||ddd |d t|dd}|d t|d	d}|	|s4J d S )
Ni'  r:   SNAPPYr<   )r   r   r   T)use_threadsF)
r   r   r&   r3   r   r   r   r   r
   rO   )r5   r   r   table1table2r   r   r   test_multithreaded_read[  s   


r
  c                  C   s   t jtdgg dd} tj|  }t	 }t
||dd |d t|}||s0J tt t
||dd W d    d S 1 sHw   Y  d S )Nr   )ABCD)columns)
chunk_sizer   )rq   rr   r   r   r   r&   r3   reset_indexr   r   r   r   r
   rO   r   r   r   )r   r   r   r   r   r   r   test_min_chunksizem  s   
"r  c                 C   s   t tdttddtdddtjddd	d
g dt tdt jdddt jddddt jddddd	}t	j
|}| d }z	t||dd W n
 t	jyX   Y nw | r_J d S )Nabcr   r   r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)r  tzns)r  freq)	r   r   r   r   r   rR   ghirH   r1   r   )rq   rr   rb   rc   r   r   astypeCategorical
date_ranger   r&   r3   r   ArrowExceptionrZ   )r   r5   pdfrQ   r   r   r   (test_write_error_deletes_incomplete_file~  s(   
r)  c              
   C   sN   d}zt | W d S  ty& } z||jd v sJ W Y d }~d S d }~ww )Nznonexistent-file.parquetr   )rM   r^   	Exceptionr   )r   r[   r   r   r   r   test_read_non_existent_file  s    r+  c                 C   sH   t   t jdd t| d  W d    d S 1 sw   Y  d S )Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrM   r^   )datadirr   r   r   test_read_table_doesnt_warn  s   
"r2  c                  C   s`   t jt ddggdg} t }tj| |dd |d t	|}t
| |   d S )Nr  defsome_colr   r   r   )r   r&   r'   r%   r   r   rM   r   r   r^   r   r   r   )r   rR   	roundtripr   r   r   test_zlib_compression_bug  s   

r7  c              	   C   s   t | d }tjtjtfdd" t|d}W d    n1 s!w   Y  t| W d    n1 s5w   Y  tjtjtfdd( t|d}|	d W d    n1 sZw   Y  t| W d    d S 1 sow   Y  d S )Nr   zsize is 0 bytesr   rI   zsize is 4 bytess   ffff)
rK   r   r   r   ArrowInvalidr   rL   rM   r^   write)r   r[   rR   r   r   r   test_parquet_file_too_small  s"   "r:  zignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc           	      C   s   t d}ttdttddtjddddg d	tjd
ddt	g dd}t
|}t| d }tj||d d ||}| }t|| t| d }||| t|}|d t|d< t| | d S )Nfastparquetr  r   r   r  r  r  r   r  r  r   r  )r   r   r   )r   r   r   r   r   rR   zcross_compat_arrow.parquetr5  z cross_compat_fastparquet.parquetrR   )r   importorskiprq   rr   rb   rc   r   r   r&  r%  r   r   rK   rM   r   r_   r   r   r   r9  rN   r$  object)	r   fpr5   r   
file_arrowfp_filedf_fpfile_fastparquettable_fpr   r   r   $test_fastparquet_cross_compatibility  s*   



rD  array_factoryc                   C      t dd gd S Nr   r0   r   r%   r   r   r   r   <lambda>      rI  c                   C      t dd gd  S rG  r   r%   dictionary_encoder   r   r   r   rI        c                   C   rF  N r0   rH  r   r   r   r   rI    rJ  c                   C   rK  rO  rL  r   r   r   r   rI    rN  read_dictionaryFTc                 C   s   t jd|  i}t }tj||dd |d |rdgnd }tj|d|d}|j	D ]}|j
\}| d }| |jd ksCJ q,d S )	Nrg   T)r=   r   F)r  rQ  r       )r   r&   from_pydictr   r   rM   r   r   r^   r  chunksbuffers
to_pybytesr;   )rE  rQ  
orig_tablebior   rg   re   r   r   r   r   test_buffer_contents  s   

rY  c                 C   sP   t jt tdgdgd}| d }tj||dd t|}||s&J d S )Nr   rY   r"   zarrow-10480.pyarrow.gzGZIPr5  )r   r   r%   rc   rM   r   r^   rO   )r   r   r[   r   r   r   r   "test_parquet_compression_roundtrip  s
   
r[  c                 C   s   t jt jg ddgdg}| d }d}t||j}t|D ]}|| q W d    n1 s2w   Y  t	|}|j
j|ksDJ t|D ]}|||sTJ qHd S )Nru   rx   r!   zempty_row_groups.parquetr   )r   r&   r'   r%   rM   ParquetWriterrl   rc   r   r_   metadatanum_row_groupsread_row_grouprO   )r   r   r[   
num_groupswriterr#  readerr   r   r   test_empty_row_groups  s   
rc  c                 C   sV   d gd }| dg tj|gdg}| d }t|| t|}||ks)J d S )Ni   r   r   zarrow-11607.parquet)r?   r   r&   r'   rM   r   r^   )r   r   r   r[   r	  r   r   r   test_reads_over_batch$  s   

rd  c                 C   s   | d }|j dd tjg dg dgddgd}t||d	  tjg d
g dgddgd}t||d  tt|}tjg dg dgddgd}||ksTJ d S )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r"   zdata1.parquet)皙?      ?333333?)r   r   r  zdata2.parquet)r   r   r   r   r   r  )rg  rh  ri  rj  rk  rl  )mkdirr   r   rM   r   r^   rK   )r   casedata1data2r   r	  r   r   r    test_permutation_of_column_order2  s   
rq  c                 C   s  | d }t ttd}d}t j|g| dd t|D d}t|| tjt	dd tj
|d	| d
 W d    n1 sAw   Y  tjt	dd tj
||d W d    n1 s_w   Y  tj
|d| d
}||kssJ tj
|d| d}||ksJ t
|}||ksJ d S )Nzlargethrift.parquetr0   r   c                 S   s   g | ]}d | qS )some_long_column_name_r   )rf   r#  r   r   r   rh   L  rN  z+test_thrift_size_limits.<locals>.<listcomp>r"   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr-   r   )r   r%   rb   rc   r   rM   r   r   r   r   r^   )r   r[   r%   num_colsr   gotr   r   r   test_thrift_size_limitsE  s4   
rw  c           
      C   s  | d }t dg di}tj||dd tj|dd}||ks"J t| }|d |d ks2J |d |d |d< |d< | d	 }|| tj|d
d}||ksUJ |t dg diksbJ tj	t
dd tj|dd}W d   n1 s{w   Y  tj|d
d}| }	|	|ksJ |	t dg diksJ tj|dd}tj	t
dd | }W d   dS 1 sw   Y  dS )zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r   Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r   CRC checksum verificationr   N)r   r   rM   r   r^   	bytearray
read_byteswrite_bytesr   r   r   r_   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrC   corrupted_pq_filetable_corrupt2r   r   r   +test_page_checksum_verification_write_table`  s<   

"r  c                 C   s>  t dg di}| d }tj||dd t| }t|dks#J |d }tj|dd}||ks4J t|	 }|d	 |d
 ksDJ |d
 |d	 |d	< |d
< | d }t
|| ||j }|| tj|dd}	|	|ksqJ |	t dg diks~J tjtdd tj|dd}
W d   dS 1 sw   Y  dS )zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   rx  correct_dirTry  r   r   r{  r}  r~  corrupted_dirFr  r  r   N)r   r   rM   write_to_datasetrb   iterdirlenr^   r  r  r   r   r  r   r   r   )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rC   r   r   r   test_checksum_write_to_dataset  s4   


"r  c                 C   s   t dg di}| d }d}tjt|d tj||dd W d    n1 s)w   Y  t|| tjt|d tj|dd W d    n1 sMw   Y  tjt|d tj|dd W d    d S 1 slw   Y  d S )Nr   r   deprecate_legacyzPassing 'use_legacy_dataset'r   F)use_legacy_dataset)	r   r   r   warnsFutureWarningrM   r  r^   ParquetDataset)r   r   r[   msgr   r   r   "test_deprecated_use_legacy_dataset  s   "r  )Tr   collectionsr   r   r.  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r   r	   pyarrow.parquetparquetrM   r
   r   ImportErrorpandasrq   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr    r,   r6   r9   rD   rT   rW   r\   r`   slowrd   rp   rt   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r   r   r   r  r
  r  r)  r+  r2  r7  r:  r;  filterwarningsrD  rY  r[  rc  rd  rq  rw  r  datasetr  r  r   r   r   r   <module>   s   


	









&6 -





$:
4