o
    䯪gj                     @   s  d dl Z d dlZd dlmZ d dlZzd dlZW n ey#   dZY nw d dlZd dl	Z
d dlmZmZ d dlmZ d dlmZ zd dlmZ d dlmZ W n eyY   dZY nw zd dlZd dlmZ d dlmZ W n eyy   d ZZY nw ejjZejjdd	 Zd
d Zejjej dg de
! ddddddfg de
" ddddddfg de
# ddddddfg de
$ ddddddfg de
% ddddddfg de
& ddddddfg de
' ddddddfg de
( ddddddfg de
) ddddddfg de
* ddddddfdde+dddge
, dde+d-ddddfg d e
. d!d"d#d d$dfg d%e
, dd&d'dddfgd(d) Z/d*d+ Z0ejjd,d- Z1d.d/ Z2d0d1 Z3d2d3 Z4d4d5 Z5d6d7 Z6d8d9 Z7d:d; Z8ejjd<d= Z9d>d? Z:ej;d@dAdB Z<dCdD Z=ejjdEdF Z>ejjdGdH Z?dIdJ Z@ejjAejjBdKdL ZCdMdN ZDdOdP ZEej dQdReFdSidReFdSidfdReFdSidTeFdSidUfeFdSeFdSdVdWeFdSidXffdYdZ ZGejjHd[d\ ZId]d^ ZJdS )_    N)OrderedDict)_check_roundtripmake_sample_file)LocalFileSystem)util)_write_table)alltypes_samplec            
      C   s  t dd} | jt| jd} tjjddt| d| _t	| }t| j}|j
}t| |jt| ks3J |j|d ks<J |jdksCJ |jdksJJ d|jv sQJ t|jtsYJ t|j
tsaJ |j}|j|u skJ t||d ksuJ t| |d }t| |j| jd ksJ |jdksJ |jdksJ |jdksJ |jd	ksJ |jd
ksJ |d }|jjdksJ tt ||d   W d    n1 sw   Y  tt |d  W d    n1 sw   Y  t |jD ]/}|!|}t|t"j#sJ t| t |jD ]}|$|}	t|	t"j%sJ t|	 qqtt |!d W d    n	1 s9w   Y  tt |!|jd  W d    n	1 sWw   Y  |!d}|jt| kskJ |j|d ksuJ |j&dks}J tt |$d}	W d    n	1 sw   Y  tt |$|d }	W d    n	1 sw   Y  |$d}	|	j'dksJ |	j(dksJ |	jd	ksJ |	j)dksJ |	j*dksJ |	j+du sJ t|	j,t"j-sJ |	j.dksJ t/|	j0ddhksJ |	j1du sJ |	j2d u sJ |	j3dks J |	j4dks(J |	j5dks0J tt6 |	j7 W d    n	1 sDw   Y  tt6 |	j8 W d    d S 1 s^w   Y  d S )N'  sizecolumnsr   i@B    2.6zparquet-cppBOOLEANNONE   FLOAT16    boolTSNAPPYPLAINRLEF)9r   reindexsortedr   nprandomrandintlenindexr   metadatareprnum_rowsnum_columnsnum_row_groupsformat_version
created_by
isinstanceserialized_sizeintdictschemanamemax_definition_levelmax_repetition_levelphysical_typeconverted_typelogical_typetypepytestraises
IndexErrorrange	row_grouppqRowGroupMetaDatacolumnColumnChunkMetaDatatotal_byte_sizefile_offset	file_path
num_valuespath_in_schemais_stats_set
statistics
Statisticscompressionset	encodingshas_dictionary_pagedictionary_page_offsetdata_page_offsettotal_compressed_sizetotal_uncompressed_sizeNotImplementedErrorhas_index_pageindex_page_offset)
dffilehncolsmetar-   colcol_float16rgrg_metacol_meta rZ   _/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/pyarrow/tests/parquet/test_metadata.pytest_parquet_metadata_api7   s   






$r\   c                 C   sH   t dg di}t|| d  t| d }|jddj d S )Nar   r      ztest_metadata_segfault.parquetr   )	patabler:   write_tableParquetFiler"   r9   r<   rD   )tempdirra   parquet_filerZ   rZ   r[   test_parquet_metadata_lifetime   s   rf   )datar4   r1   	min_value	max_value
null_countrA   distinct_count)r   r   r   N   INT32r   rl   INT64)r   r   r   Nrl   r   )皙g@gffffff@N皙@FLOATro   rp   DOUBLEr   b  aaa
BYTE_ARRAY    zutf-8)TFFTTr   FTr   )       bs   12Ns   aaarx   ry   c                 C   s   t d| i}ttd|g}	tjj||	dd}
t|
}|j}|	d}|
d}|j}|js3J t||j|s<J t||j|sEJ |j|ksLJ |j|ksSJ |j|ksZJ |j|ksaJ d S )Nrg   F)r-   safer   )pd	DataFramer`   r-   fieldTablefrom_pandasr   r"   r9   r<   rD   has_min_max_closeminmaxrj   rA   rk   r1   )rg   r4   r1   rh   ri   rj   rA   rk   rQ   r-   ra   rR   rT   rX   rY   statrZ   rZ   r[   "test_parquet_column_statistics_api   s   .


r   c                 C   s@   | t  krt|| dk S | t  krt|| dk S ||kS )NgHz>gvIh%<=)r`   float32absfloat64)r4   leftrightrZ   rZ   r[   r      s
   r   c                  C   sf   t dt jt jgddi} ttj| j}|	d
djjr#J |	d
djjd u s1J d S )Ntzdatetime64[ns])dtyper   )r{   r|   SeriesNaTr   r`   r~   r   r"   r9   r<   rD   r   r   )rQ   rT   rZ   rZ   r[   &test_parquet_raise_on_unset_statistics   s    r   c           
      C   s  ddt  fddt  fddt  ftddddtd	dddt d
ftddddtd	dddt dftdddddddtdddddddt d
ftdddddddtdddddddt dft	dddt	dddt 
 ftdtdt ddfg	}t|D ]E\}\}}}t jt j||g|dgdg}t| d| }tj||dd t|}|jddj}	|	j|ksJ |	j|ksJ qd S )N
   l   5f|~W	 l    u   ähnlichu   öffentlich   r   rt      msusi           z20.123z20.124   r   r4   rU   zexample{}.parquetr   version)r`   uint64uint32utf8datetimetimetime32time64	timestampdatedate32decimalDecimal
decimal128	enumerater~   from_arraysarraystrformatr:   rb   rc   r"   r9   r<   rD   r   r   )
rd   casesimin_valmax_valtypr   pathpfstatsrZ   rZ   r[   %test_statistics_convert_logical_types   sJ   

r   c              	   C   sV  t jtdt g dfdt g dfg}t|| d  t| d }dD ]}|d	|}|j
du s9J |jd us@J q(t|| d	 d
d t| d	 }dD ]}|d	|}|j
d
u sdJ |jd u skJ qSt|| d dgd t| d }|d	d}|d	d}|j
du sJ |j
d
u sJ |jd usJ |jd u sJ d S )Nr]   r^   rs   r]   rs   cdata.parquet)r   r   r   Tzdata2.parquetF)write_statisticszdata3.parquetr   )r`   r~   from_pydictr   r   r   r:   read_metadatar9   r<   rC   rD   )rd   ra   rT   rU   cccc_acc_brZ   rZ   r[   %test_parquet_write_disable_statistics  s4   r   c                  C   s  t d} |  ddddksJ t jdddd} |  ddddks%J tdt fdt fg}t jd	dd
t jddd
f}t j||\}}|dksQJ |dksWJ t j|||}||kseJ t jj|ddd}t jddddt jd	dddf}||ksJ t j|d}|dksJ t j|ddksJ t	t
 t j|d W d    n1 sw   Y  tj	t
dd t jd	ddt jdddf}t j|| W d    d S 1 sw   Y  d S )Nr   Fcolumn_index
descendingnulls_firstr   T)r   r   r]   rs   r   r   )rs   r   )r]   	ascendingat_end)r]   r   at_start)null_placementrZ   )rZ   r   )r]   znot a valid sort orderzinconsistent null placementmatch)r   )r:   SortingColumnto_dictr`   r-   int64to_orderingfrom_orderingr5   r6   
ValueError)sorting_colr-   sorting_cols
sort_orderr   sorting_cols_roundtrippedexpectedempty_sorting_colsrZ   rZ   r[   test_parquet_sorting_column5  sT   
"r   c                  C   s   t t dt  fdt  fgt  d} tjdddtjdddg}tj| |\}}|d	ks4J t|dks<J |d d
ksDJ |d dksLJ d S )Nxyr]   rs   r   Tr   r   Fr   )za.xr   r   )rs   r   )r`   r-   structr   r:   r   r   r    )r-   sorting_columnsr   r   rZ   rZ   r[   "test_parquet_sorting_column_nestedj  s   r   c                  C   s   t g dg dd} tjddddtjddd	f}t  }t| ||d
 t | }t|}||	dj
ks<J | }|ddksIJ |ddksRJ |ddks[J d S )Nr^   r   r   r   Tr   r   F)r   r   )r   r%   r   r$   r_   r&   )r`   ra   r:   r   BufferOutputStreamr   BufferReadergetvaluer   r9   r   r   get)ra   r   writerreaderr"   metadata_dictrZ   rZ   r[   !test_parquet_file_sorting_columns|  s   
r   c               
   C   s"  d} t jdt  | did}t jdt |g| did}t jdt  dd	| d
idt jdt t jdt  | did| didt jdt |g| didt dt  t jdt  | didt jdt  | didg}dd |D }t j|t |d}t  }t	|| |
 }tt |}|j}	|	d j|  d
ksJ |	d jd d	ksJ |	d }
|
j|  dksJ |
jj}|j|  dksJ |	d }|j|  dksJ |jd }|j|  dksJ |jd }|j|  dksJ |	d jd u sJ |	d j|  dksJ |	d j|  dksJ d S )Ns   PARQUET:field_idinners   100r"   middles   101basics   others   abc   1listz
list-inners   10s   11r   s   102zno-metadataznon-integral-field-ids   xyzznegative-field-ids   -1000c                 S   s   g | ]}g qS rZ   rZ   ).0_rZ   rZ   r[   
<listcomp>  s    z*test_field_id_metadata.<locals>.<listcomp>r-   r   r   r   r_   rl   r   )r`   r}   int32r   list_ra   r-   r   r:   rb   r   rc   r   schema_arrowr"   r4   value_field)field_idr   r   fieldsarrsra   biocontentsr   r-   
list_fieldlist_item_fieldstruct_fieldstruct_middle_fieldstruct_inner_fieldrZ   rZ   r[   test_field_id_metadata  s`   


r   c                  C   sz   dD ]8} t dg di}t  }t||| d t | }t|}|d	d}|j
| u s3J |j| u s:J qd S )N)FTr]   r^   )write_page_indexr   )r`   ra   r   r   r   r   r:   r   r9   r<   has_offset_indexhas_column_index)r  ra   r   r   r"   r   rZ   rZ   r[   test_parquet_file_page_index  s   
r  c                 C   sl  ddg}t | d }tg dg dddgddgdd	ggd
}tj|}d }|D ]'}g }tj|t | | |d |d | |d u rJ|d }q*|	|d  q*t
|d}|| W d    n1 sgw   Y  t|}| }	| }
|
D ]}|dkr|
| |	| ksJ q{|
d dksJ |
d dksJ |
d dksJ |
d dksJ |	d dksJ d S )NzARROW-1983-dataset.0zARROW-1983-dataset.1	_metadatar^   )r   r   r   r_   rl   )onetwothreemetadata_collectorr   wbr*   r%   r$   r   r&   )r   r{   r|   r`   r~   r   r:   rb   set_file_pathappend_row_groupsopenwrite_metadata_filer   r   )rd   	filenamesmetapathrQ   ra   _metafilenamerT   fmd_mdkeyrZ   rZ   r[   test_multi_dataset_metadata  sB   

r  c           
      C   s   t | d }tddg}t|| t|}t | d }tddg}t|| t|}t | d }tddg}t|| t|}	t|t|ksRJ t|t|ks\J t|t|	ksfJ d S )N	metadata1r]   r   rs   r   	metadata2	metadata3)rs   r   )r   r`   r-   r:   write_metadatar   hash)
rd   path1schema1parquet_meta1path2schema2parquet_meta2path3schema3parquet_meta3rZ   rZ   r[   test_metadata_hashing  s   


r+  z#ignore:Parquet format:FutureWarningc           
      C   s^  t | d }tddg}t|| t|}|j }||s$J |jr.d|jvs.J dD ]}tj|||d t|}|dkrEdnd}|j	|ksNJ q0tj
d	d
gddgd|d}t|| d  tt | d }tj||||gd t|}|jd
ksJ d}	tjt|	d tjtddg|||gd W d    d S 1 sw   Y  d S )Nr"   r  r  s   ARROW:schema)1.0z2.0z2.4r   r   r,  r   r   r   g?g?r   r   r   r  zLAppendRowGroups requires equal schemas.
The two columns with index 0 differ.r   )r]   r   )rs   null)r   r`   r-   r:   r   r   to_arrow_schemaequalsr"   r'   ra   rb   r&   r5   r6   RuntimeError)
rd   r   r-   parquet_metaschema_as_arrowr   expected_versionra   parquet_meta_multmsgrZ   rZ   r[   test_write_metadata  s8   




"r6  c                  C   s@   t jt ddgddd id} t jtdg| d}t| d S )	Nf0doublelarger   i r   r   r   )r`   r-   r}   ra   r8   r   )	my_schemara   rZ   rZ   r[   test_table_large_metadataI  s
   
r;  c                  C   sH  t dd} t| }t| }t| | jd d d  }t|jtjs"J |j|js+J |j|jks3J |j|js<J |j|jksDJ |jdksKJ |j|jrTJ |j|jks\J t|jd tjsgJ |jd |jd stJ |jd |jd ksJ |jd |jd rJ |jd |jd ksJ |jd dksJ d S )Nr	   r
   r   zarbitrary objectr   r   )	r   r   r   r)   r-   r:   ParquetSchemar/  ColumnSchema)rQ   rR   fileh2fileh3rZ   rZ   r[   test_compare_schemasR  s$   
r@  c                 C   s   d}t jt|tj|dddgd}| d }tj|}t	|| t
|}t
j|dd}|j|s8J |j|s@J |jjd	 |jd	 ksMJ d S )
Nd   )r!   valuesr!   rB  r   ztest.parquetT)
memory_maps   pandas)r{   r|   r   aranger   randnr`   r~   r   r   r:   read_schemar-   r/  r"   )rd   NrQ   	data_pathra   read1read2rZ   rZ   r[   test_read_schemam  s   


rK  c                 C   s   t dt jg ddi}t|| d  t| d }| }t|d dks)J t|d d d dks7J |d d d d d	 d u sGJ d S )
Nr]   r   r   r   
row_groupsr   r   r   rD   )r`   ra   r   r:   rb   r   r   r    )rd   ra   r"   r   rZ   rZ   r[   #test_parquet_metadata_empty_to_dict  s   $rM  c                  C   s   d} d}t dd t| D }t  }t|| | }W d    n1 s)w   Y  tt |}tt |}t|D ]}|	| qBt  }|
| | }W d    n1 sbw   Y  tt |}d S )Nrt   i  c                 S   s   i | ]}t |tjd qS )r   )r   r   r   rE  )r   r   rZ   rZ   r[   
<dictcomp>  s    z6test_metadata_exceeds_message_size.<locals>.<dictcomp>)r`   ra   r8   r   r:   rb   r   r   r   r  r  )NCOLSNREPEATSra   outbuforiginal_metadatar"   r   rZ   rZ   r[   "test_metadata_exceeds_message_size  s    




rT  c                 C   sF  t dg di}d}t| | }d| }t|| t| | }|j}t||s/J tj|t d|s<J tj|d|  d|sKJ t	||sUJ tj	|t d|sbJ tj	|d|  d|sqJ t
| # tj|t d|sJ tj	|t d|sJ W d    d S 1 sw   Y  d S )Nr]   r^   r   zfile:///
filesystem)r`   ra   r   r:   rb   r   r-   r/  r   rF  r   
change_cwd)rd   ra   fnamer@   file_urir"   r-   rZ   rZ   r[   test_metadata_schema_filesystem  sR   

"rZ  c                  C   s   t dg di} t  }t| | | }W d    n1 s"w   Y  tt |}d}tj	t
|d |d  W d    d S 1 sIw   Y  d S )Nr]   r^   z#Argument 'other' has incorrect typer   )r`   ra   r   r:   rb   r   r   r   r5   r6   	TypeErrorr/  )ra   rQ  rR  rS  r   rZ   rZ   r[   test_metadata_equals  s   

"r\  zt1,t2,expected_errorcol1r   col2z$The two columns with index 0 differ.)r]  r^  col3z&This schema has 2 columns, other has 1c           
      C   s   t | }t |}t }t }t|| t|| |d |d t|j}t|j}|rYd}	t	j
t|	| d || W d    d S 1 sRw   Y  d S || d S )Nr   z(AppendRowGroups requires equal schemas.
r   )r`   ra   ioBytesIOr:   rb   seekrc   r"   r5   r6   r0  r  )
t1t2expected_errortable1table2buf1buf2meta1meta2prefixrZ   rZ   r[   $test_metadata_append_row_groups_diff  s    



"rm  c                 C   s  |\}}| d }| d }| d }| d }| d}t dtdi}	t|	j|g  tj|	j|g t d t|	j| g  |d	}
t|	j|
g  W d    n1 sWw   Y  tj|	j|g |d |	 |	   kr|	   kr|	   kr||
 ksJ  J d S )
Nrj  rk  meta3meta4z/meta5rU   r   rU  zwb+)r`   ra   r8   r:   r   r-   r   as_urir  
read_bytesread)rd   s3_example_s3fss3_fss3_pathrj  rk  rn  ro  meta5ra   meta4_streamrZ   rZ   r[   (test_write_metadata_fs_file_combinations  s*   
rx  c                 C   sT   t | d }|ddj}|dddksJ |ddj}|d u s(J d S )Nz'column_chunk_key_value_metadata.parquetr   s   barrw   )s   foos   thisiskeywithoutvaluer   )r:   r   r9   r<   r"   )parquet_test_datadirr"   key_value_metadata1key_value_metadata2rZ   rZ   r[   $test_column_chunk_key_value_metadata  s   r|  )Kr   r   collectionsr   r`  numpyr   ImportErrorr5   pyarrowr`   pyarrow.tests.parquet.commonr   r   
pyarrow.fsr   pyarrow.testsr   pyarrow.parquetparquetr:   r   pandasr{   pandas.testingtestingtmr   mark
pytestmarkr\   rf   parametrizeuint8uint16r   r   int8int16r   r   r   r   chrbinaryencodebool_r   r   r   r   r   r   r   r   r   r  r  r+  filterwarningsr6  r;  r@  rK  rM  slowlarge_memoryrT  rZ  r\  r8   rm  s3rx  r|  rZ   rZ   rZ   r[   <module>   s   
\*

#58
*

+	

#

 