o
    䯪g%Y                     @   s  d dl Z d dlZzd dlZW n ey   dZY nw d dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZ zd dlmZ d dlmZmZmZ W n eyS   dZY nw zd dlZd dlmZ d dlmZmZ W n eyu   d ZZY nw ejjZejjdd Zejjd	d
 Z ejjdd Z!ejjdd Z"ejjdd Z#ejjdd Z$ejjdd Z%ejjdd Z&ejjdd Z'ejjdd Z(ejjej)ddd Z*ejjdd Z+ejjd d! Z,ejjd"d# Z-ejjd$d% Z.ejjd&d' Z/ejjd(d) Z0ejjd*d+ Z1ejjd,d- Z2ejjd.d/ Z3ejjd0d1 Z4ejjd2d3 Z5ejjd4d5 Z6ejjd6d7 Z7ejjej8d8g d9ej8d:d;d<gd=d> Z9ejjd?d@ Z:ejjdAdB Z;dS )C    N)LocalFileSystemSubTreeFileSystem)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 C   s   t dd}| d }tj|}d|jjv sJ t|| t|j}d|v s(J t	
|d d}|d dd ddd	d
gksAJ d S )N'  sizepandas_roundtrip.parquets   pandasutf8index_columnsranger      )kindnamestartstopstep)r
   paTablefrom_pandasschemametadatar   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   js r'   ]/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadata7   s   

r)   c              	   C   s   t t dt  t dt  t dt  g}ttj	dtj
dtj	dtjdg dd}tdd	gd
d gd d gd}t jj||dd}t jj||dd}|jj|jddr]J |j|jsfJ tj| d |d}|| || d S )Nintfloatstring   dtype)ABBAEDDAACDC)r*   r+   r,         g?F)r   preserve_indexT)check_metadatazmerged.parquet)r   )r   r   fieldint16float32r,   pd	DataFramenparangeuint8r   r   equalsr   ParquetWriterwrite_table)r"   r   df1df2table1table2writerr'   r'   r(   :test_merging_parquet_tables_with_different_pandas_metadataK   s,   
rG   c                 C   s   t dd}tjjtt|j|jd d d ddgd|_| d }tj	|}|j
jd us.J t|| t|}| }t|| d S )N
   r   level_1level_2namesr   )r
   r:   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar   r   read_pandas	to_pandastmassert_frame_equal)r"   r#   r$   r%   
table_readdf_readr'   r'   r(   %test_pandas_parquet_column_multiindexh   s   


rZ   c                 C   s   t dd}| d }tjj|dd}|jj}|d rJ |d s!J t|| t|}|jj}|d r5J |jj	}|jj	|ksAJ |
 }t|| d S )Nr   r   r   Fr5   r   rR   )r
   r   r   r   r   rS   r   r   rT   r   rU   rV   rW   )r"   r#   r$   r%   r&   rX   r   rY   r'   r'   r(   >test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written{   s   


r\   c                  C   X   t d} tj| }t }t||dd | }t|}t|	 }t
| | d S )Nr   2.6versionr   r   r   r   BufferOutputStreamr   getvalueBufferReaderr   rU   rV   rW   r#   r%   imosbufreaderrY   r'   r'   r(   )test_pandas_parquet_native_file_roundtrip      
ri   c                  C   sj   t d} tj| }t }t||dd | }t|}tj	|ddgd
 }t| ddg | d S )Nr   r^   r_   stringsr>   rR   )r   r   r   r   rb   r   rc   rd   r   rT   rU   rV   rW   re   r'   r'   r(   test_read_pandas_column_subset   s   
rm   c                  C   r]   )Nr   r^   r_   ra   re   r'   r'   r(   #test_pandas_parquet_empty_roundtrip   rj   rn   c                  C   sJ   ddiddiddigdd} t j| d}tj|}t }t|| d S )	N	page_typer   record_typenon_consecutive_homer   1001)agg_col	uid_first)data)r:   r;   r   r   r   rb   r   )ru   r#   r%   rf   r'   r'   r(   !test_pandas_can_write_nested_data   s   rv   c           	      C   s   | d }d}t tj|tjdtj|tjdtj|tjdtj|dkg dd}t	j
|}|d}t||dd	 W d    n1 sHw   Y  t| }t|}| }t|| d S )
Nzpandas_pyfile_roundtrip.parquetr4   r.   r   )foobarNbazqux)int64r9   float64boolrk   wbr^   r_   )r:   r;   r<   r=   r{   r9   r|   randomrandnr   r   r   openr   ioBytesIO
read_bytesr   rU   rV   rW   )	r"   r$   r   r#   r%   fru   rX   rY   r'   r'   r(   $test_pandas_parquet_pyfile_roundtrip   s"   r   c           
      C   s  d}t jd tt j|t jdt j|t jdt j|t jdt j|t j	dt j|t j
dt j|t j
dt j|t jdt j|t jdt j|t jdt j|t jdt j|dkd}| d }tj|}dD ]}t||d|d t|}| }t|| qgdD ]}t||d|d	 t|}| }t|| qd
D ]$}	|	dkrtjj|	sqt||d|	d t|}| }t|| qd S )Nr   r   r.   )r>   uint16uint32uint64int8r8   int32r{   r9   r|   r}   r   )TFr^   )r`   use_dictionary)r`   write_statistics)NONESNAPPYGZIPLZ4ZSTDr   )r`   compression)r<   r   seedr:   r;   r=   r>   r   r   r   r8   r   r{   r9   r|   r   r   r   r   r   r   rU   rV   rW   libCodecis_available)
r"   r   r#   r$   r%   r   rX   rY   r   r   r'   r'   r(   )test_pandas_parquet_configuration_options   sV   r   z)ignore:Parquet format '2.0':FutureWarningc                  C   sJ   t dd} tddt|  d| _d| j_t| ddd}t||  d S )	Nd   r   r   rH   rw   z2.0spark)r`   flavor)	r   r<   r=   lenindexr   r	   rV   rW   )r#   resultr'   r'   r(   +test_spark_flavor_preserves_pandas_metadata  s   
r   c                 C   s   t ddt ddit dt dt dt did}t| d }t j|ddjdd	d
}tj|}t|| t	|}|
 }t|| d S )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r.   r   Fdrop)r:   	Timestampstrr;   	set_indexr   r   r   r   r   rU   rV   rW   )r"   ru   pathdfxtdfxr%   	result_dfr'   r'   r(    test_index_column_name_duplicate  s$   


r   c           	      C   s   d}t t|}tjjg d|gddgd}tjd|i|d}tj|}| d }t	|| t
|}||s9J | }t|| d S )	Nr-   )rw   rw   rx   foobarsome_numbersrL   numbers)r   zdup_multi_index_levels.parquet)rP   r   r:   rN   from_arraysr;   r   r   r   r   r   r?   rU   rV   rW   )	r"   num_rowsr   r   r#   tabler$   result_tabler   r'   r'   r(    test_multiindex_duplicate_values:  s   

r   c                 C   sB   d}t jt|dd ddd}t| d }| }t|| d S )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsep	index_colheaderenginezv0.7.1.parquet)r:   read_csvr   r   r   rU   rV   rW   datadirexpected_stringexpectedr   r   r'   r'   r(   &test_backwards_compatible_index_namingP  s   r   c                 C   sJ   d}t jt|dg dddd }t| d }| }t|| d S )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquet)	r:   r   r   r   
sort_indexr   rU   rV   rW   r   r'   r'   r(   1test_backwards_compatible_index_multi_level_namede  s   
r   c                 C   s\   d}t jt|dg dddd }|jg d|_t| d }| }t	
|| d S )	Nr   r   r   r   r   r   )r   Nr   zv0.7.1.some-named-index.parquet)r:   r   r   r   r   r   	set_namesr   rU   rV   rW   r   r'   r'   r(   6test_backwards_compatible_index_multi_level_some_named~  s   r   c              	   C   s   t dt tjkrtd tg dg dtjddddd	}tjjg d	tjddddgd
d gd|_	| d }t
|}| }t|| t
|dgd}| }t||dg jdd d S )Nz2.2.0zRegression in pandas 2.2.0r      r-   )g?g?g333333?z
2017-01-01r-   zEurope/Brussels)periodstzabcr   rL   z'v0.7.1.column-metadata-handling.parquetr   rl   Tr   )r   r:   __version__pytestskipr;   
date_rangerN   r   r   r   rU   rV   rW   reset_index)r   r   r   r   r   r'   r'   r(   2test_backwards_compatible_column_metadata_handling  s,   
r   c                  C   s   t jddgddggddgd} | d d| d< | dg} tj| }t }t	|| t
|  }t|jt js@J |j| jsIJ d S )	Nr   r   r   dc1c2rl   category)r:   r;   astyper   r   r   r   rb   r   rA   rT   rc   rU   
isinstancer   CategoricalIndexr?   )r#   r   bosref_dfr'   r'   r(   )test_categorical_index_survives_roundtrip  s   r   c                  C   sh   t dt jg dg dddi} tj| }t }t|| |	 }t
| }t||  d S )Nr   )r   r   r   r   )r   r   r   T)
categoriesordered)r:   r;   Categoricalr   r   r   rb   r   rA   rc   rT   rU   rV   rW   )r#   r   r   contentsr   r'   r'   r(   )test_categorical_order_survives_roundtrip  s   

r   c                  C   s   t d gd dgd d} | ddd}tj| }tj|}t }tj||ddd t	|
 }|d |d sAJ |d	 |d	 sLJ d S )
Nr   g      ?)colr*   r   r^   rH   )r`   
chunk_sizer   r   )r:   r;   r   r   r   r   rb   r   rA   
read_tablerc   r?   )r#   df_categoryr   	table_catrg   r   r'   r'   r(   *test_pandas_categorical_na_type_row_groups  s   r   c                  C   s   t jg ddd} g d}tdtjj| |di}t }t	t
|| t|  }|jjdks8J |jjj|k sCJ t|| d S )N)r   r   r   r   r   rI   r   r   r.   )rw   rx   ry   x)r   r   )r<   arrayr:   r;   r   
from_codesr   rb   r   rA   r   r   rc   rU   r   r/   catr   allrV   rW   )codesr   r#   rg   r   r'   r'   r(   !test_pandas_categorical_roundtrip  s   
r   c                 C   s   t tjt dk rtd tjdg didd}|d}tdg di}|d}t|d 	 t|d 	 ks@J t|d j
jj	 t|d j
jj	 ksZJ t| d }tt|| t| }t|| d S )	Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r   )rw   rx   rw   zstring[pyarrow]r.   r   zcat.parquet)r   r:   r   r   r   r;   r   r   r   	to_pylistr   r   valuesr   r   rA   r   r   rU   rV   rW   )r"   rB   rC   r   r   r'   r'   r(   )test_categories_with_string_pyarrow_dtype  s    


(r   c                 C   s   t dg dd}|d d|d< t|}tj|t| d dgd tt| d 	 }t
|dg |dg  t|t| d	  tt| d	 	 }t
|dg |dg  t|t| d
  tt| d
 	 }t
|dg |dg  d S )Nr   r   partr   r   Int64case1r   partition_colscase2r   )r:   r;   r   r   r   r   write_to_datasetr   r   rU   rV   rW   rA   )r"   r#   r   r   r'   r'   r(   5test_write_to_dataset_pandas_preserve_extensiondtypes  s   
r   c                 C   s  t g dg dd}t jg ddd|_t|}|ddg  }|d d	|d< tj	|t
| d
 dgd tt
| d
  }t|| t	|t
| d  tt
| d  }t|| t|t
| d  tt
| d  }t|| d S )N)r   r   r   r   r   r   idxr   r   r   r   r   r   r   r   )r:   r;   Indexr   r   r   copyr   r   r   r   r   rU   rV   rW   rA   )r"   r#   r   df_catr   r'   r'   r(   +test_write_to_dataset_pandas_preserve_index"  s    
r  r5   )TFNmetadata_fname	_metadata_common_metadatac                    sL  d}d}| t   }|  g }g }g }t|D ]L}	t||	d}
tjtj|	| |	d | dddd|
_|d	|	 }t
jj|
|d	}|d }|jjd u sOJ t|| || ||
 || qt
jj|
|d	}t|j||  t|}d
dg |j d }t fdd|D }|dur|
jjnd |j_t|| d S )Nr4   )r   r   r{   r.   r   r  z
{}.parquetr[   r>   rk   rl   c                    s   g | ]}|  qS r'   r'   ).0r   rl   r'   r(   
<listcomp>g  s    z<test_dataset_read_pandas_common_metadata.<locals>.<listcomp>F)r   mkdirr   r   r:   r  r<   r=   r   formatr   r   r   replace_schema_metadatar   r   r   appendr   write_metadataParquetDatasetrT   rU   concatr   rV   rW   )r"   r5   r  nfilesr   dirpath	test_dataframespathsir#   r   r   table_for_metadatadatasetr   r   r'   rl   r(   (test_dataset_read_pandas_common_metadata;  s>   





r  c                 C   sV   t dg di}| d }t|| tjdtt| t d}|t	
|s)J d S )Nr   r   r   )
filesystem)r:   r;   r   r   rT   r   r   r   r?   r   r   )r"   r#   r$   r   r'   r'   r(   %test_read_pandas_passthrough_keywordsm  s   
r  c                 C   s   t t ddgddggt ddgd}| d }tt t }ttd	|td
t g}tj	||}t
|| t| }t|| d S )N)id	something)value2else)r  
something2)valueelse2rw   rx   )col1col2r   r%  r&  )r:   r;   Seriesr   map_r,   r   r7   r   r   r   r   rT   rU   rV   rW   )r"   r#   r$   udtr   r%   r   r'   r'   r(   test_read_pandas_map_fields}  s   "
r*  )<r   r   numpyr<   ImportErrorr   pyarrowr   
pyarrow.fsr   r   pyarrow.utilr   pyarrow.vendored.versionr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   r   pandasr:   pandas.testingtestingrV   r	   r
   mark
pytestmarkr)   rG   rZ   r\   ri   rm   rn   rv   r   r   filterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r  parametrizer  r  r*  r'   r'   r'   r(   <module>   s   









,














/
