o
    䯪g                     @   s  d dl Z d dlZd dlZd dlZd dlZzd dlZW n ey%   dZY nw d dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ zd dlmZ d dlmZm Z m!Z! W n eys   dZY nw zd dl"Z#d dl$m%Z& W n ey   d Z#Z&Y nw ej'jej'j(gZ)dd Z*ej'j"dd	 Z+ej'j"d
d Z,ej'j"dd Z-ej'j"dd Z.ej'j/e0e1fddej'j"dd Z2ej'j"dd Z3ej'j"dd Z4ej'j"dd Z5ej'j"dd Z6ej'j"dd Z7ej'j"ej'8ddgdgge9d d!k e9d"d#d!k e9d"d$:e; d!k fej'8d%d&d'd( Z<ej'j"d)d* Z=ej'j>d+d, Z?ej'j>d-d. Z@ej'j"d/d0 ZAej'j"ej'j>d1d2 ZBd3d4 ZCd5d6 ZDd7d8 ZEej'j"d9d: ZFej'j"d;d< ZGej'j"d=d> ZHej'j"d?d@ ZIej'j"dAdB ZJej'j"dCdD ZKddGdHZLdIdJ ZMej'j"ej'8dKdLdMgdNdO ZNej'j"dPdQ ZOej'j"dRdS ZPej'j"ej'8dKdLdMgdTdU ZQdVdW ZRdXdY ZS			ddZd[ZT	dd\d]ZUej'j"d^d_ ZVej'j"d`da ZWej'j"dbdc ZXej'j"ddde ZYej'j"dfdg ZZej'j"ej'j>dhdi Z[ej'j"ej'j>ej'j\ej]djkdkdldmdn Z^ej'j"ej'j>dodp Z_ej'j"dqdr Z`ddtduZaej'j"dvdw Zbej'j"dxdy Zcdzd{ Zdd|d} Zed~d Zfdd Zgdd Zhdd Zidd Zjej'8dddd Zkej'j"dd ZldS )    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_write_tablec                 C   s~   t dg di}| d }|  |d }t|t| tj|t d}||s,J tjdt	
| d}||s=J d S )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr   _filesystem_uri)tempdirr   	directorypathresult r#   ^/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_uri;   s   
r%   c                 C   s   t  }t||  d S N)r   _partition_test_for_filesystem)r   localr#   r#   r$   test_read_partitioned_directoryN   s   r)   c                 C   s@   t  }| }t|| t|}|jdgd}|jdgksJ d S )Nvaluescolumns)r   r'   r   ParquetDatasetreadcolumn_names)r   r(   	base_pathdatasetr"   r#   r#   r$   'test_read_partitioned_columns_selectionT   s   

r2   c                 C   s  t  }| }ddg}g d}ddg}d|gd|gd|gg}ttj|d	d
dtttj|td
ddtttj|dd
ddtdd}t	|||| t
j||g dd}| }	|	 jdd}
d|
d jvsqJ d|
d jvszJ d|
d jvsJ g dddgg}t
j|||d}| }	|	 jdd}
|
d dk|
d dk@ |
d dk@ }t|
d dk|
d dk@ }| dksJ | dksJ |
jd | |  ksJ dggdggfD ]}t
j|||d}| jdksJ qd S )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r6   r7   r8   r*   ))r6   =r   )r7   !=r4   )r8   ==Truer   filtersdropr4   )r6   rA   r   )r8   rC   FalserD   rI   )r7   rC   s   1 a)r7   rC   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r-   r.   	to_pandasreset_indexr*   sumshapenum_rows)r   r(   r0   integer_keysstring_keysboolean_keyspartition_specdfr1   r   	result_dfrF   df_filter_1df_filter_2r#   r#   r$   test_filters_equivalencya   sh   



r`   c                 C      t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| tj||d	d
gd}|	 }|
 jddjdd}	dd tt|	d jD }
|
ddgksZJ d S )Nr   r   r   r      integersr=   r9   r:   indexrd   rf   r+   )rd   <rc   )rd   >r   rE   byTrG   c                 S   s   g | ]}|qS r#   r#   .0xr#   r#   r$   
<listcomp>   s    z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>r   r   r   rJ   rK   rL   rQ   rM   rR   r   r-   r.   rS   sort_valuesrT   mapintr*   r   r(   r0   rX   r[   Nr\   r1   r   r]   result_listr#   r#   r$   %test_filters_cutoff_exclusive_integer   6   rv   z5Loss of type information in creation of categoricals.)raisesreasonc              	   C   s  t  }| }tdddtdddtdddtdddtdddg}d|gg}d	}tjt|tj|d
ddddgd}t|||| t	j
||ddgd}| }| jddjdd}	tjtjtdddgd
dtj|d
dd}
|	d j|
ksJ d S )Ni  rc   	   
            datesr=   
datetime64r:   )rf   r   rf   r+   )r   rg   z
2018-04-12)r   rh   z
2018-04-10rE   ri   TrG   
categories)r   datetimedaterJ   rK   rL   rQ   rM   rR   r   r-   r.   rS   rp   rT   Categoricalr*   )r   r(   r0   	date_keysr[   rt   r\   r1   r   r]   expectedr#   r#   r$   &test_filters_cutoff_exclusive_datetime   sF   r   c              	   C   sp   | d }t t jddddtddj|dd tj|d	d
tdddfgd}|d	 g dks6J d S )Nztimestamps.parquetz
2020-01-01r{   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   r=   rF   r   rb   )
rJ   rK   
date_rangerange
to_parquetr   r   r   column	to_pylist)r   r!   r   r#   r#   r$   test_filters_inclusive_datetime   s   r   c                 C   ra   )Nrb   rd   r=   r9   r:   re   rf   r+   )rd   r   r   )rd   z>=r   rE   ri   TrG   c                 S   s   g | ]}t |qS r#   )rr   rk   r#   r#   r$   rn   *      z2test_filters_inclusive_integer.<locals>.<listcomp>r   r   ro   rs   r#   r#   r$   test_filters_inclusive_integer  rw   r   c                 C   s|  t  }| }ddg}g d}ddg}d|gd|gd|gg}ttj|d	d
dtttj|td
ddtttj|dd
ddtdd}t	|||| t
j||dgd}| }	|	 jdd}
d|
d jv spJ d|
d jv syJ d|
d jvsJ t
j||dddgfddddhfgd}| }	|	 jdd}
d|
d jvsJ d|
d jvsJ d|
d jvsJ d S )Nr   r   r3   TFr6   r7   r8   r9   r:   r<   r=   r   r>   r   r?   r@   )r7   inabrE   rG   r   r4   r5   r   )r7   r   r   r4   znot inrI   )r   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   r   r-   r.   rS   rT   r*   )r   r(   r0   rX   rY   rZ   r[   r\   r1   r   r]   r#   r#   r$   test_filters_inclusive_set.  sH   
r   c                 C   sV  t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| tt	 t
j||d	gd
 W d    n1 sDw   Y  tt t
j||dgd
 W d    n1 sbw   Y  t
j||ddt fgd
}| jdks}J t
j||dddhfgd
}tt | jdksJ W d    d S 1 sw   Y  d S )Nrb   rd   r=   r9   r:   re   rf   r+   )rd   r   r   rE   )rd   z=<r   r   r   rB   r   )r   rJ   rK   rL   rQ   rM   rR   pytestrx   	TypeErrorr   r-   
ValueErrorsetr.   rW   NotImplementedError)r   r(   r0   rX   r[   rt   r\   r1   r#   r#   r$   test_filters_invalid_pred_op]  sJ   "r   c                 C   s   t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| d	}tjt	|d
 t
j||dgd  W d    d S 1 sKw   Y  d S )Nrb   rd   r=   r9   r:   re   rf   r+   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrg   r   rE   )r   rJ   rK   rL   rQ   rM   rR   r   rx   r   r   r-   r.   )r   r(   r0   rX   r[   rt   r\   msgr#   r#   r$   test_filters_invalid_column  s&   
"r   rF   )rd   rg   r   rd   r   nestedr   r4   read_method)r   read_pandasc              	   C   s   t t|}t }| }g d}d|gg}t|}tt|tj|ddtdd t	|D d}	t
||||	 t||d}
||fi |
}|jd	ksNJ d S )
Nrb   rd   r9   r:   c                 S   s   g | ]	}|t |d qS )r   )r   rl   ir#   r#   r$   rn         z+test_filters_read_table.<locals>.<listcomp>)rf   rd   r   rE   r   )getattrr   r   lenrJ   rK   rL   rQ   rM   r   rR   dictrW   )r   rF   r   r.   r(   r0   rX   r[   rt   r\   kwargsr   r#   r#   r$   test_filters_read_table  s    
	r   c           	      C   s   t  }| }ddg}d|gg}d}tjt|tj|dddddgd	}t|||| t|}|	 }|
d |ks?J d S )
N2019_22019_3	year_weekr   rP   r:   )rf   r   rf   r+   )r   rJ   rK   rL   rQ   rM   rR   r   r-   r.   r   r   )	r   r(   r0   rY   r[   rt   r\   r1   r"   r#   r#   r$   $test_partition_keys_with_underscores  s    
r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S Nz/test.parquetr   r   r   r   r   r   r
   r   )s3_example_s3fsfsr!   r   r"   r#   r#   r$   test_read_s3fs     r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S r   r   )r   r   r    r!   r   r"   r#   r#   r$   test_read_directory_s3fs  r   r   c                 C   sJ   t | d }tdg di}t|| t|g }||s#J d S )Nr   r   r   )r   r   r   r   r   r-   r.   r   )r   	data_pathr   r"   r#   r#   r$   test_read_single_file_list  s
   
r   c                 C   s   | \}}t || d S r&   )r'   r   r   r!   r#   r#   r$   $test_read_partitioned_directory_s3fs  s   r   c                 C   s  ddg}g d}d|gd|gg}d}t jt|tj|ddd	tttj|tdd
dtj	|dg dd}t
| ||| tj|| d}| }| jddjdd}	|jddjddj|	jd}
|
d d|
d< |
d d|
d< |	jg dk sJ t|	|
 d S )Nr   r   r3   foobarr?   r9   r:   r<   r=   r   )rf   r   r   r*   r+   r   rf   ri   TrG   category)rf   r*   r   r   )rJ   rK   rL   rQ   rM   rN   rO   rP   randomrandnrR   r   r-   r.   rS   rp   rT   reindexr,   astypealltmassert_frame_equal)r   r0   foo_keysbar_keysr[   rt   r\   r1   r   r]   expected_dfr#   r#   r$   r'      s>   

r'   c                    sX   t tsttt tdtdd fdd|dg  d S )Npathsepsep/c              	      sl  | \}}|D ]}|||fg } t| d||g}| | d krddlm}  |t g}	t|}
tj	
|
}|	}t|| W d    n1 sWw   Y  |	j|jksgJ |	j|jksrJ  |dg}|}W d    n1 sw   Y  q||d |  |dg}|}W d    n1 sw   Y  qd S )Nz{}={}r   r   )FileType_SUCCESS)joinr   format
create_dir
pyarrow.fsr   r	   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer*   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelr\   r   r[   r   r#   r$   r   2  s<   


z5_generate_partition_directories.<locals>._visit_levelr   )
isinstancer   r   r   r   r   )r   r   r[   r\   r#   r   r$   rR   '  s   
 rR   c                 C   sl   t jt| td}g }|D ]\}}|| t|tjtjfr$t	|}|| | |kM }q| | j
|ddS )Nr:   r   )axis)rL   onesr   r>   appendr   r   r   rJ   	TimestamprH   )r\   r   	predicateto_dropr   r   r#   r#   r$   r   U  s   

r   c                 C   s   | d }|   tjtdg di}t||d  | d }|   tjtdg di}t||d  tj| dggd}|	d
tg dgsSJ d S )	NzA=0Br   r   zA=1r3   )ArC   r   r   )r   r   r   r   rJ   rK   r   r   r   r   r   chunked_array)r   dir1table1dir2table2r   r#   r#   r$   "test_filter_before_validate_schemae  s   $r   c                    sx  d}d}| t   }|  g }g }t|D ].}t||d}|d tj|d< |d| }tj	
|}	t|	| ||	 || q|d   ddd	}
|
| t|} |s_J d
dd jd g} fdd|D }tj||d}tj	j fdd|D | jjd}||sJ tj|dd t||djd d d df }| dt   }tj	
|}t|| d S )Nr{   r=   seeduint32
{}.parquetz_SUCCESS.crcTc                 [   s    t j| fi |}|j||dS )N)r,   use_threads)r   r-   r.   )pathsr,   r  r   r1   r#   r#   r$   read_multiple_files  s   z5test_read_multiple_files.<locals>.read_multiple_filesr   r      r   c                    s   g | ]}  |jqS r#   )fieldr   r   r"   r#   r$   rn         z,test_read_multiple_files.<locals>.<listcomp>r+   c                    s   g | ]}  |qS r#   )r   r   r  r#   r$   rn         )namesmetadata)r  rc   )NT)r	   r   r   r   r   rL   int64r   r   r   r   r   r   touchconcat_tablesr   num_columnsr   r   from_arraysschemar
  iloc)r   nfilessizedirpath	test_datar  r   r\   r!   r   r  r   to_read	col_namesout	bad_applebad_apple_pathtr#   r  r$   test_read_multiple_files{  s@   




r  c                    s(  d}d}| t   }|  g }g }g }t|D ]:}t||d}t|| |d | |_d|j_|d| }	t	j
|}
t|
|	 ||
 || ||	 qt|}ddg |j d }t fd	d
|D }t|| |jt d }|j|jksJ t|j|jd| d S )Nr=   r   r   rf   r   uint8stringsr+   c                    s   g | ]}|  qS r#   r#   rk   r+   r#   r$   rn     r   z,test_dataset_read_pandas.<locals>.<listcomp>)r	   r   r   r   rL   rQ   rf   r   r   r   r   r   r   r   r   r-   r   rS   rJ   concatr   r   r   rV   r   r,   )r   r  r  r  r  framesr  r   r\   r!   r   r1   r"   r   r#   r+   r$   test_dataset_read_pandas  s2   




r!  c                 C   sj   | t   }|  tddd}|dd }tj|}t||dd tj	|dd}|
 |s3J d S )	Nr{   r   r   r   2.6versionT)
memory_map)r	   r   r   r   r   r   r   r   r   r-   r.   r   )r   r  r\   r!   r   r1   r#   r#   r$   test_dataset_memory_map  s   
r&  c                 C   s   | t   }|  tddd}|dd }tj|}t||dd t	t
 tj|dd W d    n1 s:w   Y  d	D ]}tj||d}| |sSJ qAd S )
Nr{   r   r   r   r"  r#  i)buffer_size)   i   )r	   r   r   r   r   r   r   r   r   rx   r   r   r-   r.   r   )r   r  r\   r!   r   r'  r1   r#   r#   r$   #test_dataset_enable_buffered_stream  s"   
r)  c                 C   s   | t   }|  tddd}|dd }tj|}t||dd dD ] }tj	||d}|
 |s7J tj||d}||sEJ q%d S )	Nr{   r   r   r   r"  r#  )TF)
pre_buffer)r	   r   r   r   r   r   r   r   r   r-   r.   r   r   )r   r  r\   r!   r   r*  r1   actualr#   r#   r$   test_dataset_enable_pre_buffer  s   
r,  r{   r=   c                 C   sN   g }g }t |D ]}t||d}| d| }|t|| || q|S )Nr   r   )r   r   r   r   r   )r0   r  
file_nrowsr  r  r   r\   r!   r#   r#   r$   _make_example_multifile_dataset  s   r.  c                 C   s(   dd |D }t |t | jksJ d S )Nc                 S   s   g | ]}t | qS r#   )r   as_posix)rl   r!   r#   r#   r$   rn   ,  r  z)_assert_dataset_paths.<locals>.<listcomp>)r   files)r1   r  r#   r#   r$   _assert_dataset_paths+  s   r1  
dir_prefix_.c                 C   sJ   | t   }|  t|ddd}|d|   t|}t|| d S )Nr{   r=   r  r-  z	{}staging)r	   r   r.  r   r   r-   r1  r   r2  r  r  r1   r#   r#   r$   test_ignore_private_directories0  s   

r7  c                 C      | t   }|  t|ddd}|d d}|d W d    n1 s'w   Y  |d d}|d W d    n1 sCw   Y  t|}t|| d S )Nr{   r=   r5  z	.DS_Storewbs	   gibberishz.privater	   r   r.  openwriter   r-   r1  r   r  r  r   r1   r#   r#   r$   test_ignore_hidden_files_dotA     

r>  c                 C   r8  )Nr{   r=   r5  _committed_123r9  s   abcd_started_321r:  r=  r#   r#   r$   #test_ignore_hidden_files_underscoreT  r?  rB  c                 C   sZ   | d | t  }|jdd t|ddd}t|}t|| t|}t|| d S )Nz{0}dataTparentsr{   r=   r5  )r   r	   r   r.  r   r-   r1  r6  r#   r#   r$   /test_ignore_no_private_directories_in_base_pathg  s   


rE  c                 C   s   dgd dgd  }t jt tt|t | gddgd}tj|t| dgd | d }|	  tj|t|dgd tj
| d	gd
}||sNJ d S )Nxxxr   yyyrf   _partr	  partition_cols_private_duplicate_private)ignore_prefixes)r   r   rM   r   r   dictionary_encoder   write_to_datasetr   r   r   r   )r   partr   private_duplicater.   r#   r#   r$   test_ignore_custom_prefixesz  s"   rS  c                 C   sB   | d }|   t|}| }|jdksJ |jdksJ d S )Nr1   r   )r   r   r-   r.   rW   r  )r   	empty_dirr1   r"   r#   r#   r$   test_empty_directory  s   
rU  c                 C   s  dd l }dd lm} dd lm} |tdtdttdtj	gd tj
ddddd	d
}|j }ddg}	tjj||ddd}
|j|
| |	|d tjt| d}|d urw||d}||
j| W d    n1 sqw   Y  n||
j| |j| |d}t|jj}|t|
jjksJ | }| }|j }|	|dt|	 d  ksJ || }|	D ]}|| d||< q|r|dj ! }|d ||d< |"|| d S )Nr   
aaabbbbccc
eefeffgeeer{   
2017-01-01
2017-01-11datetime64[D]r:   datetime64[ns])group1group2numnanr   r\  r]  F)r  safepreserve_indexr   _common_metadatar9  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrK   listr   rL   r_  rQ   r   r,   tolistr   r   r   rP  osr!   r   r   r;  write_metadatar  r-   r   r	  r.   rS   r   r  r   to_pandas_dtyper   )r0   r   r  
index_namerJ   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r1   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_typer#   r#   r$   &_test_write_to_dataset_with_partitions  sV   




rz  c              
   C   s  dd l }dd lm} |tdtdttdtjddddd	d
}|j	
 }tj|}|d u r8t }nt|tsCtt|}d}t|D ]
}|j|| |d qItt| ddd}	||	}
dd |
D }t||ksqJ |j| |d }| }| }|| }t|| d S )Nr   rV  rW  r{   rX  rY  rZ  r:   r[  )r\  r]  r^  r   r=   r   FT)allow_not_found	recursivec                 S   s   g | ]
}|j d r|qS )z.parquet)r!   endswith)rl   infor#   r#   r$   rn     s    z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>)rd  rg  rh  rK   ri  r   rL   rQ   r   r,   rj  r   r   r   r   r   r   r   r   rP  r   r   r   r   r-   r.   rS   drop_duplicatesr   r   )r0   r   rJ   r   ro  rp  rr  nr   selectorinfosoutput_filesru  rv  r#   r#   r$   $_test_write_to_dataset_no_partitions  sH   




r  c                 C      t t|  d S r&   rz  r   r   r#   r#   r$   %test_write_to_dataset_with_partitions     r  c                 C   sr   t t jdt  dt jdt  dt jdt  dt jdt  dt jdt jdddg}tt| |d	 d S )
Nr\  )r   r]  r^  r_  r   us)unitr  )	r   r  r  r7   r  int32	timestamprz  r   )r   r  r#   r#   r$   0test_write_to_dataset_with_partitions_and_schema
  s   
r  c                 C   s   t t| dd d S )Nrn  )rn  r  r  r#   r#   r$   4test_write_to_dataset_with_partitions_and_index_name  s   
r  c                 C   r  r&   )r  r   r  r#   r#   r$   #test_write_to_dataset_no_partitions  r  r  c                 C   s   t | d  t| d  d S )Ntest1test2)rz  r  r  r#   r#   r$   test_write_to_dataset_pathlib   s   r  c                 C   s   |\}}t jtdd t| d |d W d    n1 sw   Y  t jtdd t| d |d W d    d S 1 s>w   Y  d S )Nz"path-like objects are only allowedr   r  r   r  )r   rx   r   rz  r  )r   r   r   r3  r#   r#   r$   &test_write_to_dataset_pathlib_nonlocal&  s   "r  win32z,test fails because of unsupported characters)ry   c                 C      | \}}t ||d d S Nr   )rz  r   r#   r#   r$   *test_write_to_dataset_with_partitions_s3fs5  s   
r  c                 C   r  r  )r  r   r#   r#   r$   (test_write_to_dataset_no_partitions_s3fsA  s   
r  c                 C   sT   t dg di}tj|}t| }tj||t d t	|}|
|s(J d S )Nr   r   r   )rJ   rK   r   r   r   r   r   rP  r   r   r   )r   r\   r   r!   r"   r#   r#   r$    test_write_to_dataset_filesystemJ  s   
r  d   c                 C   s   | d }t  }tjt|tj|dddgd}tj	|}d}t
||j}t|D ]}|| q.W d    n1 s@w   Y  t
|}	|	jj|ksRJ | d }
|t|
}t
|j| W d    n1 sow   Y  t
j| |d}|S )	Nr   )rf   r*   rf   r*   r+   r   	_metadatar   )r   rJ   rK   rL   rQ   r   r   r   r   r   r   ParquetWriterr  r   r   ParquetFiler
  num_row_groupsr   r   rl  r-   )r   rt   r!   r(   r\   r   
num_groupswriterr   readerrs  r   r1   r#   r#   r$   _make_dataset_for_picklingU  s2   

r  c                    s$    fdd}t | }||sJ d S )Nc                    s   |    | kS r&   )loadsdumps)objpickle_moduler#   r$   is_pickleables  s   z*test_pickle_dataset.<locals>.is_pickleable)r  )r   r  r  r1   r#   r  r$   test_pickle_datasetq  s   r  c                 C   sl   | d }t g dg dg dd}tj|}tj|t|ddgd t|	 }t
||d	  d S )
Nz
ARROW-3208)rc  r{   g      @r    r   g333333=@)rc  r{   r   r  r  r   r|   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathrK  zoutput.parquet)rJ   rK   r   r   r   r   rP  r   r-   r.   r   )r   r!   r\   r   r#   r#   r$   test_partitioned_datasetz  s   r  c                 C   s(  | d }t jdd tdD d gdgd}t jdd tdD d gdgd}tj|t|d	 tj|t|d	 tj|dgd
 }|d d	 |d d	 g}|d j
dks_J |d d|d d}}||d r||d s~J d S ||d sJ ||d sJ d S )NzARROW-3325-datasetc                 S      g | ]}t d qS r{   r   randsr   r#   r#   r$   rn     r  z0test_dataset_read_dictionary.<locals>.<listcomp>r=   r{   f0rI  c                 S   r  r  r  r   r#   r#   r$   rn     r  )r  )read_dictionaryr   r   r   )r   r   r   r   rP  r   r-   r.   chunkrO  
num_chunksr   )r   r!   t1t2r"   	ex_chunksc0c1r#   r#   r$   test_dataset_read_dictionary  s&   $$r  c                 C   s   t dt g dt  i}t|| d  t|| d  t dg}tj| d |d}t jdg di|d}||s@J tj| |d}t jdg di|d}||sYJ tj	| |d}t jdg di|d}|
 |stJ d S )Nr   r   zdata1.parquetzdata2.parquet)r   r  r  )r   r   r   r   r   r   )r   r   rM   r  r   r   r  r   r   r-   r.   )r   r   r  r"   r   r#   r#   r$   test_read_table_schema  s   r  c                 C   s   t t g dt  t g dt  d}t|| d  tj| d ddgd}t ddg}|j	ddgks;J |j|ksBJ d S )Nr   r   r   r   r+   )r   r  )
r   r   rM   r  r  r   r   r   r  r/   )r   r   r"   expected_schemar#   r#   r$   *test_read_table_duplicate_column_selection  s   r  c                 C   s   dd l m} | d }|d d d jdd tdg d	i}t|t|d d d d
  |jg dd}tj	t||d}|j
g dksIJ tjt||d }|j
g dks]J d S )Nr   test_partitioning20121001TrC  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr1   r   r   r   r   r   r   r  r   r/   r-   r.   )r   dsr  r   rQ  r"   r#   r#   r$   test_dataset_partitioning  s$   r  c                 C   sZ   t dg di}t|| d  tt| t }tjd|d}| }|	|s+J d S )Nr   r   r   r4  r   )
r   r   r   r   r   r   r   r-   r.   r   )r   r   r   r1   r"   r#   r#   r$   #test_parquet_dataset_new_filesystem  s   r  c                 C   st   t d}|d}tdg di}t|| d  t| dd}tj	||d}|d	 }|j
d
 j|ks8J d S )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer-   	fragmentsr!   )r   r  r   r   r!   r1   r   r#   r#   r$   6test_parquet_dataset_partitions_piece_path_with_fsspec  s   

r  c                    s   t dg di}| d }g   fdd}d}tj||dg||d |d d	 |d
 d	 |d d	 h}tttj }||ksAJ d S )Nr   r   r  c                    s     | j d S r&   )r   r!   )written_filepaths_writtenr#   r$   file_visitor  s   zDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitorzpart-{i}.parquet)r  r  basename_template1zpart-0.parquet23)r   r   r   rP  r   rq   pathlibPath)r   r   r!   r  r  expected_pathspaths_written_setr#   r  r$   .test_parquet_write_to_dataset_exposed_keywords  s   


r  write_dataset_kwarg))r   T)r   Fc                 C   s   ddl m} tdg di}| d }t|j}|\}}|ttjj	vs(J ||j	v s/J t
jj|ddd%}tj||fi ||i |jd \}	}
}|| |ksUJ W d   dS 1 s`w   Y  dS )	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r1   r   r   inspect	signaturer  r   rP  
parametersmockpatchrP   
mock_calls)r   r  r  r   r!   r  keyargmock_write_dataset_name_argsr   r#   r#   r$   #test_write_to_dataset_kwargs_passed  s   "r  c                 C   s   t t jg dg ddg dd}t|}| d }tj|| d dgd d	d
 | D }t|dks8J d|vs>J d S )N)r   r4   r   r3   r   r   )catrx  r1   r  rJ  c                 S   s   g | ]	}|  r|jqS r#   )is_dirr   )rl   r   r#   r#   r$   rn   8  r   z;test_write_to_dataset_category_observed.<locals>.<listcomp>r   zcat=c)	rJ   rK   r   r   r   r   rP  iterdirr   )r   r\   r   r!   subdirsr#   r#   r$   'test_write_to_dataset_category_observed*  s   
r  )r{   r=   )NNNr&   )r  )mr   r  rk  r  sysnumpyrL   ImportErrorr   unittest.mockr  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr   pyarrow.utilr	   rg  rh  r   pyarrow.tests.parquet.commonr
   r   r   rd  rJ   re  rf  r   markr1   
pytestmarkr%   r)   r2   r`   rv   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr  r   r   s3r   r   r   r   r'   rR   r   r   r  r!  r&  r)  r,  r.  r1  r7  r>  rB  rE  rS  rU  rz  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#   r#   r#   r$   <module>   s0   


F
!*

!
.
(







'.

G
%






>
.












