o
    篪g:                     @  sP  d dl mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dl	mZ d d	l	mZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ erd dlmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dlm'Z' G dd deZ(dS )    )annotations)chain)TYPE_CHECKING)Any)Literal)Sequence)ExprKind)native_to_narwhals_dtype)parse_exprs_and_named_exprs)InvalidOperationError)CompliantDataFrame)CompliantLazyFrame)Implementation)check_column_exists)import_dtypes_module)parse_columns_to_drop)parse_version)validate_backend_version)
ModuleType)	DataFrame)Self)SparkLikeExprSparkLikeLazyGroupBySparkLikeNamespace)DType)Versionc                   @  s.  e Zd ZdqddZedrddZedrddZedrddZdsddZdtddZ	duddZ
dvddZdwd!d"Zedxd$d%Zdyd*d+Zdzd.d/Zd{d3d4Zd{d5d6Zd|d9d:Zed}d<d=Zd}d>d?Zd~dCdDZddGdHZddLdMZddRdSZddVdWZddZd[Zdd^d_ZddfdgZddhdiZddndoZdpS )SparkLikeLazyFrameselfr   native_dataframer   backend_versiontuple[int, ...]versionr   implementationr   returnNonec                C  s*   || _ || _|| _|| _t| j| j d S N)_native_frame_backend_version_implementation_versionr   )r   r    r!   r#   r$    r,   Z/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/narwhals/_spark_like/dataframe.py__init__$   s
   zSparkLikeLazyFrame.__init__r   c                 C  ,   | j tju rddlm} |S ddlm} |S )Nr   )	functions)r*   r   SQLFRAMEsqlframe.duckdbr0   pyspark.sql)r   r0   r,   r,   r-   _F2   
   zSparkLikeLazyFrame._Fc                 C  r/   )Nr   )types)r*   r   r1   r2   r6   r3   )r   r6   r,   r,   r-   _native_dtypes<   r5   z!SparkLikeLazyFrame._native_dtypesc                 C  r/   )Nr   )Window)r*   r   r1   r2   r8   r3   )r   r8   r,   r,   r-   _WindowF   r5   zSparkLikeLazyFrame._Windowr   c                 C  s
   | j  S r'   )r*   to_native_namespacer   r,   r,   r-   __native_namespace__P   s   
z'SparkLikeLazyFrame.__native_namespace__r   c                 C  s    ddl m} || j| j| jdS )Nr   r   r!   r#   r$   )narwhals._spark_like.namespacer   r)   r+   r*   )r   r   r,   r,   r-   __narwhals_namespace__S   s   z)SparkLikeLazyFrame.__narwhals_namespace__c                 C  s   | S r'   r,   r;   r,   r,   r-   __narwhals_lazyframe__\   s   z)SparkLikeLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j| j|| jdS Nr=   )	__class__r(   r)   r*   )r   r#   r,   r,   r-   _change_version_   s   z"SparkLikeLazyFrame._change_versiondfc                 C  s   | j || j| j| jdS rA   )rB   r)   r+   r*   )r   rD   r,   r,   r-   _from_native_frameg   s   z%SparkLikeLazyFrame._from_native_frame	list[str]c                 C  s   | j jS r'   )r(   columnsr;   r,   r,   r-   rG   o   s   zSparkLikeLazyFrame.columnsbackend(ModuleType | Implementation | str | Nonekwargsr   c              
   K  s  |t ju rdd l}ddlm} || j t jt|| jddS |d u s(|t j	u rdd l
}ddlm} z|j| j }W nK ty } z?dt|v r}ddlm}	 i }
g }|  }| D ]\}}g |
|< |||	|| jf q]|jj|
||d}n W Y d }~nd }~ww ||t|| jdd	S |t ju rdd l}dd l
}dd
lm} |||j| j t|| jdS d| }t|)Nr   )PandasLikeDataFrameF)r    r$   r!   r#   validate_column_names)ArrowDataFramezat least one RecordBatch)narwhals_to_native_dtypeschema)r!   r#   rL   )PolarsDataFrame)rD   r!   r#   zUnsupported `backend` value: )r   PANDASpandasnarwhals._pandas_like.dataframerK   r(   toPandasr   r+   PYARROWpyarrownarwhals._arrow.dataframerM   Tablefrom_batches_collect_as_arrow
ValueErrorstrnarwhals._arrow.utilsrN   collect_schemaitemsappendfrom_pydictrP   POLARSpolarsnarwhals._polars.dataframerQ   
from_arrow)r   rH   rJ   pdrK   parM   native_pyarrow_frameexcrN   datarP   current_schemakeyvalueplrQ   msgr,   r,   r-   collects   sl   



zSparkLikeLazyFrame.collectcolumn_namesr]   c                 G  s   |  | jj| S r'   )rE   r(   select)r   rr   r,   r,   r-   simple_select   s   z SparkLikeLazyFrame.simple_selectexprsr   named_exprsc                   s   t  g|R i |\}}|s" jj}|g  jg } |S tdd |D s=dd | D }  jj	| S  fddt
| |D }  jj| S )Nc                 s  s    | ]}|t ju V  qd S r'   )r   	TRANSFORM).0	expr_kindr,   r,   r-   	<genexpr>   s    z,SparkLikeLazyFrame.select.<locals>.<genexpr>c                 S  s   g | ]	\}}| |qS r,   )alias)rx   col_namecolr,   r,   r-   
<listcomp>       z-SparkLikeLazyFrame.select.<locals>.<listcomp>c              	     sJ   g | ]!\\}}}|t ju r|   jd |n||qS    )r   AGGREGATIONoverr9   partitionByr4   litr{   rx   r|   r}   ry   r;   r,   r-   r~      s    

")r
   r(   sparkSessioncreateDataFramer7   
StructTyperE   anyr`   aggziprs   )r   ru   rv   new_columns
expr_kindsspark_sessionspark_dfnew_columns_listr,   r;   r-   rs      s    

zSparkLikeLazyFrame.selectc                   sH   t  g|R i |\}} fddt| |D }  j|S )Nc              
     s@   i | ]\\}}}||t ju r|   jd n|qS r   )r   r   r   r9   r   r4   r   r   r;   r,   r-   
<dictcomp>   s    

z3SparkLikeLazyFrame.with_columns.<locals>.<dictcomp>)r
   r   r`   rE   r(   withColumns)r   ru   rv   r   r   new_columns_mapr,   r;   r-   with_columns   s
   
zSparkLikeLazyFrame.with_columns
predicatesconstraintsc                   sN   |     jt| fdd| D  }|| d }| j|}| |S )Nc                 3  s"    | ]\}}  ||kV  qd S r'   )r}   )rx   namevplxr,   r-   rz      s     z,SparkLikeLazyFrame.filter.<locals>.<genexpr>r   )r?   all_horizontalr   r`   _callr(   whererE   )r   r   r   expr	conditionr   r,   r   r-   filter   s   
zSparkLikeLazyFrame.filterdict[str, DType]c                   s    fdd j jD S )Nc                   s$   i | ]}|j t|j j jd qS ))dtyper#   spark_types)r   r	   dataTyper+   r7   )rx   fieldr;   r,   r-   r      s    z-SparkLikeLazyFrame.schema.<locals>.<dictcomp>)r(   rP   r;   r,   r;   r-   rP      s   
zSparkLikeLazyFrame.schemac                 C  s   | j S r'   rO   r;   r,   r,   r-   r_      s   z!SparkLikeLazyFrame.collect_schemarG   strictboolc                 C  s    t | ||d}| | jj| S )N)compliant_framerG   r   )r   rE   r(   drop)r   rG   r   columns_to_dropr,   r,   r-   r      s   zSparkLikeLazyFrame.dropnintc                 C  s"   | j j}| || j j|dS )N)num)r(   r   rE   r   take)r   r   r   r,   r,   r-   head  s   zSparkLikeLazyFrame.headkeysdrop_null_keysr   c                G  s   ddl m} || t||dS )Nr   r   )r   r   r   )narwhals._spark_like.group_byr   list)r   r   r   r   r,   r,   r-   group_by  s   
zSparkLikeLazyFrame.group_byby
descendingbool | Sequence[bool]
nulls_lastc                  sh   t |tr|gt| }|r fdd|D }n	 fdd|D }dd t||D }  jj| S )Nc                 3  $    | ]}|r
 j jn j jV  qd S r'   )r4   desc_nulls_lastasc_nulls_lastrx   dr;   r,   r-   rz     
    
z*SparkLikeLazyFrame.sort.<locals>.<genexpr>c                 3  r   r'   )r4   desc_nulls_firstasc_nulls_firstr   r;   r,   r-   rz   #  r   c                 S  s   g | ]\}}||qS r,   r,   )rx   r}   sort_fr,   r,   r-   r~   (  s    z+SparkLikeLazyFrame.sort.<locals>.<listcomp>)
isinstancer   lenr   rE   r(   sort)r   r   r   r   
sort_funcs	sort_colsr,   r;   r-   r     s   


zSparkLikeLazyFrame.sortsubsetlist[str] | Nonec                 C  s   |  | jj|dS )Nr   )rE   r(   dropna)r   r   r,   r,   r-   
drop_nulls+  s   zSparkLikeLazyFrame.drop_nullsmappingdict[str, str]c                   s8    fddj D }jfdd| D S )Nc                   s   i | ]	}|  ||qS r,   )getrx   colname)r   r,   r-   r   /  r   z-SparkLikeLazyFrame.rename.<locals>.<dictcomp>c                   "   g | ]\}} j ||qS r,   r4   r}   r{   rx   oldnewr;   r,   r-   r~   4     " z-SparkLikeLazyFrame.rename.<locals>.<listcomp>)rG   rE   r(   rs   r`   )r   r   rename_mappingr,   )r   r   r-   rename.  s   
zSparkLikeLazyFrame.renamekeepLiteral['any', 'none']c                C  s4   |dkr
d}t |t| j| | | jj|dS )Nr   zC`LazyFrame.unique` with PySpark backend only supports `keep='any'`.r   )r\   r   rG   rE   r(   dropDuplicates)r   r   r   rp   r,   r,   r-   unique8  s
   zSparkLikeLazyFrame.uniqueotherhow1Literal['inner', 'left', 'cross', 'semi', 'anti']left_onright_onsuffixc           
        s   j }|j }j |j}t|tr|g}ttrgi ttp"g |p%g  fddtt|tp7g D |	fdd
 D } }	|dv r_|	fdd|D  |j|||d	|	S )Nc                   s&   i | ]}|| v r|  n|qS r,   r,   r   )left_columnsr   r,   r-   r   \  s    z+SparkLikeLazyFrame.join.<locals>.<dictcomp>c                   r   r,   r   r   r;   r,   r-   r~   b  r   z+SparkLikeLazyFrame.join.<locals>.<listcomp>>   leftcrossinnerc                   s    g | ]}|pg vr | qS r,   r,   r   )r   r   r,   r-   r~   l  s
    )onr   )r(   rG   r   r]   dictr   r   set
differencers   r`   extendrE   join)
r   r   r   r   r   r   self_nativeother_nativeright_columns	col_orderr,   )r   r   r   r   r   r-   r   D  s8   

zSparkLikeLazyFrame.joinc           	        s   t j} } D ]}|| }||jkr d| d}t|qj}j}t dkr3d}t|	|j
 fdd|D  S )Nz-`explode` operation not supported for dtype `z`, expected List typer   zExploding on multiple columns is not supported with SparkLike backend since we cannot guarantee that the exploded columns have matching element counts.c                   s<   g | ]}| d  krj ||nj ||qS )r   )r4   r}   r{   explode_outer)rx   r|   rG   r   r,   r-   r~     s    z.SparkLikeLazyFrame.explode.<locals>.<listcomp>)r   r+   r_   Listr   r(   rG   r   NotImplementedErrorrE   rs   )	r   rG   dtypesrP   col_to_exploder   rp   native_framerr   r,   r   r-   explodew  s,   


zSparkLikeLazyFrame.exploder   indexvariable_name
value_namec                 C  s   |  | jj||||dS )N)idsvaluesvariableColumnNamevalueColumnName)rE   r(   unpivot)r   r   r   r   r   r,   r,   r-   r    s   zSparkLikeLazyFrame.unpivotN)r   r   r    r   r!   r"   r#   r   r$   r   r%   r&   )r   r   r%   r   )r   r   r%   r   )r   r   r%   r   )r   r   r%   r   )r   r   r#   r   r%   r   )r   r   rD   r   r%   r   )r   r   r%   rF   )r   r   rH   rI   rJ   r   r%   r   )r   r   rr   r]   r%   r   )r   r   ru   r   rv   r   r%   r   )r   r   r   r   r   r   r%   r   )r   r   r%   r   )r   r   rG   rF   r   r   r%   r   )r   r   r   r   r%   r   )r   r   r   r]   r   r   r%   r   )
r   r   r   r]   r   r   r   r   r%   r   )r   r   r   r   r%   r   )r   r   r   r   r%   r   )r   r   r   r   r   r   r%   r   )r   r   r   r   r   r   r   r   r   r   r   r]   r%   r   )r   r   rG   rF   r%   r   )r   r   r   r   r   r   r   r]   r   r]   r%   r   )__name__
__module____qualname__r.   propertyr4   r7   r9   r<   r?   r@   rC   rE   rG   rq   rt   rs   r   r   rP   r_   r   r   r   r   r   r   r   r   r   r  r,   r,   r,   r-   r   #   sB    
		
	

	



E















3#r   N))
__future__r   	itertoolsr   typingr   r   r   r   narwhals._spark_like.utilsr   r	   r
   narwhals.exceptionsr   narwhals.typingr   r   narwhals.utilsr   r   r   r   r   r   r6   r   r3   r   typing_extensionsr   narwhals._spark_like.exprr   r   r   r>   r   narwhals.dtypesr   r   r   r,   r,   r,   r-   <module>   s8    