o
    篪gO                     @  s  d dl mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl	Z	d dl	m
Z
 d d	l	mZ d d
l	mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  erd dl!m"Z" d dl#Z$d dl%Z&d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d d l1m2Z2 d d!lm3Z3 G d"d# d#e3Z4dS )$    )annotations)chain)TYPE_CHECKING)Any)Literal)SequenceN)ColumnExpression)ConstantExpression)FunctionExpression)ExprKind)native_to_narwhals_dtype)parse_exprs_and_named_exprs
get_duckdb)ColumnNotFoundError)InvalidOperationError)CompliantDataFrame)Implementation)Version)check_column_names_are_unique)generate_temporary_column_name)import_dtypes_module)parse_columns_to_drop)parse_version)validate_backend_version)
ModuleType)Self)
DuckDBExprDuckDBGroupByDuckDBNamespaceDuckDBInterchangeSeries)DType)CompliantLazyFramec                   @  sR  e Zd ZejZdddZdddZdddZdddZ	dddZ
dddZdd#d$Zdd'd(Zdd*d+Zdd/d0Zdd4d5Zd6d7dd9d:Zdd;d<Zdd?d@ZeddBdCZeddDdEZddGdHZddJdKZddLdMZdNdOddPdQZddUdVZddYdZZddbdcZddidjZddkdlZddpdqZ ddvdwZ!ddxdyZ"ddzd{Z#dddZ$d6S )DuckDBLazyFrameselfr   dfduckdb.DuckDBPyRelationbackend_versiontuple[int, ...]versionr   validate_column_namesboolreturnNonec                C  s2   |rt |j || _|| _|| _t| j| j d S N)r   columns_native_frame_version_backend_versionr   _implementation)r'   r(   r*   r,   r-    r7   V/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/narwhals/_duckdb/dataframe.py__init__1   s   
zDuckDBLazyFrame.__init__c                 C  s   | j tjurd}t|| S )Nz=__narwhals_dataframe__ is not implemented for DuckDBLazyFrame)r4   r   V1AttributeError)r'   msgr7   r7   r8   __narwhals_dataframe__@   s   z&DuckDBLazyFrame.__narwhals_dataframe__c                 C  s   | S r1   r7   r'   r7   r7   r8   __narwhals_lazyframe__G   s   z&DuckDBLazyFrame.__narwhals_lazyframe__r   c                 C  s   t  S r1   r   r>   r7   r7   r8   __native_namespace__J   s   z$DuckDBLazyFrame.__native_namespace__r!   c                 C  s   ddl m} || j| jdS )Nr   r    )r*   r,   )narwhals._duckdb.namespacer!   r5   r4   )r'   r!   r7   r7   r8   __narwhals_namespace__M   s   z&DuckDBLazyFrame.__narwhals_namespace__itemstrr#   c                 C  s"   ddl m} || j|| jdS )Nr   r"   )r,   )narwhals._duckdb.seriesr#   r3   selectr4   )r'   rC   r#   r7   r7   r8   __getitem__T   s   zDuckDBLazyFrame.__getitem__backend(ModuleType | Implementation | str | Nonekwargsr   r   c           
      K  s   |d u s	|t ju r!dd l}ddlm} || j t|| jddS |t j	u r@dd l
}ddlm} || j t j	t|| jddS |t ju r\dd l}ddlm} || j t|| jdS d	| }	t|	)
Nr   )ArrowDataFrameF)native_dataframer*   r,   r-   )PandasLikeDataFrame)rL   implementationr*   r,   r-   )PolarsDataFrame)r(   r*   r,   zUnsupported `backend` value: )r   PYARROWpyarrownarwhals._arrow.dataframerK   r3   arrowr   r4   PANDASpandasnarwhals._pandas_like.dataframerM   r(   POLARSpolarsnarwhals._polars.dataframerO   pl
ValueError)
r'   rH   rJ   parK   pdrM   rZ   rO   r<   r7   r7   r8   collect[   s:   


zDuckDBLazyFrame.collectnintc                 C  s   | j | j|ddS NFr-   )_from_native_framer3   limit)r'   r_   r7   r7   r8   head      zDuckDBLazyFrame.headcolumn_namesc                 G  s   | j | jj| ddS ra   )rc   r3   rF   )r'   rg   r7   r7   r8   simple_select   rf   zDuckDBLazyFrame.simple_selectexprsr   named_exprsc                 O  s   t | g|R i |}|s| j| jdddS tdd |D s>tdd | D s>| j| jdd | D ddS tdd |D sRtdd | D rXd	}t|| j| jj	d
d | D  ddS )Nr   Frb   c                 s      | ]	}|j tju V  qd S r1   )
_expr_kindr   	TRANSFORM.0exprr7   r7   r8   	<genexpr>       z)DuckDBLazyFrame.select.<locals>.<genexpr>c                 S  s   g | ]	\}}| |qS r7   aliasro   colvalr7   r7   r8   
<listcomp>   s    z*DuckDBLazyFrame.select.<locals>.<listcomp>c                 s  rk   r1   rl   r   AGGREGATIONrn   r7   r7   r8   rq      rr   Mixing expressions which aggregate and expressions which don't
is not yet supported by the DuckDB backend. Once they introduce
duckdb.WindowExpression to their Python API, we'll be able to
support this.c                 s      | ]
\}}| |V  qd S r1   rs   ru   r7   r7   r8   rq          )
r   rc   r3   rd   anyvalues	aggregateitemsNotImplementedErrorrF   )r'   ri   rj   new_columns_mapr<   r7   r7   r8   rF      s4   zDuckDBLazyFrame.selectr2   	list[str]strictc                   s8   t | ||d  fdd| jD }| j| jj| ddS )N)compliant_framer2   r   c                 3  s    | ]	}| vr|V  qd S r1   r7   ro   rv   columns_to_dropr7   r8   rq      rr   z'DuckDBLazyFrame.drop.<locals>.<genexpr>Frb   )r   r2   rc   r3   rF   )r'   r2   r   	selectionr7   r   r8   drop   s   zDuckDBLazyFrame.dropN)rH   Implementation | Nonec                C  s   |d ur
d}t || S )Nz.`backend` argument is not supported for DuckDB)r[   )r'   rH   r<   r7   r7   r8   lazy   s   zDuckDBLazyFrame.lazyc                   s   t | g|R i | tdd |D stdd | D r%d}t| fdd| jjD }|dd   D  | j| jj	| ddS )	Nc                 s  rk   r1   ry   rn   r7   r7   r8   rq      rr   z/DuckDBLazyFrame.with_columns.<locals>.<genexpr>r{   c                   s,   g | ]}| v r  ||nt|qS r7   )poprt   r   r   r   r7   r8   rx      s    z0DuckDBLazyFrame.with_columns.<locals>.<listcomp>c                 s  r|   r1   rs   )ro   rv   valuer7   r7   r8   rq      r}   Frb   )
r   r~   r   r   r3   r2   extendr   rc   rF   )r'   ri   rj   r<   resultr7   r   r8   with_columns   s   
zDuckDBLazyFrame.with_columns
predicatesconstraintsc                   sN   |     jt| fdd| D  }|| d }| j| j|ddS )Nc                 3  s"    | ]\}}  ||kV  qd S r1   )rv   )ro   namevplxr7   r8   rq      s     z)DuckDBLazyFrame.filter.<locals>.<genexpr>r   Frb   )rB   all_horizontalr   r   _callrc   r3   filter)r'   r   r   rp   maskr7   r   r8   r      s   zDuckDBLazyFrame.filterdict[str, DType]c                        fddt  jj jjD S )Nc                   "   i | ]\}}|t t| jqS r7   r   rD   r4   ro   column_nameduckdb_dtyper>   r7   r8   
<dictcomp>       z*DuckDBLazyFrame.schema.<locals>.<dictcomp>zipr3   r2   typesr>   r7   r>   r8   schema   s
   
zDuckDBLazyFrame.schemac                 C  s   | j jS r1   )r3   r2   r>   r7   r7   r8   r2      s   zDuckDBLazyFrame.columnspd.DataFramec                 C  s2   dd l }t|dkr| j S d|j }t|)Nr   )   r   r   z3Conversion to pandas requires pandas>=1.0.0, found )rU   r   r3   r(   __version__r   )r'   r]   r<   r7   r7   r8   	to_pandas   s
   
zDuckDBLazyFrame.to_pandaspa.Tablec                 C  s
   | j  S r1   )r3   rS   r>   r7   r7   r8   to_arrow
  s   
zDuckDBLazyFrame.to_arrowc                 C  s   | j | j|| jddS )NF)r,   r*   r-   )	__class__r3   r5   )r'   r,   r7   r7   r8   _change_version  s   zDuckDBLazyFrame._change_versionTrb   c                C  s   | j || j| j|dS )N)r*   r,   r-   )r   r5   r4   )r'   r(   r-   r7   r7   r8   rc     s   z"DuckDBLazyFrame._from_native_framekeysdrop_null_keysr   c                G  s   ddl m} || t||dS )Nr   r   )r   r   r   )narwhals._duckdb.group_byr   list)r'   r   r   r   r7   r7   r8   group_by   s   
zDuckDBLazyFrame.group_bymappingdict[str, str]c                   s0   | j } fdd|jD }| |d|S )Nc                   s*   g | ]}| v r| d  |  n|qS )z as r7   r   r   r7   r8   rx   )  s    z*DuckDBLazyFrame.rename.<locals>.<listcomp>, )r3   r2   rc   rF   join)r'   r   r(   r   r7   r   r8   rename'  s
   
zDuckDBLazyFrame.renameotherhow1Literal['left', 'inner', 'cross', 'anti', 'semi']left_onlist[str] | Noneright_onsuffixc             	   C  sD  | j j}|dkr%| jdk rd| j }t|| j d|j d}n+|d us+J |d us1J dd t||D }	d|	}
| j dj|j d|
|d	}|d
v rdd | j jD }|j jD ],}|| j jv r|d u sq||vr|	d| d| | d qa|d u s||vr|	| qandg}|
d||}| |S )Ncross)r   r      z9DuckDB>=1.1.4 is required for cross-join, found version: lhsrhsc                 S  "   g | ]\}}d | d| dqS lhs."z	" = rhs.""r7   ro   leftrightr7   r7   r8   rx   E      z(DuckDBLazyFrame.join.<locals>.<listcomp> and )	conditionr   )innerr   r   c                 S  s   g | ]}d | dqS )r   r   r7   ro   xr7   r7   r8   rx   N      rhs."" as "r   lhs.*r   )r3   rt   r5   r   	set_aliasr   r   r   r2   appendrF   rc   )r'   r   r   r   r   r   original_aliasr<   rel
conditionsr   rF   rv   resr7   r7   r8   r   .  s<   	




zDuckDBLazyFrame.join
str | Noneby_leftby_rightstrategy)Literal['backward', 'forward', 'nearest']c             	   C  s<  | j }|j }	g }
|d ur|d ur|
dd t||D 7 }
ng  }}|dkr2|
d| d| dg7 }
n|dkrC|
d| d| dg7 }
nd	}t|d
|
}dg}|	jD ]3}||jv rv|d u sg||g|R vrv|d| d| | d qT|d u s||g|R vr|| qTdd| d| d}t|}| |S )Nc                 S  r   r   r7   r   r7   r7   r8   rx   k  r   z-DuckDBLazyFrame.join_asof.<locals>.<listcomp>backwardr   z
" >= rhs."r   forwardz
" <= rhs."zKOnly 'backward' and 'forward' strategies are currently supported for DuckDBr   r   r   r   z
            SELECT ,zD
            FROM lhs
            ASOF LEFT JOIN rhs
            ON z
            )	r3   r   r   r   r2   r   duckdbsqlrc   )r'   r   r   r   r   r   r   r   r   r   r   r<   r   rF   rv   queryr   r7   r7   r8   	join_asof\  s<   






zDuckDBLazyFrame.join_asofc                   r   )Nc                   r   r7   r   r   r>   r7   r8   r     r   z2DuckDBLazyFrame.collect_schema.<locals>.<dictcomp>r   r>   r7   r>   r8   collect_schema  s
   
zDuckDBLazyFrame.collect_schemasubsetSequence[str] | Nonekeepc                   s  |d urw| j  t fdd|D r&dt| j d j d}t|dtd j d}dtdg  j| d}|dkrId	| d
}nd	| d
}dd| d| dd| d| d| d| d| d}| jt	
|ddS | j| j d| jddS )Nc                 3  s    | ]}| j vV  qd S r1   r2   r   r   r7   r8   rq     s    z)DuckDBLazyFrame.unique.<locals>.<genexpr>zColumns z not found in .r      nonezwhere z=1zx
                with cte as (
                    select *,
                           row_number() over (partition by r   z) as z9,
                           count(*) over (partition by zR
                    from rel
                )
                select * exclude (r   z) from cte z
                Frb   )r3   r~   set
differencer2   r   r   r   rc   r   r   unique)r'   r   r   r<   idx_name
count_namekeep_conditionr   r7   r   r8   r     s>    	zDuckDBLazyFrame.uniqueby
descendingbool | Sequence[bool]
nulls_lastc                  sZ   t |tr|gt| }dd |D }| jd fddt||D }| j|ddS )Nc                 S  s   g | ]}|rd ndqS )desc r7   r   r7   r7   r8   rx     r   z(DuckDBLazyFrame.sort.<locals>.<listcomp>r   c                 3  s<    | ]\}} rd | d| dnd | d| dV  qdS )r   z" z nulls lastz nulls firstNr7   )ro   rv   r   r   r7   r8   rq     s    
z'DuckDBLazyFrame.sort.<locals>.<genexpr>Frb   )
isinstancer.   lenr3   orderr   r   rc   )r'   r   r   r   descending_strr   r7   r  r8   sort  s   


zDuckDBLazyFrame.sortc                 C  sJ   | j }|d ur	|n|j}ddd |D }d| }| jt|ddS )Nr   c                 s  s    | ]	}d | dV  qdS )r   z" is not nullNr7   r   r7   r7   r8   rq     rr   z-DuckDBLazyFrame.drop_nulls.<locals>.<genexpr>zselect * from rel where Frb   )r3   r2   r   rc   r   r   )r'   r   r   subset_r   r   r7   r7   r8   
drop_nulls  s
   
zDuckDBLazyFrame.drop_nullsc                   s   t | j}|  }D ]}|| }||jkr d| d}t|qtdkr-d}t|td  | j}| j	} 
 td @ dk}	||	j fdd|D  }
||	 jfd	d|D  }| j|
|d
dS )Nz-`explode` operation not supported for dtype `z`, expected List typer   zExploding on multiple columns is not supported with DuckDB backend since we cannot guarantee that the exploded columns have matching element counts.r   r  c                 3  s,    | ]}|v rt d  |n|V  qdS )unnestN)r
   rt   r   col_to_exploder2   r7   r8   rq     s    
z*DuckDBLazyFrame.explode.<locals>.<genexpr>c                 3  s*    | ]}| v rt d |n|V  qd S r1   )r	   rt   r   r   r7   r8   rq     s
    
Frb   )r   r4   r   Listr   r  r   r   r3   r2   	isnotnullr
   r   rF   rc   union)r'   r2   dtypesr   rv   dtyper<   r   original_columnsnot_null_conditionnon_null_relnull_relr7   r
  r8   explode  s>   



	

zDuckDBLazyFrame.explodeonindexvariable_name
value_namec              	     s   |d u rg n| |d u r fdd| j D n|}|dkr"d}t||dkr,d}t|ddd g  ||R D }dd	d |D }| j}	d
| d| d| d| d	}
| t|
S )Nc                   s   g | ]}| vr|qS r7   r7   )ro   cindex_r7   r8   rx     r   z+DuckDBLazyFrame.unpivot.<locals>.<listcomp>r   z:`variable_name` cannot be empty string for duckdb backend.z7`value_name` cannot be empty string for duckdb backend.r   c                 s      | ]	}d | d V  qdS r   Nr7   r   r7   r7   r8   rq     s    
z*DuckDBLazyFrame.unpivot.<locals>.<genexpr>c                 s  r  r  r7   r   r7   r7   r8   rq     rr   zR
            with unpivot_cte as (
                unpivot rel
                on z/
                into
                    name z
                    value z"
            )
            select z+
            from unpivot_cte;
            )r2   r   r   r3   rc   r   r   )r'   r  r  r  r  on_r<   cols_to_select
unpivot_onr   r   r7   r  r8   unpivot  s0   
zDuckDBLazyFrame.unpivot)r'   r   r(   r)   r*   r+   r,   r   r-   r.   r/   r0   )r'   r   r/   r   )r'   r   r/   r   )r'   r   r/   r!   )r'   r   rC   rD   r/   r#   )r'   r   rH   rI   rJ   r   r/   r   )r'   r   r_   r`   r/   r   )rg   rD   r/   r   )r'   r   ri   r   rj   r   r/   r   )r'   r   r2   r   r   r.   r/   r   )r'   r   rH   r   r/   r   )r'   r   r   r   r   r   r/   r   )r'   r   r/   r   )r'   r   r/   r   )r'   r   r/   r   )r'   r   r/   r   )r'   r   r,   r   r/   r   )r'   r   r(   r)   r-   r.   r/   r   )r'   r   r   rD   r   r.   r/   r   )r'   r   r   r   r/   r   )r'   r   r   r   r   r   r   r   r   r   r   rD   r/   r   )r'   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rD   r/   r   )r'   r   r   r   r   rD   r/   r   )
r'   r   r   rD   r   r   r   r.   r/   r   )r'   r   r   r   r/   r   )r'   r   r2   r   r/   r   )r'   r   r  r   r  r   r  rD   r  rD   r/   r   )%__name__
__module____qualname__r   DUCKDBr6   r9   r=   r?   r@   rB   rG   r^   re   rh   rF   r   r   r   r   propertyr   r2   r   r   r   rc   r   r   r   r   r   r   r  r  r  r"  r7   r7   r7   r8   r&   .   sF    







,


'	






	




.
-



/r&   )5
__future__r   	itertoolsr   typingr   r   r   r   r   r   r	   r
   narwhals._duckdb.utilsr   r   r   narwhals.dependenciesr   narwhals.exceptionsr   r   narwhals.typingr   narwhals.utilsr   r   r   r   r   r   r   r   r   r   rU   r]   rQ   r\   typing_extensionsr   narwhals._duckdb.exprr   r   r   rA   r!   rE   r#   narwhals.dtypesr$   r%   r&   r7   r7   r7   r8   <module>   sJ    