o
    沪gB                     @  s  U d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZ ddlmZmZmZmZ ddlmZmZmZ ddlmZ ddlmZmZmZmZmZmZmZm Z  dd	l!m"Z"m#Z# dd
l$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 erddl4Z5ddl6Z7ddl8m9Z9m:Z:m;Z; ddl<m=Z= ddl>m?Z? e'@eAZBdeCd< dZDeEdZFdeCd< dZGdeCd< dZHdeCd< dZIdeCd< dZJdeCd< dZKdeCd< dZLdeCd< d ZMdeCd!< d"ZNdeCd#< d$ZOdeCd%< d&ZPdeCd'< d(ZQdeCd)< d*ZRdeCd+< d,ZSdeCd-< d.ZTdeCd/< d0ZUdeCd1< d2ZVdeCd3< d4ZWdeCd5< d6ZXdeCd7< d8ZYdeCd9< d:ZZdeCd;< d<Z[deCd=< d>Z\deCd?< ed@dAdBZ]e G dCdD dDeZ^G dEdF dFee] Z_G dGdH dHeZ`G dIdJ dJeZaeee] e_e] e`eaf ZbdKeCdL< edMdNdOdPdQdRdSee dTe^e`eae*df ZcdKeCdU< G dVdW dWeZddd\d]Zedd^d_ZfddbdcZgddddeZhddfdgZiddhdiZjddjdkZkddldmZlddndoZmddpdqZnddrdsZoddtduZpddvdwZqddxdyZrddzd{Zsdd|d}Ztdd~dZudddZvdddZwdddZxdddZy	ddddZzdddZ{dddZ|eDdfdddZ}dddZ~dddZdddZdddZeDfdddZdddZ	ddddZdddZ	ddddZdddZdddZdddńZdddɄZdS )z8A bunch of useful utilities for dealing with dataframes.    )annotationsN)ChainMapUserDictUserListdeque)	ItemsViewIterableMappingSequence)EnumEnumMetaauto)MappingProxyType)TYPE_CHECKINGAnyFinalProtocolTypeVarUnioncastruntime_checkable)	TypeAlias	TypeGuard)configerrorsloggerstring_util)

CustomDict
NumpyShapehas_callable_attris_custom_dictis_dataclass_instanceis_list_likeis_namedtupleis_pydantic_modelis_typeis_version_less_than)	DataFrameIndexSeries)_iLocIndexer)Stylerr   _LOGGERi'  z
^pandas.*$_PANDAS_DATA_OBJECT_TYPE_REzdask.dataframe.core.DataFrame_DASK_DATAFRAMEzdask.dataframe.core.Index_DASK_INDEXzdask.dataframe.core.Series_DASK_SERIESzduckdb.duckdb.DuckDBPyRelation_DUCKDB_RELATIONz modin.pandas.dataframe.DataFrame_MODIN_DF_TYPE_STRzmodin.pandas.series.Series_MODIN_SERIES_TYPE_STRzpandas.io.formats.style.Styler_PANDAS_STYLER_TYPE_STRz polars.dataframe.frame.DataFrame_POLARS_DATAFRAMEz polars.lazyframe.frame.LazyFrame_POLARS_LAZYFRAMEzpolars.series.series.Series_POLARS_SERIESzpyspark.sql.dataframe.DataFrame_PYSPARK_DF_TYPE_STRz'pyspark.sql.connect.dataframe.DataFrame_PYSPARK_CONNECT_DF_TYPE_STRzray.data.dataset.Dataset_RAY_DATASETz$ray.data.dataset.MaterializedDataset_RAY_MATERIALIZED_DATASETz3snowflake.snowpark.modin.pandas.dataframe.DataFrame_SNOWPANDAS_DF_TYPE_STRz6snowflake.snowpark.modin.plugin.extensions.index.Index_SNOWPANDAS_INDEX_TYPE_STRz-snowflake.snowpark.modin.pandas.series.Series_SNOWPANDAS_SERIES_TYPE_STRzsnowflake.snowpark.row.Row_SNOWPARK_DF_ROW_TYPE_STRz&snowflake.snowpark.dataframe.DataFrame_SNOWPARK_DF_TYPE_STRzsnowflake.snowpark.table.Table_SNOWPARK_TABLE_TYPE_STRzxarray.core.dataset.Dataset_XARRAY_DATASET_TYPE_STRzxarray.core.dataarray.DataArray_XARRAY_DATA_ARRAY_TYPE_STRV_coT)	covariantc                   @  s4   e Zd ZdZedddZddd
dZdddZdS )DBAPICursora`  Protocol for DBAPI 2.0 Cursor objects (PEP 249).

    This is a simplified version of the DBAPI Cursor protocol
    that only contains the methods that are relevant or used for
    our DB API Integration.

    Specification: https://peps.python.org/pep-0249/
    Inspired by: https://github.com/python/typeshed/blob/main/stdlib/_typeshed/dbapi.pyi
    returndSequence[tuple[str, Any | None, int | None, int | None, int | None, int | None, bool | None]] | Nonec                 C     d S N selfrK   rK   T/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/streamlit/dataframe_util.pydescriptionq   s   zDBAPICursor.description.sizeintSequence[Sequence[Any]]c                C  rI   rJ   rK   )rM   rP   rK   rK   rN   	fetchmany       zDBAPICursor.fetchmanyc                 C  rI   rJ   rK   rL   rK   rK   rN   fetchall   rT   zDBAPICursor.fetchallN)rG   rH   ).)rP   rQ   rG   rR   )rG   rR   )__name__
__module____qualname____doc__propertyrO   rS   rU   rK   rK   rK   rN   rF   e   s    
rF   c                   @  s   e Zd ZdZedddZdS )DataFrameGenericAliasat  Technically not a GenericAlias, but serves the same purpose in
    OptionSequence below, in that it is a type which admits DataFrame,
    but is generic. This allows OptionSequence to be a fully generic type,
    significantly increasing its usefulness.

    We can't use types.GenericAlias, as it is only available from python>=3.9,
    and isn't easily back-ported.
    rG   r*   c                 C  rI   rJ   rK   rL   rK   rK   rN   iloc   s   zDataFrameGenericAlias.ilocN)rG   r*   )rV   rW   rX   rY   rZ   r\   rK   rK   rK   rN   r[      s    	r[   c                   @  s   e Zd ZdZdddZdS )PandasCompatiblezFProtocol for Pandas compatible objects that have a `to_pandas` method.rG   DataFrame | Seriesc                 C  rI   rJ   rK   rL   rK   rK   rN   	to_pandas   rT   zPandasCompatible.to_pandasN)rG   r^   )rV   rW   rX   rY   r_   rK   rK   rK   rN   r]      s    r]   c                   @  s   e Zd ZdZd	ddZdS )
DataframeInterchangeCompatiblezProtocol for objects support the dataframe-interchange protocol.

    https://data-apis.org/dataframe-protocol/latest/index.html
    
allow_copyboolrG   r   c                 C  rI   rJ   rK   )rM   ra   rK   rK   rN   __dataframe__   rT   z,DataframeInterchangeCompatible.__dataframe__N)ra   rb   rG   r   )rV   rW   rX   rY   rc   rK   rK   rK   rN   r`      s    r`   r   OptionSequencer'   r)   r+   r(   pa.Tablezpa.Arrayznp.ndarray[Any, np.dtype[Any]]zMapping[Any, Any]Datac                   @  s   e Zd ZdZe Ze Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z e Z!e Z"e Z#e Z$e Z%dS )
DataFormatz7DataFormat is used to determine the format of the data.N)&rV   rW   rX   rY   r   UNKNOWNEMPTYCOLUMN_INDEX_MAPPINGCOLUMN_SERIES_MAPPINGCOLUMN_VALUE_MAPPINGDASK_OBJECTDBAPI_CURSORDUCKDB_RELATIONKEY_VALUE_DICTLIST_OF_RECORDSLIST_OF_ROWSLIST_OF_VALUESMODIN_OBJECT
NUMPY_LISTNUMPY_MATRIXPANDAS_ARRAYPANDAS_DATAFRAMEPANDAS_INDEXPANDAS_SERIESPANDAS_STYLERPOLARS_DATAFRAMEPOLARS_LAZYFRAMEPOLARS_SERIESPYARROW_ARRAYPYARROW_TABLEPYSPARK_OBJECTRAY_DATASETSET_OF_VALUESSNOWPANDAS_OBJECTSNOWPARK_OBJECTTUPLE_OF_VALUESXARRAY_DATASETXARRAY_DATA_ARRAYrK   rK   rK   rN   rg      sF    
rg   vstrrG   rb   c                 C     ddl }t|j| S )a  Return True if the current Pyarrow version is less than the input version.

    Parameters
    ----------
    v : str
        Version string, e.g. "0.25.0"

    Returns
    -------
    bool


    Raises
    ------
    InvalidVersion
        If the version strings are not valid.

    r   N)pyarrowr&   __version__)r   parK   rK   rN   is_pyarrow_version_less_than   s   r   c                 C  r   )a  Return True if the current Pandas version is less than the input version.

    Parameters
    ----------
    v : str
        Version string, e.g. "0.25.0"

    Returns
    -------
    bool


    Raises
    ------
    InvalidVersion
        If the version strings are not valid.
    r   N)pandasr&   r   )r   pdrK   rK   rN   is_pandas_version_less_than   s   r   objobjectc                 C  s   | du st | tttttttfrdS t| t	j
t	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jt	jhv S )zTrue if the object is a dataframe-like object.

    This does not include basic collection types like list, dict, tuple, etc.
    NF) 
isinstancetuplesetr   bytesrQ   floatrb   determine_data_formatrg   rk   rm   rn   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   rK   rK   rN   is_dataframe_like  s4    r   c                 C  sR   t | p(t| p(t| p(t| p(t| p(t| p(t| p(t| p(t| p(t	
| S )a  True if the object is one of the supported unevaluated data objects:

    Currently supported objects are:
    - Snowpark DataFrame / Table
    - PySpark DataFrame
    - Modin DataFrame / Series
    - Snowpandas DataFrame / Series / Index
    - Dask DataFrame / Series / Index
    - Ray Dataset
    - Polars LazyFrame
    - Generator functions
    - DB API 2.0 Cursor (PEP 249)
    - DuckDB Relation (Relational API)

    Unevaluated means that the data is not yet in the local memory.
    Unevaluated data objects are treated differently from other data objects by only
    requesting a subset of the data instead of loading all data into th memory
    )is_snowpark_data_objectis_pyspark_data_objectis_snowpandas_data_objectis_modin_data_objectis_ray_datasetis_polars_lazyframeis_dask_objectis_duckdb_relationis_dbapi_cursorinspectisgeneratorfunctionr   rK   rK   rN   is_unevaluated_data_object<  s&   	r   c                 C  
   t | tS )zLTrue if obj is a Pandas object (e.g. DataFrame, Series, Index, Styler, ...).)r%   r-   r   rK   rK   rN   is_pandas_data_object]     
r   c                 C     t | tp	t | tS )z-True if obj is a Snowpark DataFrame or Table.)r%   rA   r@   r   rK   rK   rN   r   b     r   c                 C  s2   t | tot| dkot| d tot| d dS )z4True if obj is a list of snowflake.snowpark.row.Row.r   as_dict)r   listlenr%   r?   r   r   rK   rK   rN   is_snowpark_row_listg  s   

r   c                 C  s   t | tp	t | tot| dS )z5True if obj is a PySpark or PySpark Connect dataframetoPandas)r%   r8   r9   r   r   rK   rK   rN   r   q  s   r   c                 C     t | tpt | tpt | tS )z2True if obj is a Dask DataFrame, Series, or Index.)r%   r.   r0   r/   r   rK   rK   rN   r   x  
   
r   c                 C  r   )z+True if obj is of Modin Dataframe or Series)r%   r2   r3   r   rK   rK   rN   r     r   r   c                 C  r   )z5True if obj is a Snowpark Pandas DataFrame or Series.)r%   r<   r>   r=   r   rK   rK   rN   r     r   r   c                 C  r   )z"True if obj is a Polars Dataframe.)r%   r5   r   rK   rK   rN   is_polars_dataframe  r   r   c                 C  r   )z True if obj is a Xarray Dataset.)r%   rB   r   rK   rK   rN   is_xarray_dataset  r   r   c                 C  r   )z"True if obj is a Xarray DataArray.)r%   rC   r   rK   rK   rN   is_xarray_data_array  r   r   c                 C  r   )zTrue if obj is a Polars Series.)r%   r7   r   rK   rK   rN   is_polars_series  r   r   c                 C  r   )z"True if obj is a Polars Lazyframe.)r%   r6   r   rK   rK   rN   r     r   r   c                 C  r   )zTrue if obj is a Ray Dataset.)r%   r:   r;   r   rK   rK   rN   r     r   r   TypeGuard[Styler]c                 C  r   )zTrue if obj is a pandas Styler.)r%   r4   r   rK   rK   rN   is_pandas_styler  r   r   TypeGuard[DBAPICursor]c                 C  r   )zWTrue if obj looks like a DB API 2.0 Cursor.

    https://peps.python.org/pep-0249/
    )r   rF   r   rK   rK   rN   r     s   
r   c                 C  r   )z]True if obj is a DuckDB relation.

    https://duckdb.org/docs/api/python/relational_api
    )r%   r1   r   rK   rK   rN   r     s   
r   dataIterable[Any]c                 C  s   ddl m} || dddvS )z.Check if the list only contains scalar values.r   )infer_dtypeTskipna)mixedzunknown-array)pandas.api.typesr   )r   r   rK   rK   rN   _is_list_of_scalars  s   r   iterablemax_iterations
int | None	list[Any]c                 C  sB   |du rt | S g }t| D ]\}}||kr |S || q|S )aA  Convert an iterable to a list.

    Parameters
    ----------
    iterable : Iterable
        The iterable to convert to a list.

    max_iterations : int or None
        The maximum number of iterations to perform. If None, all iterations are performed.

    Returns
    -------
    list
        The converted list.
    N)r   	enumerateappend)r   r   resultiitemrK   rK   rN   _iterable_to_list  s   r   data_dfc                 C  s2   t | jdkr| jd dkr| jddidd | S )zRename the first column to "value" if it is not named
    and if there is only one column in the dataframe.

    The default name of the first column is 0 if it is not named
    which is not very descriptive.
       r   valueT)columnsinplace)r   r   rename)r   rK   rK   rN   _fix_column_naming  s   r   dict[Any, Any]c                 C  s   ddl }t|jj| ddS )zConvert a key-value dict to a Pandas DataFrame.

    Parameters
    ----------
    data : dict
        The dict to convert to a Pandas DataFrame.

    Returns
    -------
    pandas.DataFrame
        The converted Pandas DataFrame.
    r   Nindexorient)r   r   r'   	from_dict)r   r   rK   rK   rN   _dict_to_pandas_df  s   r   Fr   max_unevaluated_rowsrQ   ensure_copyc           	      C  sJ  ddl }ddl}ddl}t| |jr|r|  S t|j| S t| |j|j|j	j
jfr0|| S t| rCt|j|r?| j S | jS t| |jr\t| jdkrU|g S t|| S t| rl|rf|  n| } |  S t| r~|rv|  n| } |   S t| r| |  } | jd |krtdt| d t|j| S t| r|r| jdd} |  S t| r|r| jdd} |    S t!| r| j"|dd} t| |j|jfr|  } | jd |krtdt| d t|j| S t#| r| | } | jd |krtdt| d	 t|j| S t$| rM| "|% } t| |j|jfr4|  } | jd |krGtdt| d
 t|j| S t&| rr| |' } | jd |krltdt| d t|j| S t(| r| d|  } t| |j|jfr|  } | jd |krtdt| d t|j| S t)| r| | } | jd |krtdt| d t|j| S t*| r| |+ } | jd |krtdt| d | S t,| r| j-rdd | j-D nd}|j| .||d} | jd |krtdt| d | S t/| r-|dd | D S t0| dr:||  S t0| drWt1ddu rW|j	j23| }|rU| S |S t45| r~t|t6|  |d} | jd |kr|tdt| d | S t| t7rt|dd | D S t| t8t9|j:t;frt|t<| S t=| rt>| ? S t@| rt>| A S tB| rt>tCD| S t| tEtFtGfstH| rt>tI| S zt|| W S  tJy$ } z5t| tIrtKLtJ t>| W  d   W  Y d}~S 1 sw   Y  tMNdtO|  d|  d|d}~ww )aU  Try to convert different formats to a Pandas Dataframe.

    Parameters
    ----------
    data : dataframe-, array-, or collections-like object
        The data to convert to a Pandas DataFrame.

    max_unevaluated_rows: int
        If unevaluated data is detected this func will evaluate it,
        taking max_unevaluated_rows, defaults to 10k.

    ensure_copy: bool
        If True, make sure to always return a copy of the data. If False, it depends on
        the type of the data. For example, a Pandas DataFrame will be returned as-is.

    Returns
    -------
    pandas.DataFrame

    r   Nu   ⚠️ Showing only z6 rows. Call `collect()` on the dataframe to show more.T)deep)computez8 rows. Call `compute()` on the data object to show more.z6 rows. Call `to_pandas()` on the dataset to show more.z; rows. Call `_to_pandas()` on the data object to show more.z9 rows. Call `toPandas()` on the data object to show more.z: rows. Call `to_pandas()` on the data object to show more.z0 rows. Call `df()` on the relation to show more.c                 S  s   g | ]
}|r
|d  ndqS )r    rK   ).0drK   rK   rN   
<listcomp>  s    z1convert_anything_to_pandas_df.<locals>.<listcomp>)r   z4 rows. Call `fetchall()` on the Cursor to show more.c                 S  s   g | ]}|  qS rK   )r   )r   rowrK   rK   rN   r     s    r_   rc   z1.5.0F)r   z/ rows. Convert the data to a list to show more.c                 S  s   g | ]}|j qS rK   )r   )r   crK   rK   rN   r     s    z#
Unable to convert object of type `z1` to `pandas.DataFrame`.
Offending object:
```py
z
```)Parraynumpyr   r   r'   copyr   r)   r(   api
extensionsExtensionArrayr   r   ndarrayr   shaper   r   cloner_   r   to_framer   limitcollect_show_data_informationr   simplify_numberr   to_dataframer   	to_seriesr   headr   r   
_to_pandasr   r   r   r   r   dfr   rO   rS   r   r   r   interchangefrom_dataframer   r   r   r   r   map	ArrayTyper   r   r    r   to_dictr#   _asdictr!   dataclassesasdictr   r   r   r$   dict
ValueError
contextlibsuppressr   StreamlitAPIExceptiontype)	r   r   r   r   npr   r   r   exrK   rK   rN   convert_anything_to_pandas_df  s,  











.r  tabler   c              
   C  s   zt | } W n ty } ztjd|d W Y d}~nd}~ww ddl}| }||| j}||  |	  t
t|  S )zSerialize pyarrow.Table to Arrow IPC bytes.

    Parameters
    ----------
    table : pyarrow.Table
        A table to convert.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    zMRecursion error while truncating Arrow table. This is not supposed to happen.exc_infoNr   )_maybe_truncate_tableRecursionErrorr,   warningr   BufferOutputStreamRecordBatchStreamWriterschemawrite_tablecloser   r   getvalue
to_pybytes)r	  errr   sinkwriterrK   rK   rN   "convert_arrow_table_to_arrow_bytes  s   
r  r   c              
   C  sx   ddl }z|j| }W t	|S  |j|j|jfy; } ztjd|d t| } |j| }W Y d}~t	|S d}~ww )zSerialize pandas.DataFrame to Arrow IPC bytes.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to convert.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    r   NzSerialization of dataframe to Arrow table was unsuccessful. Applying automatic fixes for column types to make the dataframe Arrow-compatible.r
  )
r   Tablefrom_pandasArrowTypeErrorArrowInvalidArrowNotImplementedErrorr,   info#fix_arrow_incompatible_column_typesr  )r   r   r	  r  rK   rK   rN    convert_pandas_df_to_arrow_bytes  s   
r!  sourcec                 C  s   ddl }|| }| S )a  Convert Arrow bytes (IPC format) to pandas.DataFrame.

    Using this function in production needs to make sure that
    the pyarrow version >= 14.0.1, because of a critical
    security vulnerability in pyarrow < 14.0.1.

    Parameters
    ----------
    source : bytes
        A bytes object to convert.

    Returns
    -------
    pandas.DataFrame
        The converted dataframe.
    r   N)r   RecordBatchStreamReaderread_pandas)r"  r   readerrK   rK   rN    convert_arrow_bytes_to_pandas_df;  s   
r&  msgNonec                 C  s   ddl m} | j|  dS )zVShow a message to the user with important information
    about the processed dataset.r   )get_dg_singleton_instanceN)$streamlit.delta_generator_singletonsr)  main_dgcaption)r'  r)  rK   rK   rN   r   R  s   r   c                 C  s.   ddl }t| |jrt| S t| |}t|S )ad  Try to convert different formats to Arrow IPC format (bytes).

    This method tries to directly convert the input data to Arrow bytes
    for some supported formats, but falls back to conversion to a Pandas
    DataFrame and then to Arrow bytes.

    Parameters
    ----------
    data : dataframe-, array-, or collections-like object
        The data to convert to Arrow bytes.

    max_unevaluated_rows: int
        If unevaluated data is detected this func will evaluate it,
        taking max_unevaluated_rows, defaults to 10k.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    r   N)r   r   r  r  r  r!  )r   r   r   r   rK   rK   rN   convert_anything_to_arrow_bytesZ  s
   
r-  OptionSequence[V_co]
list[V_co]c                 C  s   | du rg S t | ttttfr| gS t | trdd | D S t | tr)t|  S t	| r5t
| s5t| S zt| dd}|jrBg W S ttt t|jdddf  W S  tjya   | g Y S w )a  Try to convert different formats to a list.

    If the input is a dataframe-like object, we just select the first
    column to iterate over. Non sequence-like objects and scalar types,
    will just be wrapped into a list.

    Parameters
    ----------

    obj : dataframe-, array-, or collections-like object
        The object to convert to a list.

    Returns
    -------
    list
        The converted list.
    Nc                 S  s    g | ]}t |tr|jn|qS rK   )r   r   r   )r   memberrK   rK   rN   r     s     z,convert_anything_to_list.<locals>.<listcomp>T)r   r   )r   r   rQ   r   rb   r   r	   r   keysr"   r   r  emptyr   rD   r\   to_listr   r  )r   r   rK   rK   rN   convert_anything_to_list  s(   

"
r4  truncated_rowsc           	   	   C  s  t drtt dd }t| jd }| j}|dkrU||krUt|||  }ttt	|t|| d  ||d  |d d}| 
d|}t||pOd||  S |rt| j}t| j| }||krut| j}t| j| }td	| d
| d | S )a  Experimental feature to automatically truncate tables that
    are larger than the maximum allowed message size. It needs to be enabled
    via the server.enableArrowTruncation config option.

    Parameters
    ----------
    table : pyarrow.Table
        A table to truncate.

    truncated_rows : int or None
        The number of rows that have been truncated so far. This is used by
        the recursion logic to keep track of the total number of truncated
        rows.

    zserver.enableArrowTruncationzserver.maxMessageSizeg    .Ar   g?g{Gz?   r   u   ⚠️ Showing z out of z# rows due to data size limitations.)r   
get_optionrQ   nbytesnum_rowsmathceilfloormaxminslicer  r   r   r   r   )	r	  r5  max_message_size
table_size
table_rowstargeted_rowssliced_tabledisplayed_rows
total_rowsrK   rK   rN   r    s<   

	
r  columnSeries[Any] | Indexc                 C  s   ddl m}m}m} | jjdv rdS t| jdv rdS | jdkrS|| dd}|dv r,dS |d	krSt| dks;t| d
s=dS | j	d }||rO||sOt
|trQdS dS dS )zTReturn True if the column type is known to cause issues during
    Arrow conversion.r   )r   is_dict_liker"   )r   T>   	period[B]	period[N]	period[U]
period[ns]
period[us]geometryr   r   )zmixed-integercomplexr   r\   F)r   r   rI  r"   dtypekindr   r   hasattrr\   r   	frozenset)rG  r   rI  r"   inferred_typefirst_valuerK   rK   rN    is_colum_type_arrow_incompatible  s,   


	rW  selected_columnslist[str] | Nonec                 C  s   ddl }d}|p
| jD ]}t| | r$|du r|  }| | d||< q|sBt| j|jsBt| jrB|du r;|  }| jd|_|durH|S | S )a  Fix column types that are not supported by Arrow table.

    This includes mixed types (e.g. mix of integers and strings)
    as well as complex numbers (complex128 type). These types will cause
    errors during conversion of the dataframe to an Arrow table.
    It is fixed by converting all values of the column to strings
    This is sufficient for displaying the data on the frontend.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to fix.

    selected_columns: List[str] or None
        A list of columns to fix. If None, all columns are evaluated.

    Returns
    -------
    The fixed dataframe.
    r   Nstring)r   r   rW  r   astyper   r   
MultiIndex)r   rX  r   df_copycolrK   rK   rN   r   @  s(   r   
input_datac                 C  s  ddl }ddl}ddl}| du rtjS t| |jrtjS t| |jr2t	t
t| jdkr/tjS tjS t| |jr;tjS t| |jrDtjS t| |jrMtjS t| |jrVtjS t| r]tjS t| |jjjrhtjS t| rotjS t| rvtj S t!| r}tj"S t#| rtj$S t%| rtj&S t'| rtj(S t)| rtj*S t+| rtj,S t-| rtj.S t/| rtj0S t1| st2| rtj3S t4| rtj5S t6| rtj7S t| t8t9t:fst;| st<| st=| st>| rtj?S t| t@tAfrtjBS t| tCtDtEtFfr1tG| rt| tDrtjHS t| tEtFfrtjIS tjJS tKtL| }t|tMr!tjNS t|tCtDtEtFfr.tjBS tjUS t| tMtOfrr| s?tj?S t	| dkrotKtL| P }t|tMrWtjQS t|tCtDfrbtjRS t||jrltjSS tj?S tjUS tT| rztjJS tjUS )zDetermine the data format of the input data.

    Parameters
    ----------
    input_data : Any
        The input data to determine the data format of.

    Returns
    -------
    DataFormat
        The data format of the input data.
    r   Nr   )Vr   r   r   rg   ri   r   r'   rx   r   r   r   r   r   ru   rv   r  r   Arrayr   r)   rz   r(   ry   r   r{   r   r   r   rw   r   r~   r   r|   r   r}   r   rt   r   r   r   r   r   r   r   r   r   r   r   rm   r   r   r   r   ro   r   rn   r   r   r   r!   r#   r    r$   rp   r   r   rr   r   r   r   rT  r   r   r   rs   nextiterr   rq   r	   valuesrj   rl   rk   r"   rh   )r_  r  r   r   first_elementrV  rK   rK   rN   r   r  s   


r   c                 C  s&   ddl }| |j|jgdg S )a   Unify all missing values in a DataFrame to None.

    Pandas uses a variety of values to represent missing values, including np.nan,
    NaT, None, and pd.NA. This function replaces all of these values with None,
    which is the only missing value type that is supported by all data
    r   N)r   fillnananreplaceinfer_objects)r   r  rK   rK   rN   _unify_missing_values  s   ri  Series[Any]c                 C  s2   t | jdkrtdt | j d| | jd  S )zConvert a Pandas DataFrame to a Pandas Series by selecting the first column.

    Raises
    ------
    ValueError
        If the DataFrame has more than one column.
    r   6DataFrame is expected to have a single column but has .r   )r   r   r  )r   rK   rK   rN   _pandas_df_to_series  s
   	rm  data_formatDataFrame | Series[Any] | pa.Table | pa.Array | np.ndarray[Any, np.dtype[Any]] | tuple[Any] | list[Any] | set[Any] | dict[str, Any]c                 C  s  |t jt jt jt jt jt jt jt jt j	t j
t jt jt jhv r | S |t jkr<ddl}| jr1|dS | jdddf  S |t jkrQddl}| jrM|dS |  S |t jkr`ddl}|j| S |t jkrqddl}|jt| S |t jkrzt| S |t jt jhv rddl}|| S |t j krddl}|t| S |t j!krddl"}|j#$| S |t j%krddl"}|j&'t| S |t j(krt)| j*ddS |t j+krt)|  , S |t j-krt)| j*ddS |t j.krt)| j*ddS |t j/kr| j*ddS |t j0t j1t j2fv rFt)| } g }t3| j4dkr| | j4d  , }nt3| j4dkr0t5d	t3| j4 d
|t j1kr:t6|S |t j2krDt7|S |S |t j8krat)| } | jrVi S | jdddf * S t5d| )a  Convert a Pandas DataFrame to the specified data format.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to convert.

    data_format : DataFormat
        The data format to convert to.

    Returns
    -------
    pd.DataFrame, pd.Series, pyarrow.Table, np.ndarray, xarray.Dataset, xarray.DataArray, polars.Dataframe, polars.Series, list, set, tuple, or dict.
        The converted dataframe.
    r   Nrecordsr   r   r   seriesr   rk  rl  zUnsupported input data format: )9rg   ri   rm   rn   ro   rt   rw   rx   ry   r{   r   r   r   r   ru   r   r2  r   r\   to_numpyrv   r   r   r  r  r   r`  rm  rz   r|   r}   polarsr~   r   xarrayDatasetr   r   	DataArrayfrom_seriesrq   ri  r   rr   tolistrj   rl   rk   rs   r   r   r   r   r  r   r   rp   )r   rn  r  r   plxrreturn_listrK   rK   rN    convert_pandas_df_to_data_format  s   
&












"r|  )r   r   rG   rb   )r   r   rG   rb   )r   r   rG   r   )r   r   rG   r   )r   r   rG   rb   rJ   )r   r   r   r   rG   r   )r   r'   rG   r'   )r   r   rG   r'   )r   r   r   rQ   r   rb   rG   r'   )r	  re   rG   r   )r   r'   rG   r   )r"  r   rG   r'   )r'  r   rG   r(  )r   r   r   rQ   rG   r   )r   r.  rG   r/  )r	  re   r5  r   rG   re   )rG  rH  rG   rb   )r   r'   rX  rY  rG   r'   )r_  r   rG   rg   )r   r'   rG   r'   )r   r'   rG   rj  )r   r'   rn  rg   rG   ro  )rY   
__future__r   r  r   r   r:  recollectionsr   r   r   r   collections.abcr   r   r	   r
   enumr   r   r   typesr   typingr   r   r   r   r   r   r   r   typing_extensionsr   r   	streamlitr   r   r   r   streamlit.type_utilr   r   r   r    r!   r"   r#   r$   r%   r&   r   r  r   r   r   r'   r(   r)   pandas.core.indexingr*   pandas.io.formats.styler+   
get_loggerrV   r,   __annotations___MAX_UNEVALUATED_DF_ROWScompiler-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rF   r[   r]   r`   rd   rf   rg   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r!  r&  r   r-  r4  r  rW  r   r   ri  rm  r|  rK   rK   rK   rN   <module>   s   (0
 	

'


'
!





	

	








	


 
p
$



&:
K=
2
u
