o
    ȳgz_                     @  s   d Z ddlmZ ddlZddlmZmZmZmZm	Z	m
Z
mZmZ ddlmZmZmZmZ ddlmZ er<ddlmZmZ g dZG d	d
 d
ZG dd deZG dd deZdS )z"Apache Cassandra database wrapper.    )annotationsN)TYPE_CHECKINGAnyDictListOptionalSequenceTupleUnion)	BaseModel
ConfigDictFieldmodel_validator)Self)	ResultSetSession)systemsystem_authsystem_distributedsystem_schemasystem_tracessystem_viewsdatastax_sladata_endpoint_authc                   @  s   e Zd ZdZ				dGdHd
dZ	dIdJddZdKddZdLddZdMddZdNd!d"Z	dOd'd(Z
dPd)d*Z	dQdRd-d.ZdSd/d0ZdTdUd4d5ZdQdVd8d9ZdWd:d;ZdXd<d=ZdXd>d?ZdXd@dAZ	dQdYdCdDZe		dZd[dEdFZdS )\CassandraDatabaseu$   Apache Cassandra® database wrapper.NsessionOptional[Session]exclude_tablesOptional[List[str]]include_tablescassio_init_kwargsOptional[Dict[str, Any]]c                 C  s<   |  ||}|std|| _t| _|pg | _|pg | _d S )Nz+Session not provided and cannot be resolved)_resolve_session
ValueError_sessionIGNORED_KEYSPACES_exclude_keyspaces_exclude_tables_include_tables)selfr   r   r   r    r$    r*   l/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/langchain_community/utilities/cassandra_database.py__init__   s   
zCassandraDatabase.__init__allquerystrfetchkwargsr   return&Union[list, Dict[str, Any], ResultSet]c                 K  sV   |dkr| j |fi |S |dkr| j|fi |S |dkr'| j|fi |S td)z+Execute a CQL query and return the results.r-   onecursorz8Fetch parameter must be either 'one', 'all', or 'cursor')	fetch_all	fetch_one_fetchr#   )r)   r.   r0   r1   r*   r*   r+   run-   s   zCassandraDatabase.runr   c                 K  s    |  |d}| jj|fi |S )NSELECT)_validate_cqlr$   execute)r)   r.   r1   clean_queryr*   r*   r+   r8   =   s   zCassandraDatabase._fetchlistc                 K  s   t | j|fi |S N)r>   r8   )r)   r.   r1   r*   r*   r+   r6   A   s   zCassandraDatabase.fetch_allDict[str, Any]c                 K  s&   | j |fi |}|r|  S i S r?   )r8   r4   _asdict)r)   r.   r1   resultr*   r*   r+   r7   D   s   zCassandraDatabase.fetch_onekeyspaceList[Table]c                 C  s    |  |g}||v r|| S g S )z1Get the Table objects for the specified keyspace.)_resolve_schema)r)   rC   schemar*   r*   r+   get_keyspace_tablesH   s   z%CassandraDatabase.get_keyspace_tablestable	predicatelimitintc                 C  s^   d| d| }|r|d| 7 }|r|d| 7 }|d7 }|  |}ddd |D }|S )	z<Get data from the specified table in the specified keyspace.zSELECT * FROM .z WHERE z LIMIT ;
c                 s  s    | ]}t |V  qd S r?   )r/   ).0rowr*   r*   r+   	<genexpr>b   s    z3CassandraDatabase.get_table_data.<locals>.<genexpr>)r6   join)r)   rC   rH   rI   rJ   r.   rB   datar*   r*   r+   get_table_dataS   s   
z CassandraDatabase.get_table_datac                 C  s   |   }dd|iS )z4Return db context that you may want in agent prompt.	keyspaces, )_fetch_keyspacesrR   )r)   rU   r*   r*   r+   get_contexte   s   zCassandraDatabase.get_contexttablesOptional[List[Table]]c                 C  sZ   |s|  |}|r+d| d}|r%|D ]}||jddd7 }|d7 }q|S |d7 }|S dS )a  
        Generates a markdown representation of the schema for a specific keyspace
        by iterating over all tables within that keyspace and calling their
        as_markdown method.

        Args:
            keyspace: The name of the keyspace to generate markdown documentation for.
            tables: list of tables in the keyspace; it will be resolved if not provided.

        Returns:
            A string containing the markdown representation of the specified
            keyspace schema.
        z## Keyspace: 

F   )include_keyspaceheader_levelzNo tables present in keyspace

 )rG   as_markdown)r)   rC   rY   outputrH   r*   r*   r+   format_keyspace_to_markdownj   s   

z-CassandraDatabase.format_keyspace_to_markdownc                 C  s8   |   }d}| D ]\}}|| || d7 }q
|S )a  
        Generates a markdown representation of the schema for all keyspaces and tables
        within the CassandraDatabase instance. This method utilizes the
        format_keyspace_to_markdown method to create markdown sections for each
        keyspace, assembling them into a comprehensive schema document.

        Iterates through each keyspace in the database, utilizing
        format_keyspace_to_markdown to generate markdown for each keyspace's schema,
        including details of its tables. These sections are concatenated to form a
        single markdown document that represents the schema of the entire database or
        the subset of keyspaces that have been resolved in this instance.

        Returns:
            A markdown string that documents the schema of all resolved keyspaces and
            their tables within this CassandraDatabase instance. This includes keyspace
            names, table names, comments, columns, partition keys, clustering keys,
            and indexes for each table.
        z# Cassandra Database Schema

r[   )rE   itemsrb   )r)   rF   ra   rC   rY   r*   r*   r+   format_schema_to_markdown   s
   z+CassandraDatabase.format_schema_to_markdownr:   cqltypec                 C  s   dg}|r|  |vrtd| d| | }|  |  s,td|   d|d}tdd|}td	d|}d|v rGtd
|S )a  
        Validates a CQL query string for basic formatting and safety checks.
        Ensures that `cql` starts with the specified type (e.g., SELECT) and does
        not contain content that could indicate CQL injection vulnerabilities.

        Args:
            cql: The CQL query string to be validated.
            type: The expected starting keyword of the query, used to verify
                that the query begins with the correct operation type
                (e.g., "SELECT", "UPDATE"). Defaults to "SELECT".

        Returns:
            The trimmed and validated CQL query string without a trailing semicolon.

        Raises:
            ValueError: If the value of `type` is not supported
            DatabaseError: If `cql` is considered unsafe
        r:   zUnsupported CQL type: z1. Supported types: 
                             zCQL must start with rL   rM   z'.*?'r_   z".*?"zPotentially unsafe CQL, as it contains a ; at a 
                                place other than the end or within quotation marks.)upperr#   strip
startswithDatabaseErrorrstripresub)r)   re   rf   SUPPORTED_TYPEScql_trimmedcql_sanitizedr*   r*   r+   r;      s$   
zCassandraDatabase._validate_cqlrU   	List[str]c                 C  s`   |  d}g }|D ]$}t|tsq	|d }|r!||v r!|| q	|s-|| jvr-|| q	|S )a  
        Fetches a list of keyspace names from the Cassandra database. The list can be
        filtered by a provided list of keyspace names or by excluding predefined
        keyspaces.

        Args:
            keyspaces: A list of keyspace names to specifically include.
                If provided and not empty, the method returns only the keyspaces
                present in this list.
                If not provided or empty, the method returns all keyspaces except those
                specified in the _exclude_keyspaces attribute.

        Returns:
            A list of keyspace names according to the filtering criteria.
        z1SELECT keyspace_name FROM system_schema.keyspaceskeyspace_name)r6   
isinstancer   appendr&   )r)   rU   all_keyspacesfiltered_keyspacesksrr   r*   r*   r+   rW      s   

z"CassandraDatabase._fetch_keyspacesc                 C  s$   d dd |D }| d| dS )NrV   c                 S  s   g | ]}d | d qS )'r*   )rO   rw   r*   r*   r+   
<listcomp>   s    z<CassandraDatabase._format_keyspace_query.<locals>.<listcomp>z WHERE keyspace_name IN ())rR   )r)   r.   rU   keyspace_in_clauser*   r*   r+   _format_keyspace_query   s   z(CassandraDatabase._format_keyspace_queryc                 C     |  d|}| |S )a  Fetches tables schema data, filtered by a list of keyspaces.
        This method allows for efficiently fetching schema information for multiple
        keyspaces in a single operation, enabling applications to programmatically
        analyze or document the database schema.

        Args:
            keyspaces: A list of keyspace names from which to fetch tables schema data.

        Returns:
            Dictionaries of table details (keyspace name,  table name, and comment).
        zDSELECT keyspace_name, table_name, comment  FROM system_schema.tablesr|   r6   r)   rU   tables_queryr*   r*   r+   _fetch_tables_data   s
   
z$CassandraDatabase._fetch_tables_datac                 C  r}   )a  Fetches columns schema data, filtered by a list of keyspaces.
        This method allows for efficiently fetching schema information for multiple
        keyspaces in a single operation, enabling applications to programmatically
        analyze or document the database schema.

        Args:
            keyspaces: A list of keyspace names from which to fetch tables schema data.

        Returns:
            Dictionaries of column details (keyspace name, table name, column name,
            type, kind, and position).
        z
                    SELECT keyspace_name, table_name, column_name, type, kind, 
                        clustering_order, position 
                    FROM system_schema.columns
                    r~   r   r*   r*   r+   _fetch_columns_data  
   
z%CassandraDatabase._fetch_columns_datac                 C  r}   )a	  Fetches indexes schema data, filtered by a list of keyspaces.
        This method allows for efficiently fetching schema information for multiple
        keyspaces in a single operation, enabling applications to programmatically
        analyze or document the database schema.

        Args:
            keyspaces: A list of keyspace names from which to fetch tables schema data.

        Returns:
            Dictionaries of index details (keyspace name, table name, index name, kind,
            and options).
        z
                    SELECT keyspace_name, table_name, index_name, 
                        kind, options 
                    FROM system_schema.indexes
                    r~   r   r*   r*   r+   _fetch_indexes_data&  r   z%CassandraDatabase._fetch_indexes_dataDict[str, List[Table]]c              
     s   |s|   }| |}| |}| |}i }|D ]_}|j |j|j}| jr-| jvr-q| jr6| jv r6q fdd|D } fdd|D }	 fdd|D }
 fdd|D }t	 |||	|
|d} |vrqg | < |  
| q|S )a?  
        Efficiently fetches and organizes Cassandra table schema information,
        such as comments, columns, and indexes, into a dictionary mapping keyspace
        names to lists of Table objects.

        Args:
            keyspaces: An optional list of keyspace names from which to fetch tables
                schema data.

        Returns:
            A dictionary with keyspace names as keys and lists of Table objects as
            values, where each Table object is populated with schema details
            appropriate for its keyspace and table name.
        c                   s,   g | ]}|j  kr|jkr|j|jfqS r*   )rr   
table_namecolumn_namerf   rO   crC   r   r*   r+   ry   b  s
    
z5CassandraDatabase._resolve_schema.<locals>.<listcomp>c                   s0   g | ]}|j d kr|j kr|jkr|jqS )partition_key)kindrr   r   r   r   r   r*   r+   ry   h  s    


c                   s6   g | ]}|j d kr|j kr|jkr|j|jfqS )
clustering)r   rr   r   r   clustering_orderr   r   r*   r+   ry   p  s    



c                   s0   g | ]}|j  kr|jkr|j|j|jfqS r*   )rr   r   
index_namer   optionsr   r   r*   r+   ry   x  s
    rC   r   commentcolumns	partitionr   indexes)rW   r   r   r   rr   r   r   r(   r'   Tablert   )r)   rU   tables_datacolumns_dataindexes_datakeyspace_dict
table_datar   table_columnspartition_keysclustering_keystable_indexes	table_objr*   r   r+   rE   =  sN   



z!CassandraDatabase._resolve_schemac                 C  sv   | r| S zddl }W n ty   tdw |j }|r|S |r9t|tr5|jdi | |j }|S tddS )a%  
        Attempts to resolve and return a Session object for use in database operations.

        This function follows a specific order of precedence to determine the
        appropriate session to use:
        1. `session` parameter if given,
        2. Existing `cassio` session,
        3. A new `cassio` session derived from `cassio_init_kwargs`,
        4. `None`

        Args:
            session: An optional session to use directly.
            cassio_init_kwargs: An optional dictionary of keyword arguments to `cassio`.

        Returns:
            The resolved session object if successful, or `None` if the session
            cannot be resolved.

        Raises:
            ValueError: If `cassio_init_kwargs` is provided but is not a dictionary of
            keyword arguments.
        r   NzBcassio package not found, please install with `pip install cassio`z/cassio_init_kwargs must be a keyword dictionaryr*   )	cassio.configImportErrorr#   configresolve_sessionrs   dictinitcheck_resolve_session)r   r    cassiosr*   r*   r+   r"     s&   


z"CassandraDatabase._resolve_session)NNNN)r   r   r   r   r   r   r    r!   )r-   )r.   r/   r0   r/   r1   r   r2   r3   )r.   r/   r1   r   r2   r   )r.   r/   r1   r   r2   r>   )r.   r/   r1   r   r2   r@   )rC   r/   r2   rD   )
rC   r/   rH   r/   rI   r/   rJ   rK   r2   r/   )r2   r@   r?   )rC   r/   rY   rZ   r2   r/   )r2   r/   )r:   )re   r/   rf   r/   r2   r/   )rU   r   r2   rq   )r.   r/   rU   rq   r2   r/   )rU   rq   r2   r>   )rU   r   r2   r   )NN)r   r   r    r!   r2   r   )__name__
__module____qualname____doc__r,   r9   r8   r6   r7   rG   rT   rX   rb   rd   r;   rW   r|   r   r   r   rE   staticmethodr"   r*   r*   r*   r+   r      s<    






 3
"


Qr   c                      s"   e Zd ZdZd fddZ  ZS )rj   zuException raised for errors in the database schema.

    Attributes:
        message -- explanation of the error
    messager/   c                   s   || _ t | j  d S r?   )r   superr,   )r)   r   	__class__r*   r+   r,     s   zDatabaseError.__init__)r   r/   )r   r   r   r   r,   __classcell__r*   r*   r   r+   rj     s    rj   c                   @  s   e Zd ZU ded< 	 ded< 	 dZded< 	 eedZded	< eedZd
ed< eedZ	ded< eedZ
ded< eddZeddd(ddZed)ddZ	d*d+dd Zed,d!d"Zed-d$d%Zed.d&d'ZdS )/r   r/   rC   r   NOptional[str]r   )default_factoryzList[Tuple[str, str]]r   rq   r   r   List[Tuple[str, str, str]]r   T)frozenafter)moder2   r   c                 C  s    | j std| jstd| S )Nz*non-empty column list for must be providedz)non-empty partition list must be provided)r   r#   r   )r)   r*   r*   r+   check_required_fields  s
   zTable.check_required_fieldsdbr   c                 C  s>   |  |||\}}}| ||| ||||||| |||dS )Nr   )_resolve_columns_resolve_comment_resolve_indexes)clsrC   r   r   r   r   r   r*   r*   r+   from_database  s   zTable.from_databaser]   boolr^   Optional[int]c              	   C  sH  d}|dur|d|  d7 }|d| j  d7 }|r#|d| j d7 }| jr/|d| j d7 }|d	7 }| jD ]\}}|d
| d| d7 }q6|dd| j d7 }|d7 }| jrg }| jD ]\}}| dkrn|| q^|| d|  q^|dd| d7 }| j	r|d7 }| j	D ]\}}	}
|d
| d|	 d|
 d7 }q|S )a  
        Generates a Markdown representation of the Cassandra table schema, allowing for
        customizable header levels for the table name section.

        Args:
            include_keyspace: If True, includes the keyspace in the output.
                Defaults to True.
            header_level: Specifies the markdown header level for the table name.
                If None, the table name is included without a header.
                Defaults to None (no header level).

        Returns:
            A string in Markdown format detailing the table name
            (with optional header level), keyspace (optional), comment, columns,
            partition keys, clustering keys (with optional clustering order),
            and indexes.
        r_   N# zTable Name: rN   z- Keyspace: z- Comment: z
- Columns
z  - z (z)
z- Partition Keys: (rV   z- Clustering Keys: none(z
- Indexes
z : kind=z
, options=)
r   rC   r   r   rR   r   r   lowerrt   r   )r)   r]   r^   ra   columnrf   cluster_listr   namer   r   r*   r*   r+   r`     s2   zTable.as_markdownc                 C  sP   |j d|  d| ddd}t|tr|d}|r|S d S tdt|j )NzbSELECT comment 
                FROM system_schema.tables 
                WHERE keyspace_name = 'z%' 
                AND table_name = '';r4   )r0   r   zBUnexpected result type from db.run: 
                             )r9   rs   r   getr#   rf   r   )rC   r   r   rB   r   r*   r*   r+   r   3  s$   

zTable._resolve_comment>Tuple[List[Tuple[str, str]], List[str], List[Tuple[str, str]]]c           
      C  s   g }g }g }| d|  d| d}t|tstd|D ]9}t|ts%q||d |d f |d dkrB||d |d	 f q|d d
krV||d |d |d	 f qdd t|dd dD }dd t|dd dD }	|||	fS )NzSELECT column_name, type, kind, clustering_order, position 
                           FROM system_schema.columns 
                           WHERE keyspace_name = 'z0' 
                           AND table_name = 'r   6Expected a sequence of dictionaries from 'run' method.r   rf   r   r   positionr   r   c                 S  s   g | ]\}}|qS r*   r*   )rO   r   _r*   r*   r+   ry   h  s    z*Table._resolve_columns.<locals>.<listcomp>c                 S     | d S )N   r*   xr*   r*   r+   <lambda>i      z(Table._resolve_columns.<locals>.<lambda>)keyc                 S  s   g | ]	\}}}||fqS r*   r*   )rO   r   r   r   r*   r*   r+   ry   l  s    c                 S  r   )N   r*   r   r*   r*   r+   r   o  r   )r9   rs   r   	TypeErrorr   rt   sorted)
rC   r   r   r   partition_infocluster_inforesultsrP   r   clusterr*   r*   r+   r   K  s@   


zTable._resolve_columnsc                 C  sz   g }| d|  d| d}t|tstd|D ]!}t|ts!q|d }t|ts.t|}||d |d |f q|S )NzSELECT index_name, kind, options 
                        FROM system_schema.indexes 
                        WHERE keyspace_name = 'z-' 
                        AND table_name = 'r   r   r   r   r   )r9   rs   r   r   r   r/   rt   )rC   r   r   r   r   rP   index_optionsr*   r*   r+   r   u  s$   


zTable._resolve_indexes)r2   r   )rC   r/   r   r/   r   r   r2   r   )TN)r]   r   r^   r   r2   r/   )rC   r/   r   r/   r   r   r2   r   )rC   r/   r   r/   r   r   r2   r   )rC   r/   r   r/   r   r   r2   r   )r   r   r   __annotations__r   r   r>   r   r   r   r   r   model_configr   r   classmethodr   r`   r   r   r   r   r*   r*   r*   r+   r     s4   
 4)r   )r   
__future__r   rl   typingr   r   r   r   r   r   r	   r
   pydanticr   r   r   r   typing_extensionsr   cassandra.clusterr   r   r%   r   	Exceptionrj   r   r*   r*   r*   r+   <module>   s    (   2