o
    ȳgk                  	   @   sb  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZm Z m!Z!m"Z"m#Z# e $e%Z&defddZ'dee( fddZ)dee( fddZ*		dde(dedede+fddZ,de(de(defddZ-G dd dZ.G dd deeZ/dS )     N)deepcopy)Path)AnyDictListOptionalSequenceUnion)AgentActionAgentFinish)BaseCallbackHandler)Document)	LLMResult)get_from_dict_or_envguard_import)BaseMetadataCallbackHandlerflatten_dicthash_stringimport_pandasimport_spacyimport_textstatreturnc                   C   s   t dS )zKImport the mlflow python package and raise an error if it is not installed.mlflow)r    r   r   i/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/langchain_community/callbacks/mlflow_callback.pyimport_mlflow      r   c                   C      g dS )z!Get the metrics to log to MLFlow.)stepstartsendserrorstext_ctrchain_starts
chain_ends
llm_startsllm_endsllm_streamstool_starts	tool_ends
agent_endsretriever_startsretriever_endsr   r   r   r   r   mlflow_callback_metrics"   r   r-   c                   C   r   )z.Get the text complexity metrics from textstat.)flesch_reading_easeflesch_kincaid_grade
smog_indexcoleman_liau_indexautomated_readability_indexdale_chall_readability_scoredifficult_wordslinsear_write_formulagunning_fogfernandez_huertaszigriszt_pazosgutierrez_polinicrawfordgulpease_indexosmanr   r   r   r   r   get_text_complexity_metrics7   r   r=   textnlptextstatc           
         s   i }dur fddt  D }|d|i || |durFt }| }|jj|dddd}|jj|d	ddd}||d
}	||	 |S )a  Analyze text using textstat and spacy.

    Parameters:
        text (str): The text to analyze.
        nlp (spacy.lang): The spacy language model to use for visualization.
        textstat: The textstat library to use for complexity metrics calculation.

    Returns:
        (dict): A dictionary containing the complexity metrics and visualization
            files serialized to  HTML string.
    Nc                    s   i | ]
}|t | qS r   )getattr.0keyr>   r@   r   r   
<dictcomp>_   s    z analyze_text.<locals>.<dictcomp>text_complexity_metricsdepFT)stylejupyterpageent)dependency_treeentities)r=   updater   displacyrender)
r>   r?   r@   resprG   spacydocdep_outent_outtext_visualizationsr   rE   r   analyze_textM   s"   

rX   prompt
generationc                 C   s*   |  dd}| dd}d| d| dS )zConstruct an html element from a prompt and a generation.

    Parameters:
        prompt (str): The prompt.
        generation (str): The generation.

    Returns:
        (str): The html string.
z<br>z
    <p style="color:black;">z>:</p>
    <blockquote>
      <p style="color:green;">
        z"
      </p>
    </blockquote>
    )replace)rY   rZ   formatted_promptformatted_generationr   r   r   )construct_html_from_prompt_and_generationw   s   	r_   c                	   @   s"  e Zd ZdZdefddZ	d)dedeeef dee d	dfd
dZ	d*ddZ
deded	dfddZ	d+deeeef eeef f dee d	dfddZdeeef ded	dfddZdeded	dfddZdeded	dfddZd eded	dfd!d"Zd#ed	dfd$d%Zd&ed	dfd'd(ZdS ),MlflowLoggera  Callback Handler that logs metrics and artifacts to mlflow server.

    Parameters:
        name (str): Name of the run.
        experiment (str): Name of the experiment.
        tags (dict): Tags to be attached for the run.
        tracking_uri (str): MLflow tracking server uri.

    This handler implements the helper functions to initialize,
    log metrics and artifacts to the mlflow server.
    kwargsc                 K   s   t  | _dtjv r | jd | jjj | _| j	| j| _
n=t|ddd}| j| |d }r>| j|jj| _nt|dd}| j|| _
| j
d urV| j
j| _n| j|| _| |d	 |d
 |dd  |dd| _d S )NDATABRICKS_RUNTIME_VERSION
databrickstracking_uriMLFLOW_TRACKING_URI run_idexperiment_nameMLFLOW_EXPERIMENT_NAMErun_namerun_tagsartifacts_dir)r   r   osenvironset_tracking_uritrackingfluent_get_experiment_id	mlf_expidget_experimentmlf_expr   getget_runinfoexperiment_idget_experiment_by_namecreate_experiment	start_rundir)selfra   rd   rg   rh   r   r   r   __init__   s,   

zMlflowLogger.__init__Nnametagsrg   r   c                 C   sh   |du r/| drdtjtjtj dd}|dd | }| j j	| j
||d}|jj}|| _dS )z
        If run_id is provided, it will reuse the run with the given run_id.
        Otherwise, it starts a new run, auto generates the random suffix for name.
        Nz-%rf      )k)rj   r   )endswithjoinrandomchoicesstringascii_uppercasedigitsr   MlflowClient
create_runrs   rx   rg   )r~   r   r   rg   rnamerunr   r   r   r|      s   


zMlflowLogger.start_runc                 C   s   | j   dS )zTo finish the run.N)r   end_runr~   r   r   r   
finish_run   s   zMlflowLogger.finish_runrD   valuec                 C   s   | j j||| jd dS )zTo log metric to mlflow server.rg   N)r   
log_metricrg   )r~   rD   r   r   r   r   metric   s   zMlflowLogger.metricr   datar   c                 C      | j j|| jd dS )z%To log all metrics in the input dict.r   N)r   log_metricsrg   )r~   r   r   r   r   r   metrics   s   zMlflowLogger.metricsfilenamec                 C   *   | j j|tj| j| d| jd dS )z,To log the input data as json file artifact.z.jsonr   N)r   log_dictrm   pathr   r}   rg   )r~   r   r   r   r   r   jsonf      
zMlflowLogger.jsonf	dataframec                 C   s   |  | d|  dS )z1To log the input pandas dataframe as a html tabletable_N)htmlto_html)r~   r   r   r   r   r   table   s   zMlflowLogger.tabler   c                 C   r   )z3To log the input html string as html file artifact.z.htmlr   Nr   log_textrm   r   r   r}   rg   )r~   r   r   r   r   r   r      r   zMlflowLogger.htmlr>   c                 C   r   )z,To log the input text as text file artifact.z.txtr   Nr   )r~   r>   r   r   r   r   r>      r   zMlflowLogger.textr   c                 C   r   )z/To upload the file from given path as artifact.r   N)r   log_artifactrg   )r~   r   r   r   r   artifact   s   zMlflowLogger.artifactchainc                 C   s   | j jj|d| jd d S )Nzlangchain-modelr   )r   	langchain	log_modelrg   )r~   r   r   r   r   langchain_artifact   s   zMlflowLogger.langchain_artifactNr   Nr   )__name__
__module____qualname____doc__r   r   strr   r   r|   r   floatr   r	   intr   r   r   r   r>   r   r   r   r   r   r   r`      s:    !



r`   c                       sT  e Zd ZdZ						dHdee dee dee d	ee d
ee deddf fddZdIddZdeee	f de
e de	ddfddZdede	ddfddZdede	ddfddZdede	ddfddZdeee	f deee	f de	ddfd d!Zd"eeee	f ee
e f de	ddfd#d$Zdede	ddfd%d&Zdeee	f d'ede	ddfd(d)Zd*e	de	ddfd+d,Zdede	ddfd-d.Zd/ede	ddfd0d1Zd2ede	ddfd3d4Zd5ede	de	fd6d7Zdeee	f d8ede	de	fd9d:Zd;ee de	de	fd<d=Z dede	de	fd>d?Z!de	fd@dAZ"de#fdBdCZ$dJdEe	d2e#ddfdFdGZ%  Z&S )KMlflowCallbackHandleraO  Callback Handler that logs metrics and artifacts to mlflow server.

    Parameters:
        name (str): Name of the run.
        experiment (str): Name of the experiment.
        tags (dict): Tags to be attached for the run.
        tracking_uri (str): MLflow tracking server uri.

    This handler will utilize the associated callback method called and formats
    the input of each callback function with metadata regarding the state of LLM run,
    and adds the response to the list of records for both the {method}_records and
    action. It then logs the response to mlflow server.
    langchainrun-%r   Nrf   r   
experimentr   rd   rg   rl   r   c           	         s\  t   t  t   || _|| _|pi | _|| _|| _|| _	t
 | _t| j| j| j| j| j| j	d| _g | _d| _zt }W n tyX } zt|j W Y d}~nd}~ww z|d| _W n tyo   td Y nw zt | _W n ty } zt|j d| _W Y d}~nd}~ww dd t D | _g g g g g g g g g g g g g d| _dS )zInitialize callback handler.)rd   rh   rj   rk   rg   rl   Nen_core_web_smzfRun `python -m spacy download en_core_web_sm` to download en_core_web_sm model for text visualization.c                 S   s   i | ]}|d qS r   r   rB   r   r   r   rF   =  s    z2MlflowCallbackHandler.__init__.<locals>.<dictcomp>)on_llm_start_recordson_llm_token_recordson_llm_end_recordson_chain_start_recordson_chain_end_recordson_tool_start_recordson_tool_end_recordson_text_recordson_agent_finish_recordson_agent_action_recordson_retriever_start_recordson_retriever_end_recordsaction_records)r   r   superr   r   r   r   rd   rg   rl   tempfileTemporaryDirectorytemp_dirr`   mlflgr   r?   r   ImportErrorloggerwarningmsgloadOSErrorr   r@   r-   r   records)	r~   r   r   r   rd   rg   rl   rS   e	__class__r   r   r     sn   



	
zMlflowCallbackHandler.__init__c                 C   s@   | j  D ]	\}}d| j |< q| j D ]	\}}g | j|< qd S )Nr   )r   itemsr   )r~   r   vr   r   r   _resetO  s
   zMlflowCallbackHandler._reset
serializedpromptsra   c           	      K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t| || j  | jj | j | j d d t|D ])\}}t|}||d< | jd	 | | jd
 | | j|d| d|  qFdS )zRun when LLM starts.r      r%   r   actionon_llm_startr   rY   r   r   
llm_start__prompt_N)	r   rO   r   r   	enumerater   r   appendr   )	r~   r   r   ra   r%   rR   idxrY   prompt_respr   r   r   r   U  s    
z"MlflowCallbackHandler.on_llm_starttokenc                 K      | j d  d7  < | j d  d7  < | j d }i }|d|d || j  | jj | j | j d d | jd | | jd | | j|d	|  d
S )z#Run when LLM generates a new token.r   r   r'   on_llm_new_token)r   r   r   r   r   llm_new_tokens_Nr   rO   r   r   r   r   )r~   r   ra   r'   rR   r   r   r   r   m  s   
z&MlflowCallbackHandler.on_llm_new_tokenresponsec              	   K   s  | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t|jp0i  || j  | jj | j | j d d |jD ]~}t|D ]w\}}t|}|t|  |t	|j
| j| jd d	|v r|d	}	| jj |	| j d d | jd
 | | jd | | j|d| d|  d|v r|d }
| j|
dt|j
  d|v r|d }| j|dt|j
  qNqHdS )zRun when LLM ends running.r   r   r&   r    r   
on_llm_endr   )r?   r@   rG   r   r   llm_end__generation_rM   zdep-rN   zent-N)r   rO   r   
llm_outputr   generationsr   r   dictrX   r>   r?   r@   popr   r   r   r   r   )r~   r   ra   r&   rR   r   r   rZ   generation_respcomplexity_metricsrM   rN   r   r   r   r   ~  sT   

z MlflowCallbackHandler.on_llm_enderrorc                 K   (   | j d  d7  < | j d  d7  < dS )zRun when LLM errors.r   r   r!   Nr   r~   r   ra   r   r   r   on_llm_error     z"MlflowCallbackHandler.on_llm_errorinputsc                 K   s  | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|ddi |t| || j  | jj | j | j d d t|trTdd	d
 | D }nt|trdddd
 |D }nt	|}t
|}||d< | jd | | jd | | j|d|  dS )zRun when chain starts running.r   r   r#   r   r   on_chain_startr   ,c                 S      g | ]\}}| d | qS =r   rC   r   r   r   r   r   
<listcomp>      z8MlflowCallbackHandler.on_chain_start.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   r   )rC   inputr   r   r   r    s    r   r   r   chain_start_N)r   rO   r   r   
isinstancer   r   r   listr   r   r   r   r   )r~   r   r   ra   r#   rR   chain_input
input_respr   r   r   r     s&   


z$MlflowCallbackHandler.on_chain_startoutputsc                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }t|tr4ddd | D }nt|trBdtt|}nt|}|d|d	 || j  | j	j | j | j d d
 | j
d | | j
d | | j	|d|  dS )zRun when chain ends running.r   r   r$   r    r   c                 S   r   r   r   r  r   r   r   r    r  z6MlflowCallbackHandler.on_chain_end.<locals>.<listcomp>on_chain_end)r   r  r   r   r   
chain_end_N)r   r  r   r   r   r	  mapr   rO   r   r   r   r   )r~   r  ra   r$   rR   chain_outputr   r   r   r    s    


z"MlflowCallbackHandler.on_chain_endc                 K   r   )zRun when chain errors.r   r   r!   Nr   r   r   r   r   on_chain_error  r   z$MlflowCallbackHandler.on_chain_error	input_strc                 K      | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|d |t| || j  | jj | j | j d d | jd | | jd	 | | j|d
|  dS )zRun when tool starts running.r   r   r(   r   on_tool_start)r   r  r   r   r   tool_start_Nr   rO   r   r   r   r   r   )r~   r   r  ra   r(   rR   r   r   r   r    s   
z#MlflowCallbackHandler.on_tool_startoutputc                 K   s   t |}| jd  d7  < | jd  d7  < | jd  d7  < | jd }i }|d|d || j | jj| j| jd d | jd | | jd	 | | j|d
|  dS )zRun when tool ends running.r   r   r)   r    on_tool_end)r   r  r   r   r   	tool_end_N)r   r   rO   r   r   r   r   )r~   r  ra   r)   rR   r   r   r   r    s   
z!MlflowCallbackHandler.on_tool_endc                 K   r   )zRun when tool errors.r   r   r!   Nr   r   r   r   r   on_tool_error  r   z#MlflowCallbackHandler.on_tool_errorr>   c                 K   r   )z,
        Run when text is received.
        r   r   r"   on_text)r   r>   r   r   r   on_text_Nr   )r~   r>   ra   r"   rR   r   r   r   r    s   
zMlflowCallbackHandler.on_textfinishc                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|jd |jd || j  | jj | j | j d d | jd	 | | jd
 | | j|d|  dS )zRun when agent ends running.r   r   r*   r    on_agent_finishr  )r   r  logr   r   r   agent_finish_N)r   rO   return_valuesr  r   r   r   r   )r~   r  ra   r*   rR   r   r   r   r  -  s    
z%MlflowCallbackHandler.on_agent_finishr   c                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }|d|j|j|jd || j  | jj | j | j d d | jd | | jd	 | | j|d
|  dS )zRun on agent action.r   r   r(   r   on_agent_action)r   tool
tool_inputr  r   r   r   agent_action_N)	r   rO   r#  r$  r  r   r   r   r   )r~   r   ra   r(   rR   r   r   r   r"  D  s"   
z%MlflowCallbackHandler.on_agent_actionqueryc                 K   r  )z"Run when Retriever starts running.r   r   r+   r   on_retriever_start)r   r&  r   r   r   retriever_start_Nr  )r~   r   r&  ra   r+   rR   r   r   r   r'  Z  s   
z(MlflowCallbackHandler.on_retriever_start	documentsc                 K   s   | j d  d7  < | j d  d7  < | j d  d7  < | j d }i }dd |D }|d|d || j  | jj | j | j d d	 | jd
 | | jd | | j|d|  dS )z Run when Retriever ends running.r   r   r,   r    c                 S   s(   g | ]}|j d d |j D dqS )c                 S   s8   i | ]\}}|t |tst|n	d dd |D qS )r   c                 s   s    | ]}t |V  qd S r   r  rC   xr   r   r   	<genexpr>  s    zOMlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>.<dictcomp>.<genexpr>)r  r	  r   r   r  r   r   r   rF     s    
zEMlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>.<dictcomp>)page_contentmetadata)r-  r.  r   )rC   rT   r   r   r   r    s    z:MlflowCallbackHandler.on_retriever_end.<locals>.<listcomp>on_retriever_end)r   r)  r   r   r   retriever_end_Nr   )r~   r)  ra   r,   rR   retriever_documentsr   r   r   r/  r  s   
z&MlflowCallbackHandler.on_retriever_endc                 K   r   )zRun when Retriever errors.r   r   r!   Nr   r   r   r   r   on_retriever_error  r   z(MlflowCallbackHandler.on_retriever_errorc           
         s4  t  }|| jd }|| jd  ddg}d|jv r"|d nd|jv r7|d dd |d< |d || jd	d
jddid	d
}| jdurNt	 ng }| j
durYddgng }g d} fdd|D } ddg| | |  jd	d
jdddd	d
}|j||gd	d
}	|	ddg jdd d	d
|	d< |	S )z=Create a dataframe with all the information from the session.r   r   r   rY   r   idc                 S   s   | d S )Nr   r   )id_r   r   r   <lambda>  s    zCMlflowCallbackHandler._create_session_analysis_df.<locals>.<lambda>r   )axisprompt_stepNrM   rN   )token_usage_total_tokenstoken_usage_prompt_tokenstoken_usage_completion_tokensc                    s   g | ]	}| j v r|qS r   )columnsr*  on_llm_end_records_dfr   r   r    s    zEMlflowCallbackHandler._create_session_analysis_df.<locals>.<listcomp>r>   output_stepr  )r   r>   c                 S   s   t | d | d S )NrY   r  )r_   )rowr   r   r   r5    s    	chat_html)r   	DataFramer   r;  r   applydropnarenamer@   r=   r?   concat)
r~   pdon_llm_start_records_dfllm_input_columnsllm_input_prompts_dfcomplexity_metrics_columnsvisualizations_columnstoken_usage_columnsllm_outputs_dfsession_analysis_dfr   r<  r   _create_session_analysis_df  sd   




	

z1MlflowCallbackHandler._create_session_analysis_dfc                 C   s   t | jd S )Nr   )boolr   r   r   r   r   _contain_llm_records  s   z*MlflowCallbackHandler._contain_llm_recordsFlangchain_assetc                 C   st  t  }| jd|| jd  |  r;|  }|d}|jdddd}| jd|| | j	d
| d |rdtt|v rL| j| n_tt| jjd	}z|| | j| W nH ty   z|| | j| W n! ty   td
 t  Y n ty   td
 t  Y nw Y n ty   td
 t  Y nw |r| j  |   d S d S )Nr   r@  r[   rf   T)regexsession_analysiszlangchain.chains.llm.LLMChainz
model.jsonzCould not save model.)r   r   r   rA  r   rQ  rO  r   r\   r   r   tolistr   typer   r   r   r   saver   
ValueError
save_agentAttributeErrorprint	traceback	print_excNotImplementedErrorr   r   )r~   rR  r  rF  rN  r@  langchain_asset_pathr   r   r   flush_tracker  sL   



z#MlflowCallbackHandler.flush_tracker)r   r   NNNrf   r   )NF)'r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   BaseExceptionr   r   r	   r  r  r  r  r  r  r   r  r
   r"  r'  r   r   r/  r2  rO  rP  rQ  r`  __classcell__r   r   r   r   r      s    
G

.








$= r   )NN)0loggingrm   r   r   r   r\  copyr   pathlibr   typingr   r   r   r   r   r	   langchain_core.agentsr
   r   langchain_core.callbacksr   langchain_core.documentsr   langchain_core.outputsr   langchain_core.utilsr   r   #langchain_community.callbacks.utilsr   r   r   r   r   r   	getLoggerr   r   r   r   r-   r=   r   rX   r_   r`   r   r   r   r   r   <module>   sB      
	
*l