o
    粪g~)                     @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlm Z m!Z! ddl"m#Z# edZ$ededZ%dee$ de
e$ge%f dee$ fddZ&G dd deZ'dS )zo
Ensemble retriever that ensemble the results of
multiple retrievers by using weighted  Reciprocal Rank Fusion
    N)defaultdict)Hashable)chain)	AnyCallableDictIterableIteratorListOptionalTypeVarcast)#AsyncCallbackManagerForRetrieverRunCallbackManagerForRetrieverRun)Document)BaseRetrieverRetrieverLike)RunnableConfig)ensure_configpatch_config)ConfigurableFieldSpecget_unique_config_specs)model_validatorTH)bounditerablekeyreturnc                 c   s6    t  }| D ]}|| }|vr|| |V  qdS )a  Yield unique elements of an iterable based on a key function.

    Args:
        iterable: The iterable to filter.
        key: A function that returns a hashable key for each element.

    Yields:
        Unique elements of the iterable based on the key function.
    N)setadd)r   r   seenek r$   Y/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/langchain/retrievers/ensemble.pyunique_by_key(   s   

r&   c                
   @   sr  e Zd ZU dZee ed< ee ed< dZe	ed< dZ
ee ed< edee fd	d
Zeddedeeef defddZ	d%dedee dedee fddZ	d%dedee dedee fddZdededee fddZdededee fddZdddededee dee fddZdddededee dee fd d!Zd"eee  dee fd#d$ZdS )&EnsembleRetrieverae  Retriever that ensembles the multiple retrievers.

    It uses a rank fusion.

    Args:
        retrievers: A list of retrievers to ensemble.
        weights: A list of weights corresponding to the retrievers. Defaults to equal
            weighting for all retrievers.
        c: A constant added to the rank, controlling the balance between the importance
            of high-ranked items and the consideration given to lower-ranked items.
            Default is 60.
        id_key: The key in the document's metadata used to determine unique documents.
            If not specified, page_content is used.
    
retrieversweights<   cNid_keyr   c                 C   s   t dd | jD S )z+List configurable fields for this runnable.c                 s   s     | ]}|j D ]}|V  qqd S N)config_specs).0	retrieverspecr$   r$   r%   	<genexpr>Q   s    z1EnsembleRetriever.config_specs.<locals>.<genexpr>)r   r(   selfr$   r$   r%   r.   N   s   zEnsembleRetriever.config_specsbefore)modevaluesc                 C   s,   | dst|d }d| g| |d< |S )Nr)   r(      )getlen)clsr7   n_retrieversr$   r$   r%   set_weightsU   s   
zEnsembleRetriever.set_weightsinputconfigkwargsc           	   
   K   s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
p5|  i|}z
| j	|||d}W n t
yW } z|| |d }~ww |j|fi | |S )Nr   )CallbackManager	callbacksverboseFtagsmetadatarC   inheritable_tags
local_tagsinheritable_metadatalocal_metadatanamerun_namerun_managerr?   )langchain_core.callbacksrA   r   	configurer9   rD   rE   on_retriever_startget_namerank_fusion	Exceptionon_retriever_erroron_retriever_end)	r4   r>   r?   r@   rA   callback_managerrN   resultr"   r$   r$   r%   invoke]   s@   


	
zEnsembleRetriever.invokec           	   
      s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
p6|  i|I d H }z| j	|||dI d H }W n t
ya } z
||I d H  |d }~ww |j|fi |I d H  |S )Nr   )AsyncCallbackManagerrB   rC   FrD   rE   rF   rK   rL   rM   )rO   rZ   r   rP   r9   rD   rE   rQ   rR   arank_fusionrT   rU   rV   )	r4   r>   r?   r@   rZ   rW   rN   rX   r"   r$   r$   r%   ainvoke~   sF   


	zEnsembleRetriever.ainvokequeryrN   c                C   s   |  ||}|S )z
        Get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        )rS   r4   r]   rN   fused_documentsr$   r$   r%   _get_relevant_documents   s   z)EnsembleRetriever._get_relevant_documentsc                   s   |  ||I dH }|S )z
        Asynchronously get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        N)r[   r^   r$   r$   r%   _aget_relevant_documents   s   z*EnsembleRetriever._aget_relevant_documents)r?   c                   sR    fddt | jD }tt|D ]}dd || D ||< q| |}|S )z
        Retrieve the results of the retrievers and use rank_fusion_func to get
        the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    6   g | ]\}}| t jd |d  ddqS 
retriever_r8   )tag)rB   )rY   r   	get_childr/   ir0   r?   r]   rN   r$   r%   
<listcomp>   s    z1EnsembleRetriever.rank_fusion.<locals>.<listcomp>c                 S   s*   g | ]}t |trttt|d n|qS )page_content)
isinstancestrr   r   r/   docr$   r$   r%   rj      s    )	enumerater(   ranger:   weighted_reciprocal_rankr4   r]   rN   r?   retriever_docsrh   r_   r$   ri   r%   rS      s   
zEnsembleRetriever.rank_fusionc                   s`   t j fddt| jD  I dH }tt|D ]}dd || D ||< q| |}|S )z
        Asynchronously retrieve the results of the retrievers
        and use rank_fusion_func to get the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    rb   rc   )r\   r   rf   rg   ri   r$   r%   rj     s    z2EnsembleRetriever.arank_fusion.<locals>.<listcomp>Nc                 S   s$   g | ]}t |tst|d n|qS rk   )rm   r   ro   r$   r$   r%   rj     s    )asynciogatherrq   r(   rr   r:   rs   rt   r$   ri   r%   r[      s   

zEnsembleRetriever.arank_fusion	doc_listsc                    s   t |t jkrtdtt t|jD ](\}}t|ddD ]\}} jdu r.|jn|j	j   ||j
  7  < q!qt|}tt|fddd fddd	}|S )
a  
        Perform weighted Reciprocal Rank Fusion on multiple rank lists.
        You can find more details about RRF here:
        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf

        Args:
            doc_lists: A list of rank lists, where each rank list contains unique items.

        Returns:
            list: The final aggregated list of items sorted by their weighted RRF
                    scores in descending order.
        z<Number of rank lists must be equal to the number of weights.r8   )startNc                    s    j d u r| jS | j j  S r-   r,   rl   rE   rp   r3   r$   r%   <lambda>F  s   
z<EnsembleRetriever.weighted_reciprocal_rank.<locals>.<lambda>Tc                    s"    j d u r
| j S | jj   S r-   rz   r{   	rrf_scorer4   r$   r%   r|   M  s
    
)reverser   )r:   r)   
ValueErrorr   floatziprq   r,   rl   rE   r+   r   from_iterablesortedr&   )r4   rx   doc_listweightrankrp   all_docssorted_docsr$   r}   r%   rs      s2   




z*EnsembleRetriever.weighted_reciprocal_rankr-   ) __name__
__module____qualname____doc__r
   r   __annotations__r   r+   intr,   r   rn   propertyr   r.   r   classmethodr   r   r=   r   r   rY   r\   r   r`   r   ra   rS   r[   rs   r$   r$   r$   r%   r'   9   s   
 
"
#


.
,
r'   )(r   rv   collectionsr   collections.abcr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   r   rO   r   r   langchain_core.documentsr   langchain_core.retrieversr   r   langchain_core.runnablesr   langchain_core.runnables.configr   r   langchain_core.runnables.utilsr   r   pydanticr   r   r   r&   r'   r$   r$   r$   r%   <module>   s"    ,(