o
    ȳg                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ eeZG dd deZdS )    N)AnyDictIteratorList)Document)	BaseModelmodel_validatorc                   @   sN  e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< d	Zeed
< dZe
ed< dZe
ed< dZe
ed< dZeed< dZeed< eddededefddZdedefddZdedee fddZdedee fd d!Zd"edefd#d$Zdedee fd%d&Zdedee fd'd(Zd)ed*edefd+d,Z d)ed-edefd.d/Z!d0S )1PubMedAPIWrappera`  
    Wrapper around PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
          Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
          Content will be truncated if it exceeds this length.
          Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
          Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
        api_key: API key to be used for the PubMed API.
    parsez;https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?base_url_esearchz:https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?base_url_efetch   	max_retryg?
sleep_time   top_k_resultsi,  MAX_QUERY_LENGTHi  doc_content_chars_maxzyour_email@example.comemail api_keybefore)modevaluesreturnc                 C   s0   zddl }|j|d< W |S  ty   tdw )z7Validate that the python package exists in environment.r   Nr
   zZCould not import xmltodict python package. Please install it with `pip install xmltodict`.)	xmltodictr
   ImportError)clsr   r    r   `/var/www/html/chatdoc2/venv/lib/python3.10/site-packages/langchain_community/utilities/pubmed.pyvalidate_environment5   s   z%PubMedAPIWrapper.validate_environmentqueryc              
   C   sn   zdd |  |d| j D }|rd|d| j W S dW S  ty6 } zd| W  Y d}~S d}~ww )z
        Run PubMed search and get the article meta information.
        See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
        It uses only the most informative fields of article meta information.
        c                 S   s8   g | ]}d |d  d|d  d|d  d|d  qS )zPublished: 	Publishedz
Title: Titlez
Copyright Information: Copyright Informationz
Summary::
Summaryr   ).0resultr   r   r   
<listcomp>M   s    z(PubMedAPIWrapper.run.<locals>.<listcomp>Nz

zNo good PubMed Result was foundzPubMed exception: )loadr   joinr   	Exception)selfr!   docsexr   r   r   runD   s   zPubMedAPIWrapper.runc                 c   s    | j d ttj|h d| j d }| jdkr#|d| j 7 }tj|}|	 
d}t|}|d d }|d d	 D ]	}| ||V  qAd
S )z
        Search PubMed for documents matching the query.
        Return an iterator of dictionaries containing the document metadata.
        zdb=pubmed&term=z&retmode=json&retmax=z&usehistory=yr   	&api_key=utf-8esearchresultwebenvidlistN)r   strurllibr
   quoter   r   requesturlopenreaddecodejsonloadsretrieve_article)r,   r!   urlr'   text	json_textr3   uidr   r   r   	lazy_load^   s$   

zPubMedAPIWrapper.lazy_loadc                 C   s   t | |S )z
        Search PubMed for documents matching the query.
        Return a list of dictionaries containing the document metadata.
        )listrC   r,   r!   r   r   r   r)   t   s   zPubMedAPIWrapper.loaddocc                 C   s   | d}t||dS )Nr%   )page_contentmetadata)popr   )r,   rF   summaryr   r   r   _dict2document{   s   
zPubMedAPIWrapper._dict2documentc                 c   s$    | j |dD ]}| |V  qd S N)r!   )rC   rK   )r,   r!   dr   r   r   lazy_load_docs   s   zPubMedAPIWrapper.lazy_load_docsc                 C   s   t | j|dS rL   )rD   rN   rE   r   r   r   	load_docs   s   zPubMedAPIWrapper.load_docsrB   r3   c           	   
   C   s   | j d | d | }| jdkr|d| j 7 }d}	 ztj|}W n= tjjy_ } z.|jdkrS|| jk rSt	d| j
d	d
 t| j
 |  j
d9  _
|d7 }n|W Y d }~nd }~ww q| d}| |}| ||S )Nzdb=pubmed&retmode=xml&id=z&webenv=r   r0   r   Ti  zToo Many Requests, waiting for z.2fz seconds...      r1   )r   r   r6   r8   r9   error	HTTPErrorcoder   printr   timesleepr:   r;   r
   _parse_article)	r,   rB   r3   r?   retryr'   exml_text	text_dictr   r   r   r>      sF   


z!PubMedAPIWrapper.retrieve_articler\   c           	      C   s   z|d d d d }W n t y   |d d d }Y nw |di dg }d	d
 |D }|r6d|nt|tr=|nt|trNddd | D nd}|di }d|dd|dd|ddg}||dd||di dd|dS )NPubmedArticleSetPubmedArticleMedlineCitationArticlePubmedBookArticleBookDocumentAbstractAbstractTextc                 S   s2   g | ]}d |v rd|v r|d  d|d   qS )z#textz@Labelz: r   )r&   txtr   r   r   r(      s
    z3PubMedAPIWrapper._parse_article.<locals>.<listcomp>
c                 s   s    | ]}t |V  qd S )N)r5   )r&   valuer   r   r   	<genexpr>   s    z2PubMedAPIWrapper._parse_article.<locals>.<genexpr>zNo abstract availableArticleDate-Yearr   MonthDayArticleTitleCopyrightInformation)rB   r#   r"   r$   r%   )KeyErrorgetr*   
isinstancer5   dictr   )	r,   rB   r\   arabstract_text	summariesrJ   a_dpub_dater   r   r   rX      s>    
zPubMedAPIWrapper._parse_articleN)"__name__
__module____qualname____doc__r   __annotations__r   r5   r   r   intr   floatr   r   r   r   r   r   classmethodr   r    r/   r   rs   rC   r   r)   r   rK   rN   rO   r>   rX   r   r   r   r   r	      s0   
 
"r	   )r<   loggingrV   urllib.errorr6   urllib.parseurllib.requesttypingr   r   r   r   langchain_core.documentsr   pydanticr   r   	getLoggerry   loggerr	   r   r   r   r   <module>   s    
