o
    iY-                     @  s  U d dl mZ d dlmZmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ dd	lmZ d
dlmZ d
dlmZmZmZmZ dZeddG dd deeeef ZeddG dd deeeef Zd:d;ddZeddG dd deeeef Z eddG dd  d eeeef Z!eddG d!d" d"eeeef Z"G d#d$ d$edd%Z#d<d.d/Z$eddG d0d1 d1eeeef Z%eddG d2d3 d3eeeef Z&eee e!e"e%e&fZ'd4e(d5< d=d7d8Z)d9S )>    )annotations)	dataclassfield)	timedelta)AnyLiteralcast)	TypedDict)models)ModelSettings   )	SpanQuery   )EvaluatorContext)EvaluationReasonEvaluationScalar	EvaluatorEvaluatorOutput)EqualsEqualsExpectedContains
IsInstanceMaxDurationLLMJudgeHasMatchingSpanOutputConfigF)reprc                   @  6   e Zd ZU dZded< eddZded< dddZdS )r   z6Check if the output exactly equals the provided value.r   valueNdefault
str | Noneevaluation_namectx(EvaluatorContext[object, object, object]returnboolc                 C  s   |j | jkS N)outputr   selfr#    r+   f/var/www/html/karishye-ai-python/venv/lib/python3.10/site-packages/pydantic_evals/evaluators/common.pyevaluate#   s   zEquals.evaluater#   r$   r%   r&   __name__
__module____qualname____doc____annotations__r   r"   r-   r+   r+   r+   r,   r      
   
 r   c                   @  s.   e Zd ZU dZeddZded< dd
dZdS )r   z7Check if the output exactly equals the expected output.Nr   r!   r"   r#   r$   r%   bool | dict[str, bool]c                 C  s   |j d u ri S |j|j kS r'   )expected_outputr(   r)   r+   r+   r,   r-   -   s   
zEqualsExpected.evaluate)r#   r$   r%   r6   )r0   r1   r2   r3   r   r"   r4   r-   r+   r+   r+   r,   r   '   s   
 r   d   r   r   
max_lengthintr%   strc                 C  s>   t | }t||kr|d |d  d || d d   }|S )Nr   z...)r   len)r   r9   
repr_valuer+   r+   r,   _truncated_repr7   s   &r>   c                   @  sN   e Zd ZU dZded< dZded< dZded< ed	d
Zded< dddZ	d	S )r   a\  Check if the output contains the expected output.

    For strings, checks if expected_output is a substring of output.
    For lists/tuples, checks if expected_output is in output.
    For dicts, checks if all key-value pairs in expected_output are in output.

    Note: case_sensitive only applies when both the value and output are strings.
    r   r   Tr&   case_sensitiveF
as_stringsNr   r!   r"   r#   r$   r%   r   c              
   C  s  d }| j pt| jtot|jt}|rJt|j}t| j}| js(| }| }d }||vrBt|dd}t|dd}d| d| }t|d u |dS zt|jt	rt| jt	rt
t	ttf |j}t
t	ttf | j}	|	D ]>}
|
|vrt|
dd}d| } n,||
 |	|
 krt|
dd}t||
 dd}t|	|
 dd}d| d	| d
| } nqmn'| j|jvrt|jdd}d| d}n| j|jvrt|jdd}d| d}W n ttfy } zd| }W Y d }~nd }~ww t|d u |dS )Nr8   )r9   zOutput string z" does not contain expected string r   reason   z0Output dictionary does not contain expected key z.Output dictionary has different value for key z: z !=    zOutput z) does not contain provided value as a keyz  does not contain provided valuezContainment check failed: )r@   
isinstancer   r;   r(   r?   lowerr>   r   dictr   r   	TypeError
ValueError)r*   r#   failure_reasonr@   
output_strexpected_stroutput_truncexpected_truncoutput_dictexpected_dictkk_truncoutput_v_truncexpected_v_truncer+   r+   r,   r-   N   sZ   


zContains.evaluater#   r$   r%   r   )
r0   r1   r2   r3   r4   r?   r@   r   r"   r-   r+   r+   r+   r,   r   >   s   
 	r   c                   @  r   )r   zACheck if the output is an instance of a type with the given name.r;   	type_nameNr   r!   r"   r#   r$   r%   r   c                 C  s   |j }t|jD ]}|j| jks|j| jkrtdd  S qdt|j }t|jt|jkr;|dt|j d7 }td|dS )NT)r   zoutput is of type z (qualname: )FrA   )r(   type__mro__r0   rW   r2   r   )r*   r#   r(   clsrB   r+   r+   r,   r-      s   zIsInstance.evaluaterV   r/   r+   r+   r+   r,   r      r5   r   c                   @  s$   e Zd ZU dZded< ddd	Zd
S )r   z;Check if the execution time is under the specified maximum.zfloat | timedeltasecondsr#   r$   r%   r&   c                 C  s.   t |jd}| j}t|t st |d}||kS )N)r\   )r   durationr\   rE   )r*   r#   r]   r\   r+   r+   r,   r-      s
   

zMaxDuration.evaluateNr.   )r0   r1   r2   r3   r4   r-   r+   r+   r+   r,   r      s   
 r   c                   @  s"   e Zd ZU dZded< ded< dS )r   zLConfiguration for the score and assertion outputs of the LLMJudge evaluator.r;   r"   r&   include_reasonN)r0   r1   r2   r3   r4   r+   r+   r+   r,   r      s   
 r   )totalcombined_output.dict[str, EvaluationScalar | EvaluationReason]r   rB   r!   configdefault_nameNonec                 C  s@   | dp|}| dr|d urt||d| |< d S || |< d S )Nr"   r^   rA   )getr   )r`   r   rB   rb   rc   namer+   r+   r,   _update_combined_output   s   rg   c                      s   e Zd ZU dZded< dZded< dZded	< dZded
< dZded< dZ	ded< e
dd dZded< dddZ fddZ  ZS )r   aI  Judge whether the output of a language model meets the criteria of a provided rubric.

    If you do not specify a model, it uses the default model for judging. This starts as 'openai:gpt-4o', but can be
    overridden by calling [`set_default_judge_model`][pydantic_evals.evaluators.llm_as_a_judge.set_default_judge_model].
    r;   rubricNz+models.Model | models.KnownModelName | NonemodelFr&   include_inputinclude_expected_outputzModelSettings | Nonemodel_settingszOutputConfig | Literal[False]scorec                   C  s
   t ddS )NT)r^   )r   r+   r+   r+   r,   <lambda>   s   
 zLLMJudge.<lambda>)default_factory	assertionr#   r$   r%   r   c                   sR  | j r7| jr ddlm} ||j|j|j| j| j| j	I d H }nEddlm
} ||j|j| j| j| j	I d H }n.| jrQddlm} ||j|j| j| j| j	I d H }nddlm} ||j| j| j| j	I d H }i }| jduop| jdu}|  }	| jdur|r|	 dn|	}
t||j|j| j|
 | jdur|r|	 dn|	}
t||j|j| j|
 |S )	Nr   )judge_input_output_expected)judge_input_output)judge_output_expected)judge_outputF_score_pass)rj   rk   llm_as_a_judgerq   inputsr(   r7   rh   ri   rl   rr   rs   rt   rm   rp   get_default_evaluation_namerg   rB   pass_)r*   r#   rq   grading_outputrr   rs   rt   r(   include_bothr"   rc   r+   r+   r,   r-      s8   

zLLMJudge.evaluatec                   s>   t   }|d }rt|tjr|j d|j |d< |S )Nri   :)superbuild_serialization_argumentsre   rE   r
   Modelsystem
model_name)r*   resultri   	__class__r+   r,   r      s   
z&LLMJudge.build_serialization_arguments)r#   r$   r%   r   )r0   r1   r2   r3   r4   ri   rj   rk   rl   rm   r   rp   r-   r   __classcell__r+   r+   r   r,   r      s   
 
+r   c                   @  r   )r   zHCheck if the span tree contains a span that matches the specified query.r   queryNr   r!   r"   r#   r$   r%   r&   c                 C  s   |j | jS r'   )	span_treeanyr   r)   r+   r+   r,   r-     s   zHasMatchingSpan.evaluater.   r/   r+   r+   r+   r,   r      r5   r   z3tuple[type[Evaluator[object, object, object]], ...]DEFAULT_EVALUATORSrf   c                 C  s$   | dkrt dtdtd| )NPythonzThe `Python` evaluator has been removed for security reasons. See https://github.com/pydantic/pydantic-ai/pull/2808 for more details and a workaround.zmodule z has no attribute )ImportErrorAttributeErrorr0   )rf   r+   r+   r,   __getattr__  s
   r   N)r8   )r   r   r9   r:   r%   r;   )r`   ra   r   r   rB   r!   rb   r   rc   r;   r%   rd   )rf   r;   )*
__future__r   _annotationsdataclassesr   r   datetimer   typingr   r   r   typing_extensionsr	   pydantic_air
   pydantic_ai.settingsr   otel.span_treer   contextr   	evaluatorr   r   r   r   __all__objectr   r   r>   r   r   r   r   rg   r   r   r   r4   r   r+   r+   r+   r,   <module>   sJ    
D
F