o
    i7                     @   s   d Z ddlZddlmZmZ ddlmZmZ ddlmZ ddl	m
Z
 ddl	mZ dd	l	mZ dd
l	mZ edZg dZG dd dZdedejjjdefddZdedefddZG dd dZdS )z)[Experimental] Text Only Local Tokenizer.    N)AnyIterable)OptionalUnion)sentencepiece_model_pb2   )_common)_local_tokenizer_loader)_transformers)typeszgoogle_genai.local_tokenizer)_parse_hex_byte_token_str_to_bytesLocalTokenizer_TextsAccumulatorc                   @   s&  e Zd ZdZd)ddZdee fddZdeej	 ddfd	d
Z
dej	ddfddZdejddfddZdejdejfddZdeej ddfddZdeej ddfddZdejddfddZdejdejfddZd ejdejfd!d"Zd#eeef deeef fd$d%Zd&edefd'd(ZdS )*r   a  Accumulates countable texts from `Content` and `Tool` objects.

  This class is responsible for traversing complex `Content` and `Tool`
  objects and extracting all the text content that should be included when
  calculating token counts.

  A key feature of this class is its ability to detect unsupported fields in
  `Content` objects. If a user provides a `Content` object with fields that
  this local tokenizer doesn't recognize (e.g., new fields added in a future
  API update), this class will log a warning.

  The detection mechanism for `Content` objects works by recursively building
  a "counted" version of the input object. This "counted" object only
  contains the data that was successfully processed and added to the text
  list for tokenization. After traversing the input, the original `Content`
  object is compared to the "counted" object. If they don't match, it
  signifies the presence of unsupported fields, and a warning is logged.
  returnNc                 C   s
   g | _ d S N_textsself r   b/var/www/html/karishye-ai-python/venv/lib/python3.10/site-packages/google/genai/local_tokenizer.py__init__;   s   
z_TextsAccumulator.__init__c                 C   s   | j S r   r   r   r   r   r   	get_texts>   s   z_TextsAccumulator.get_textscontentsc                 C      |D ]}|  | qd S r   )add_content)r   r   contentr   r   r   add_contentsA      z_TextsAccumulator.add_contentsr   c                 C   s  t jg |jd}|jrg|jD ]X}|jd usJ t  }|jd us%|jd ur)td|jd ur2|j|_|j	d urA| 
|j	 |j	|_	|jd urP| |j |j|_|jd ur`|j|_| j|j |j| q|jdd|jddkrtd| d| d d S d S )N)partsrolez6LocalTokenizers do not support non-text content types.T)exclude_nonezHContent contains unsupported types for token counting. Supported fields z. Got .)r   Contentr!   r    Part	file_datainline_data
ValueErrorvideo_metadatafunction_calladd_function_callfunction_responseadd_function_responsetextr   append
model_dumploggerwarning)r   r   counted_contentpartcounted_partr   r   r   r   E   s@   




z_TextsAccumulator.add_contentr*   c                 C   sB   |j r
| j|j  tj|j d}|jr| |j}||_dS dS )zProcesses a function call and adds relevant text to the accumulator.

    Args:
        function_call: The function call to process.
    )nameN)r6   r   r/   r   FunctionCallargs_dict_traverse)r   r*   counted_function_callcounted_argsr   r   r   r+   d   s   
z#_TextsAccumulator.add_function_calltoolc                 C   sH   t jg d}|jr"|jD ]}| |}|jd u rg |_|j| q|S )N)function_declarations)r   Toolr=   _function_declaration_traverser/   )r   r<   counted_toolfunction_declarationcounted_function_declarationr   r   r   add_toolq   s   

z_TextsAccumulator.add_tooltoolsc                 C   r   r   )rC   )r   rD   r<   r   r   r   	add_tools~   r   z_TextsAccumulator.add_toolsfunction_responsesc                 C   r   r   )r-   )r   rF   r,   r   r   r   add_function_responses   s   z(_TextsAccumulator.add_function_responsesr,   c                 C   sD   t  }|jr| j|j |j|_|jr | |j}||_d S d S r   )r   FunctionResponser6   r   r/   responser9   )r   r,   counted_function_responsecounted_responser   r   r   r-      s   
z'_TextsAccumulator.add_function_responserA   c                 C   st   t  }|jr| j|j |j|_|jr | j|j |j|_|jr,| |j}||_|jr8| |j}||_|S r   )	r   FunctionDeclarationr6   r   r/   description
parameters
add_schemarI   )r   rA   rB   counted_parametersrK   r   r   r   r?      s   z0_TextsAccumulator._function_declaration_traverseschemac           	      C   s*  t  }|jr|j|_|jr|j|_|jdur|j|_|jr)| j|j |j|_|jr7| j|j |j|_|j	rE| j
|j	 |j	|_	|jrS| j
|j |j|_|jrZ|j|_|jrf| |j}||_|jri }|j D ]\}}| j| | |}|||< qp||_|jr| |j}||_|S )zProcesses a schema and adds relevant text to the accumulator.

    Args:
        schema: The schema to process.

    Returns:
        The new schema object with only countable fields.
    N)r   Schematypetitledefaultformatr   r/   rM   enumextendrequiredproperty_orderingitemsrO   
propertiesexample_any_traverse)	r   rQ   counted_schemacounted_schema_itemsdkeyvaluecounted_valuecounted_schema_exampler   r   r   rO      sF   	


z_TextsAccumulator.add_schemara   c                 C   s<   i }| j t|  | D ]\}}| |||< q|S )zProcesses a dict and adds relevant text to the accumulator.

    Args:
        d: The dict to process.

    Returns:
        The new dict object with only countable fields.
    )r   rX   listkeysr[   r^   )r   ra   counted_dictrb   valr   r   r   r9      s
   	z _TextsAccumulator._dict_traverserc   c                    sN   t |tr j| |S t |tr |S t |tr% fdd|D S |S )zProcesses a value and adds relevant text to the accumulator.

    Args:
        value: The value to process.

    Returns:
        The new value with only countable fields.
    c                    s   g | ]}  |qS r   )r^   ).0itemr   r   r   
<listcomp>   s    z3_TextsAccumulator._any_traverse.<locals>.<listcomp>)
isinstancestrr   r/   dictr9   rf   )r   rc   r   r   r   r^      s   
	


z_TextsAccumulator._any_traverse)r   N)__name__
__module____qualname____doc__r   r   rn   r   r   r$   r   r   r7   r+   r>   rC   rE   rH   rG   r-   rL   r?   rR   rO   ro   r   r9   r^   r   r   r   r   r   '   s6    



"-r   tokenrS   r   c                 C   s4   |t jjjjkrt| jdddS | dddS )Nr   big)length	byteorderu   ▁ zutf-8)	r   
ModelProtoSentencePieceTypeBYTEr   to_bytesreplaceencode)rt   rS   r   r   r   r      s   r   c                 C   s   t | dkrtd|  | dr| dstd|  zt| dd d}W n ty7   td	|  w |d
krCtd|  |S )zParses a hex byte string of the form '<0xXX>' and returns the integer value.

  Raises ValueError if the input is malformed or the byte value is invalid.
     zInvalid byte length: z<0x>zInvalid byte format:          zInvalid hex value:    zByte value out of range: )lenr(   
startswithendswithint)rt   ri   r   r   r   r      s   r   c                   @   s   e Zd ZdZdefddZeddddee	j
e	jf d	ee	j d
e	jfddZeddee	j
e	jf d
e	jfddZdS )r   a  [Experimental] Text Only Local Tokenizer.

  This class provides a local tokenizer for text only token counting.

  LIMITATIONS:
  - Only supports text based tokenization and no multimodal tokenization.
  - Forward compatibility depends on the open-source tokenizer models for future
  Gemini versions.
  - For token counting of tools and response schemas, the `LocalTokenizer` only
  supports `types.Tool` and `types.Schema` objects. Python functions or Pydantic
  models cannot be passed directly.
  
model_namec                 C   s,   t || _t | j| _t | j| _d S r   )loaderget_tokenizer_name_tokenizer_nameload_model_proto_model_protoget_sentencepiece
_tokenizer)r   r   r   r   r   r   $  s   zLocalTokenizer.__init__zThe SDK's local tokenizer implementation is experimental and may change in the future. It only supports text based tokenization.N)configr   r   r   c                C   s   t |}t }tj|pi }|| |jr||j |j	r,|j	j
r,||j	j
 |jr9|t |jg | jt| }tjtdd |D dS )a  Counts the number of tokens in a given text.

    Args:
      contents: The contents to tokenize.
      config: The configuration for counting tokens.

    Returns:
      A `CountTokensResult` containing the total number of tokens.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.count_tokens("What is your name?")
      print(result)
      # total_tokens=5
    c                 s   s    | ]}t |V  qd S r   )r   )rj   tokensr   r   r   	<genexpr>R  s    z.LocalTokenizer.count_tokens.<locals>.<genexpr>)total_tokens)t
t_contentsr   r   CountTokensConfigmodel_validater   rD   rE   generation_configresponse_schemarO   system_instructionr   r   rf   r   CountTokensResultsum)r   r   r   processed_contentstext_accumulatortokens_listr   r   r   count_tokens)  s   

zLocalTokenizer.count_tokensc                    s   t |}t }|D ]}|| q
 j| }g }|D ]}|jr/|jD ]}||j	 q&qg }t
||D ]\}	}
|tjdd |	jD  fdd|	jD |
d q7tj|dS )a,  Computes the tokens ids and string pieces in the input.

    Args:
      contents: The contents to tokenize.

    Returns:
      A `ComputeTokensResult` containing the token information.

    Usage:

    .. code-block:: python

      from google import genai
      tokenizer = genai.LocalTokenizer(model_name='gemini-2.0-flash-001')
      result = tokenizer.compute_tokens("What is your name?")
      print(result)
      # tokens_info=[TokensInfo(token_ids=[279, 329, 1313, 2508, 13], tokens=[b' What', b' is', b' your', b' name', b'?'], role='user')]
    c                 S   s   g | ]}|j qS r   )idrj   piecer   r   r   rl     s    z1LocalTokenizer.compute_tokens.<locals>.<listcomp>c                    s$   g | ]}t |j jj|j jqS r   )r   r   r   piecesr   rS   r   r   r   r   rl     s    )	token_idsr   r!   )tokens_info)r   r   r   r   r   EncodeAsImmutableProtor   r    r/   r!   zipr   
TokensInfor   ComputeTokensResult)r   r   r   r   r   tokens_protosroles_token_infostokens_protor!   r   r   r   compute_tokensU  s2   


zLocalTokenizer.compute_tokens)rp   rq   rr   rs   rn   r   r   experimental_warningr   r   ContentListUnionContentListUnionDictr   CountTokensConfigOrDictr   r   r   r   r   r   r   r   r     s,    (r   )rs   loggingtypingr   r   r   r   sentencepiecer    r   r	   r   r
   r   r   	getLoggerr1   __all__r   rn   ry   rz   r{   bytesr   r   r   r   r   r   r   r   <module>   s,   
 Q
	