import typing

from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ..core.request_options import RequestOptions
from ..types.chat_messages import ChatMessages
from ..types.citation_options import CitationOptions
from ..types.embed_by_type_response import EmbedByTypeResponse
from ..types.embed_input import EmbedInput
from ..types.embed_input_type import EmbedInputType
from ..types.embedding_type import EmbeddingType
from ..types.response_format_v2 import ResponseFormatV2
from ..types.thinking import Thinking
from ..types.tool_v2 import ToolV2
from .raw_client import AsyncRawV2Client, RawV2Client
from .types.v2chat_request_documents_item import V2ChatRequestDocumentsItem
from .types.v2chat_request_safety_mode import V2ChatRequestSafetyMode
from .types.v2chat_request_tool_choice import V2ChatRequestToolChoice
from .types.v2chat_response import V2ChatResponse
from .types.v2chat_stream_request_documents_item import V2ChatStreamRequestDocumentsItem
from .types.v2chat_stream_request_safety_mode import V2ChatStreamRequestSafetyMode
from .types.v2chat_stream_request_tool_choice import V2ChatStreamRequestToolChoice
from .types.v2chat_stream_response import V2ChatStreamResponse
from .types.v2embed_request_truncate import V2EmbedRequestTruncate
from .types.v2rerank_response import V2RerankResponse

# Sentinel distinguishing "argument omitted" from an explicit None.
OMIT = typing.cast(typing.Any, ...)


class V2Client:
    def __init__(self, *, client_wrapper: SyncClientWrapper):
        self._raw_client = RawV2Client(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> RawV2Client:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        RawV2Client
        """
        return self._raw_client

    def chat_stream(
        self,
        *,
        model: str,
        messages: ChatMessages,
        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,
        strict_tools: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,
        citation_options: typing.Optional[CitationOptions] = OMIT,
        response_format: typing.Optional[ResponseFormatV2] = OMIT,
        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        logprobs: typing.Optional[bool] = OMIT,
        tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,
        thinking: typing.Optional[Thinking] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[V2ChatStreamResponse]:
        """
        Generates a text response to a user message and streams it down, token by token. To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.

        Parameters
        ----------
        model : str
            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).

        messages : ChatMessages

        tools : typing.Optional[typing.Sequence[ToolV2]]
            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.

            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).

        strict_tools : typing.Optional[bool]
            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).

            **Note**: The first few requests with a new set of tools will take longer to process.

        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]
            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.

        citation_options : typing.Optional[CitationOptions]

        response_format : typing.Optional[ResponseFormatV2]

        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]
            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
            When `OFF` is specified, the safety instruction will be omitted.

            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.

            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).

            **Note**: `command-r7b-12-2024` and newer models only support `"CONTEXTUAL"` and `"STRICT"` modes.

        max_tokens : typing.Optional[int]
            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).

            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.

            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.

        stop_sequences : typing.Optional[typing.Sequence[str]]
            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.

        temperature : typing.Optional[float]
            Defaults to `0.3`.

            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.

            Randomness can be further maximized by increasing the value of the `p` parameter.

        seed : typing.Optional[int]
            If specified, the backend will make a best effort to sample tokens
            deterministically, such that repeated requests with the same
            seed and parameters should return the same result. However,
            determinism cannot be totally guaranteed.

        frequency_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        presence_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        k : typing.Optional[int]
            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.
            Defaults to `0`, min value of `0`, max value of `500`.

        p : typing.Optional[float]
            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        logprobs : typing.Optional[bool]
            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.

        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]
            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.
            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.
            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.

            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.

        thinking : typing.Optional[Thinking]

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.Iterator[V2ChatStreamResponse]


        Examples
        --------
        from cohere import Client, UserChatMessageV2

        client = Client(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        response = client.v2.chat_stream(
            model="command-a-03-2025",
            messages=[
                UserChatMessageV2(
                    content="Tell me about LLMs",
                )
            ],
        )
        for chunk in response:
            print(chunk)
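
        Each streamed event carries a `type` field. As a minimal sketch, the generated
        text can be accumulated from the content deltas (the `content-delta` event shape
        below is an assumption based on recent SDK versions):

        text = ""
        for chunk in response:
            if chunk.type == "content-delta":
                text += chunk.delta.message.content.text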
        """
        with self._raw_client.chat_stream(
            model=model,
            messages=messages,
            tools=tools,
            strict_tools=strict_tools,
            documents=documents,
            citation_options=citation_options,
            response_format=response_format,
            safety_mode=safety_mode,
            max_tokens=max_tokens,
            stop_sequences=stop_sequences,
            temperature=temperature,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            k=k,
            p=p,
            logprobs=logprobs,
            tool_choice=tool_choice,
            thinking=thinking,
            priority=priority,
            request_options=request_options,
        ) as r:
            yield from r.data

    def chat(
        self,
        *,
        model: str,
        messages: ChatMessages,
        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,
        strict_tools: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,
        citation_options: typing.Optional[CitationOptions] = OMIT,
        response_format: typing.Optional[ResponseFormatV2] = OMIT,
        safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        logprobs: typing.Optional[bool] = OMIT,
        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,
        thinking: typing.Optional[Thinking] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> V2ChatResponse:
        """
        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.

        Parameters
        ----------
        model : str
            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).

        messages : ChatMessages

        tools : typing.Optional[typing.Sequence[ToolV2]]
            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.

            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).

        strict_tools : typing.Optional[bool]
            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).

            **Note**: The first few requests with a new set of tools will take longer to process.

        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]
            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.

        citation_options : typing.Optional[CitationOptions]

        response_format : typing.Optional[ResponseFormatV2]

        safety_mode : typing.Optional[V2ChatRequestSafetyMode]
            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
            When `OFF` is specified, the safety instruction will be omitted.

            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.

            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).

            **Note**: `command-r7b-12-2024` and newer models only support `"CONTEXTUAL"` and `"STRICT"` modes.

        max_tokens : typing.Optional[int]
            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).

            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.

            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.

        stop_sequences : typing.Optional[typing.Sequence[str]]
            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.

        temperature : typing.Optional[float]
            Defaults to `0.3`.

            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.

            Randomness can be further maximized by increasing the value of the `p` parameter.

        seed : typing.Optional[int]
            If specified, the backend will make a best effort to sample tokens
            deterministically, such that repeated requests with the same
            seed and parameters should return the same result. However,
            determinism cannot be totally guaranteed.

        frequency_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        presence_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        k : typing.Optional[int]
            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.
            Defaults to `0`, min value of `0`, max value of `500`.

        p : typing.Optional[float]
            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        logprobs : typing.Optional[bool]
            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.

        tool_choice : typing.Optional[V2ChatRequestToolChoice]
            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.
            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.
            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.

            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.

        thinking : typing.Optional[Thinking]

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        V2ChatResponse


        Examples
        --------
        from cohere import Client, UserChatMessageV2

        client = Client(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        client.v2.chat(
            model="command-a-03-2025",
            messages=[
                UserChatMessageV2(
                    content="Tell me about LLMs",
                )
            ],
        )
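
        The generated text can then be read off the returned message. A minimal sketch,
        assuming the usual v2 response shape where `message.content` is a list of
        content blocks:

        response = client.v2.chat(
            model="command-a-03-2025",
            messages=[UserChatMessageV2(content="Tell me about LLMs")],
        )
        print(response.message.content[0].text)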
        """
        _response = self._raw_client.chat(
            model=model,
            messages=messages,
            tools=tools,
            strict_tools=strict_tools,
            documents=documents,
            citation_options=citation_options,
            response_format=response_format,
            safety_mode=safety_mode,
            max_tokens=max_tokens,
            stop_sequences=stop_sequences,
            temperature=temperature,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            k=k,
            p=p,
            logprobs=logprobs,
            tool_choice=tool_choice,
            thinking=thinking,
            priority=priority,
            request_options=request_options,
        )
        return _response.data

    def embed(
        self,
        *,
        model: str,
        input_type: EmbedInputType,
        texts: typing.Optional[typing.Sequence[str]] = OMIT,
        images: typing.Optional[typing.Sequence[str]] = OMIT,
        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        output_dimension: typing.Optional[int] = OMIT,
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> EmbedByTypeResponse:
        """
        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.

        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.

        If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).

        Parameters
        ----------
        model : str
            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).

        input_type : EmbedInputType

        texts : typing.Optional[typing.Sequence[str]]
            An array of strings for the model to embed. Maximum number of texts per call is `96`.

        images : typing.Optional[typing.Sequence[str]]
            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data) in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format, with a maximum size of 5MB.

            Image embeddings are supported with Embed v3.0 and newer models.

        inputs : typing.Optional[typing.Sequence[EmbedInput]]
            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.

        max_tokens : typing.Optional[int]
            The maximum number of tokens to embed per input. If the input text is longer than this, it will be truncated according to the `truncate` parameter.

        output_dimension : typing.Optional[int]
            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.
            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.

        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]
            Specifies the types of embeddings you want to get back. Can be one or more of the following types.

            * `"float"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.
            * `"int8"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"binary"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"base64"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.

        truncate : typing.Optional[V2EmbedRequestTruncate]
            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        EmbedByTypeResponse
            OK

        Examples
        --------
        from cohere import Client

        client = Client(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        client.v2.embed(
            texts=["hello", "goodbye"],
            model="embed-v4.0",
            input_type="classification",
            embedding_types=["float"],
        )
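
        The response groups embeddings by the requested types. A minimal sketch of
        reading them back (the `float_` attribute name is an assumption, following the
        SDK convention of suffixing reserved words):

        response = client.v2.embed(
            texts=["hello", "goodbye"],
            model="embed-v4.0",
            input_type="classification",
            embedding_types=["float"],
        )
        first_vector = response.embeddings.float_[0]  # one vector per input text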
        """
        _response = self._raw_client.embed(
            model=model,
            input_type=input_type,
            texts=texts,
            images=images,
            inputs=inputs,
            max_tokens=max_tokens,
            output_dimension=output_dimension,
            embedding_types=embedding_types,
            truncate=truncate,
            priority=priority,
            request_options=request_options,
        )
        return _response.data

    def rerank(
        self,
        *,
        model: str,
        query: str,
        documents: typing.Sequence[str],
        top_n: typing.Optional[int] = OMIT,
        max_tokens_per_doc: typing.Optional[int] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> V2RerankResponse:
        """
        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.

        Parameters
        ----------
        model : str
            The identifier of the model to use, e.g. `rerank-v3.5`.

        query : str
            The search query.

        documents : typing.Sequence[str]
            A list of texts that will be compared to the `query`.
            For optimal performance we recommend against sending more than 1,000 documents in a single request.

            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.

            **Note**: structured data should be formatted as YAML strings for best performance.

        top_n : typing.Optional[int]
            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.

        max_tokens_per_doc : typing.Optional[int]
            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        V2RerankResponse
            OK

        Examples
        --------
        from cohere import Client

        client = Client(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        client.v2.rerank(
            documents=[
                "Carson City is the capital city of the American state of Nevada.",
                "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
                "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.",
                "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
                "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
            ],
            query="What is the capital of the United States?",
            top_n=3,
            model="rerank-v3.5",
        )
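
        Results come back sorted by relevance, each pointing back into the input list.
        A minimal sketch of joining them with the documents (assuming each result
        exposes `index` and `relevance_score`):

        docs = ["Carson City is the capital city of the American state of Nevada."]
        response = client.v2.rerank(model="rerank-v3.5", query="capital of Nevada", documents=docs)
        for result in response.results:
            print(result.relevance_score, docs[result.index])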
        """
        _response = self._raw_client.rerank(
            model=model,
            query=query,
            documents=documents,
            top_n=top_n,
            max_tokens_per_doc=max_tokens_per_doc,
            priority=priority,
            request_options=request_options,
        )
        return _response.data


class AsyncV2Client:
    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        self._raw_client = AsyncRawV2Client(client_wrapper=client_wrapper)

    @property
    def with_raw_response(self) -> AsyncRawV2Client:
        """
        Retrieves a raw implementation of this client that returns raw responses.

        Returns
        -------
        AsyncRawV2Client
        """
        return self._raw_client

    async def chat_stream(
        self,
        *,
        model: str,
        messages: ChatMessages,
        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,
        strict_tools: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]] = OMIT,
        citation_options: typing.Optional[CitationOptions] = OMIT,
        response_format: typing.Optional[ResponseFormatV2] = OMIT,
        safety_mode: typing.Optional[V2ChatStreamRequestSafetyMode] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        logprobs: typing.Optional[bool] = OMIT,
        tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,
        thinking: typing.Optional[Thinking] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[V2ChatStreamResponse]:
        """
        Generates a text response to a user message and streams it down, token by token. To learn how to use the Chat API with streaming follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.

        Parameters
        ----------
        model : str
            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).

        messages : ChatMessages

        tools : typing.Optional[typing.Sequence[ToolV2]]
            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.

            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).

        strict_tools : typing.Optional[bool]
            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).

            **Note**: The first few requests with a new set of tools will take longer to process.

        documents : typing.Optional[typing.Sequence[V2ChatStreamRequestDocumentsItem]]
            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.

        citation_options : typing.Optional[CitationOptions]

        response_format : typing.Optional[ResponseFormatV2]

        safety_mode : typing.Optional[V2ChatStreamRequestSafetyMode]
            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
            When `OFF` is specified, the safety instruction will be omitted.

            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.

            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).

            **Note**: `command-r7b-12-2024` and newer models only support `"CONTEXTUAL"` and `"STRICT"` modes.

        max_tokens : typing.Optional[int]
            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).

            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.

            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.

        stop_sequences : typing.Optional[typing.Sequence[str]]
            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.

        temperature : typing.Optional[float]
            Defaults to `0.3`.

            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.

            Randomness can be further maximized by increasing the value of the `p` parameter.

        seed : typing.Optional[int]
            If specified, the backend will make a best effort to sample tokens
            deterministically, such that repeated requests with the same
            seed and parameters should return the same result. However,
            determinism cannot be totally guaranteed.

        frequency_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        presence_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        k : typing.Optional[int]
            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.
            Defaults to `0`, min value of `0`, max value of `500`.

        p : typing.Optional[float]
            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        logprobs : typing.Optional[bool]
            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.

        tool_choice : typing.Optional[V2ChatStreamRequestToolChoice]
            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.
            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.
            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.

            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.

        thinking : typing.Optional[Thinking]

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Yields
        ------
        typing.AsyncIterator[V2ChatStreamResponse]


        Examples
        --------
        import asyncio

        from cohere import AsyncClient, UserChatMessageV2

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )


        async def main() -> None:
            response = await client.v2.chat_stream(
                model="command-a-03-2025",
                messages=[
                    UserChatMessageV2(
                        content="Tell me about LLMs",
                    )
                ],
            )
            async for chunk in response:
                print(chunk)


        asyncio.run(main())
        """
        async with self._raw_client.chat_stream(
            model=model,
            messages=messages,
            tools=tools,
            strict_tools=strict_tools,
            documents=documents,
            citation_options=citation_options,
            response_format=response_format,
            safety_mode=safety_mode,
            max_tokens=max_tokens,
            stop_sequences=stop_sequences,
            temperature=temperature,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            k=k,
            p=p,
            logprobs=logprobs,
            tool_choice=tool_choice,
            thinking=thinking,
            priority=priority,
            request_options=request_options,
        ) as r:
            async for _chunk in r.data:
                yield _chunk

    async def chat(
        self,
        *,
        model: str,
        messages: ChatMessages,
        tools: typing.Optional[typing.Sequence[ToolV2]] = OMIT,
        strict_tools: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]] = OMIT,
        citation_options: typing.Optional[CitationOptions] = OMIT,
        response_format: typing.Optional[ResponseFormatV2] = OMIT,
        safety_mode: typing.Optional[V2ChatRequestSafetyMode] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[int] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        logprobs: typing.Optional[bool] = OMIT,
        tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,
        thinking: typing.Optional[Thinking] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> V2ChatResponse:
        """
        Generates a text response to a user message. To learn how to use the Chat API and RAG follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

        Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.

        Parameters
        ----------
        model : str
            The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models).

        messages : ChatMessages

        tools : typing.Optional[typing.Sequence[ToolV2]]
            A list of tools (functions) available to the model. The model response may contain 'tool_calls' to the specified tools.

            Learn more in the [Tool Use guide](https://docs.cohere.com/docs/tools).

        strict_tools : typing.Optional[bool]
            When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools).

            **Note**: The first few requests with a new set of tools will take longer to process.

        documents : typing.Optional[typing.Sequence[V2ChatRequestDocumentsItem]]
            A list of relevant documents that the model can cite to generate a more accurate reply. Each document is either a string or document object with content and metadata.

        citation_options : typing.Optional[CitationOptions]

        response_format : typing.Optional[ResponseFormatV2]

        safety_mode : typing.Optional[V2ChatRequestSafetyMode]
            Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
            When `OFF` is specified, the safety instruction will be omitted.

            Safety modes are not yet configurable in combination with `tools` and `documents` parameters.

            **Note**: This parameter is only compatible with newer Cohere models, starting with [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release) and [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release).

            **Note**: `command-r7b-12-2024` and newer models only support `"CONTEXTUAL"` and `"STRICT"` modes.

        max_tokens : typing.Optional[int]
            The maximum number of output tokens the model will generate in the response. If not set, `max_tokens` defaults to the model's maximum output token limit. You can find the maximum output token limits for each model in the [model documentation](https://docs.cohere.com/docs/models).

            **Note**: Setting a low value may result in incomplete generations. In such cases, the `finish_reason` field in the response will be set to `"MAX_TOKENS"`.

            **Note**: If `max_tokens` is set higher than the model's maximum output token limit, the generation will be capped at that model-specific maximum limit.

        stop_sequences : typing.Optional[typing.Sequence[str]]
            A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.

        temperature : typing.Optional[float]
            Defaults to `0.3`.

            A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.

            Randomness can be further maximized by increasing the value of the `p` parameter.

        seed : typing.Optional[int]
            If specified, the backend will make a best effort to sample tokens
            deterministically, such that repeated requests with the same
            seed and parameters should return the same result. However,
            determinism cannot be totally guaranteed.

        frequency_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        presence_penalty : typing.Optional[float]
            Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
            Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        k : typing.Optional[int]
            Ensures that only the top `k` most likely tokens are considered for generation at each step. When `k` is set to `0`, k-sampling is disabled.
            Defaults to `0`, min value of `0`, max value of `500`.

        p : typing.Optional[float]
            Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
            Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        logprobs : typing.Optional[bool]
            Defaults to `false`. When set to `true`, the log probabilities of the generated tokens will be included in the response.

        tool_choice : typing.Optional[V2ChatRequestToolChoice]
            Used to control whether or not the model will be forced to use a tool when answering. When `REQUIRED` is specified, the model will be forced to use at least one of the user-defined tools, and the `tools` parameter must be passed in the request.
            When `NONE` is specified, the model will be forced **not** to use one of the specified tools, and give a direct response.
            If tool_choice isn't specified, then the model is free to choose whether to use the specified tools or not.

            **Note**: This parameter is only compatible with models [Command-r7b](https://docs.cohere.com/v2/docs/command-r7b) and newer.

        thinking : typing.Optional[Thinking]

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        V2ChatResponse


        Examples
        --------
        import asyncio

        from cohere import AsyncClient, UserChatMessageV2

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )


        async def main() -> None:
            await client.v2.chat(
                model="command-a-03-2025",
                messages=[
                    UserChatMessageV2(
                        content="Tell me about LLMs",
                    )
                ],
            )


        asyncio.run(main())
        """
        _response = await self._raw_client.chat(
            model=model,
            messages=messages,
            tools=tools,
            strict_tools=strict_tools,
            documents=documents,
            citation_options=citation_options,
            response_format=response_format,
            safety_mode=safety_mode,
            max_tokens=max_tokens,
            stop_sequences=stop_sequences,
            temperature=temperature,
            seed=seed,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            k=k,
            p=p,
            logprobs=logprobs,
            tool_choice=tool_choice,
            thinking=thinking,
            priority=priority,
            request_options=request_options,
        )
        return _response.data

    async def embed(
        self,
        *,
        model: str,
        input_type: EmbedInputType,
        texts: typing.Optional[typing.Sequence[str]] = OMIT,
        images: typing.Optional[typing.Sequence[str]] = OMIT,
        inputs: typing.Optional[typing.Sequence[EmbedInput]] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        output_dimension: typing.Optional[int] = OMIT,
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
        truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> EmbedByTypeResponse:
        """
        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.

        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.

        If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](https://docs.cohere.com/docs/semantic-search).

        Parameters
        ----------
        model : str
            ID of one of the available [Embedding models](https://docs.cohere.com/docs/cohere-embed).

        input_type : EmbedInputType

        texts : typing.Optional[typing.Sequence[str]]
            An array of strings for the model to embed. Maximum number of texts per call is `96`.

        images : typing.Optional[typing.Sequence[str]]
            An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

            The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data) in either `image/jpeg`, `image/png`, `image/webp`, or `image/gif` format, with a maximum size of 5MB.

            Image embeddings are supported with Embed v3.0 and newer models.

        inputs : typing.Optional[typing.Sequence[EmbedInput]]
            An array of inputs for the model to embed. Maximum number of inputs per call is `96`. An input can contain a mix of text and image components.

        max_tokens : typing.Optional[int]
            The maximum number of tokens to embed per input. If the input text is longer than this, it will be truncated according to the `truncate` parameter.

        output_dimension : typing.Optional[int]
            The number of dimensions of the output embedding. This is only available for `embed-v4` and newer models.
            Possible values are `256`, `512`, `1024`, and `1536`. The default is `1536`.

        embedding_types : typing.Optional[typing.Sequence[EmbeddingType]]
            Specifies the types of embeddings you want to get back. Can be one or more of the following types.

            * `"float"`: Use this when you want to get back the default float embeddings. Supported with all Embed models.
            * `"int8"`: Use this when you want to get back signed int8 embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"binary"`: Use this when you want to get back signed binary embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Supported with Embed v3.0 and newer Embed models.
            * `"base64"`: Use this when you want to get back base64 embeddings. Supported with Embed v3.0 and newer Embed models.

        truncate : typing.Optional[V2EmbedRequestTruncate]
            One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

            Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

            If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        EmbedByTypeResponse
            OK

        Examples
        --------
        import asyncio

        from cohere import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )


        async def main() -> None:
            await client.v2.embed(
                texts=["hello", "goodbye"],
                model="embed-v4.0",
                input_type="classification",
                embedding_types=["float"],
            )


        asyncio.run(main())
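
        Because the client is asynchronous, several embed calls can run concurrently.
        A minimal sketch using `asyncio.gather` (the batching scheme is illustrative,
        not part of the API):

        async def embed_batches(batches):
            responses = await asyncio.gather(
                *(
                    client.v2.embed(
                        texts=batch,
                        model="embed-v4.0",
                        input_type="classification",
                        embedding_types=["float"],
                    )
                    for batch in batches
                )
            )
            return responses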
        """
        _response = await self._raw_client.embed(
            model=model,
            input_type=input_type,
            texts=texts,
            images=images,
            inputs=inputs,
            max_tokens=max_tokens,
            output_dimension=output_dimension,
            embedding_types=embedding_types,
            truncate=truncate,
            priority=priority,
            request_options=request_options,
        )
        return _response.data

    async def rerank(
        self,
        *,
        model: str,
        query: str,
        documents: typing.Sequence[str],
        top_n: typing.Optional[int] = OMIT,
        max_tokens_per_doc: typing.Optional[int] = OMIT,
        priority: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> V2RerankResponse:
        """
        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.

        Parameters
        ----------
        model : str
            The identifier of the model to use, e.g. `rerank-v3.5`.

        query : str
            The search query.

        documents : typing.Sequence[str]
            A list of texts that will be compared to the `query`.
            For optimal performance we recommend against sending more than 1,000 documents in a single request.

            **Note**: long documents will automatically be truncated to the value of `max_tokens_per_doc`.

            **Note**: structured data should be formatted as YAML strings for best performance.

        top_n : typing.Optional[int]
            Limits the number of returned rerank results to the specified value. If not passed, all the rerank results will be returned.

        max_tokens_per_doc : typing.Optional[int]
            Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.

        priority : typing.Optional[int]
            The priority of the request. Lower values are handled earlier; the default `0` is the highest priority.
            Higher-priority requests are handled first, and dropped last when the system is under load.

        request_options : typing.Optional[RequestOptions]
            Request-specific configuration.

        Returns
        -------
        V2RerankResponse
            OK

        Examples
        --------
        import asyncio

        from cohere import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )


        async def main() -> None:
            await client.v2.rerank(
                documents=[
                    "Carson City is the capital city of the American state of Nevada.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
                    "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.",
                    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
                    "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
                ],
                query="What is the capital of the United States?",
                top_n=3,
                model="rerank-v3.5",
            )


        asyncio.run(main())
        """
        _response = await self._raw_client.rerank(
            model=model,
            query=query,
            documents=documents,
            top_n=top_n,
            max_tokens_per_doc=max_tokens_per_doc,
            priority=priority,
            request_options=request_options,
        )
        return _response.data