utils

check_either_required_env_variables_set

check_either_required_env_variables_set(
    required_env_variables: list[list[str]],
) -> list[Exception]

Check that, for a given list of lists of environment variables, at least one of the environment variables in each inner list is set.

For example, if required_env_variables is [['A', 'B'], ['C', 'D']], then we first look at ['A', 'B'] and check that at least one of the environment variables 'A' or 'B' is set. For each of 'A' or 'B' that is not set, we add a Warning to the returned list. If neither 'A' nor 'B' is set, we instead add a KeyError to the returned list. We then repeat this process for ['C', 'D'].

Parameters:

required_env_variables : list[list[str]], required
    List of lists of environment variables where at least one of the
    environment variables must be set.

Returns:

list[Exception]
    List of exceptions: either Warnings to say an environment variable isn't set,
    or KeyErrors if none of the required environment variables in a list are set.

Source code in src/prompto/utils.py
def check_either_required_env_variables_set(
    required_env_variables: list[list[str]],
) -> list[Exception]:
    """
    Check that, for a given list of lists of environment variables,
    at least one of the environment variables in each inner list is set.

    For example, if required_env_variables is `[['A', 'B'], ['C', 'D']]`,
    then we first look at `['A', 'B']` and check that at least one of the
    environment variables 'A' or 'B' is set. For each of 'A' or 'B' that is
    not set, we add a Warning to the returned list. If neither 'A' nor 'B'
    is set, we instead add a KeyError to the returned list. We then repeat
    this process for `['C', 'D']`.

    Parameters
    ----------
    required_env_variables : list[list[str]]
        List of lists of environment variables where at least one of the
        environment variables must be set.

    Returns
    -------
    list[Exception]
        List of exceptions of either Warnings to say an environment variable isn't set
        or KeyErrors if none of the required environment variables in a list are set.
    """
    # check required environment variables is a list of lists
    if not all(
        isinstance(env_variables, list) for env_variables in required_env_variables
    ):
        raise TypeError(
            "The 'required_env_variables' parameter must be a list of lists of environment variables"
        )

    issues = []
    for env_variables in required_env_variables:
        # see what variables are not set and get a list of Warnings
        warnings = check_optional_env_variables_set(env_variables)

        if len(warnings) == len(env_variables):
            # add a KeyError if none of the variables in this list are set
            issues.append(
                KeyError(
                    f"At least one of the environment variables '{env_variables}' must be set"
                )
            )
        else:
            # add the warnings to the list of issues if at least one variable is set
            issues.extend(warnings)

    return issues
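
Example usage (a minimal sketch; the environment variable names below are placeholders rather than ones prompto requires):

import os

from prompto.utils import check_either_required_env_variables_set

os.environ["OPENAI_API_KEY"] = "sk-..."          # one of the first pair is set
os.environ.pop("AZURE_OPENAI_API_KEY", None)
os.environ.pop("GEMINI_API_KEY", None)
os.environ.pop("GEMINI_PROJECT_ID", None)

issues = check_either_required_env_variables_set(
    [
        ["OPENAI_API_KEY", "AZURE_OPENAI_API_KEY"],  # one set -> a Warning for the unset one
        ["GEMINI_API_KEY", "GEMINI_PROJECT_ID"],     # none set -> a KeyError for the group
    ]
)
for issue in issues:
    print(type(issue).__name__, issue)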

check_max_queries_dict

check_max_queries_dict(
    max_queries_dict: dict[str, int | dict[str, int]]
) -> bool

Check the format of the max_queries_dict dictionary.

Raises a TypeError if the dictionary is not in the correct format.

Parameters:

max_queries_dict : dict[str, int | dict[str, int]], required
    A dictionary of maximum queries per minute for each API or group, by default {}.
    The keys should be either a group name (which is then used in the "group" key of
    the prompt_dict) or an API name. Each value should be an integer (the maximum
    queries per minute, i.e. the rate limit) or a dictionary mapping model names to
    the maximum queries per minute for that model.

Returns:

bool
    True if the max_queries_dict is valid, otherwise raises a ValueError or TypeError.

Source code in src/prompto/utils.py
def check_max_queries_dict(max_queries_dict: dict[str, int | dict[str, int]]) -> bool:
    """
    Check the format of the max_queries_dict dictionary.

    Raises a TypeError if the dictionary is not in the correct format.

    Parameters
    ----------
    max_queries_dict : dict[str, int | dict[str, int]]
        A dictionary of maximum queries per minute for each API or group, by default {}.
        The keys should be either a group name (which is then used in the
        "group" key of the prompt_dict) or an API name. Each value should be an integer
        (the maximum queries per minute, i.e. the rate limit) or a dictionary mapping
        model names to the maximum queries per minute for that model.

    Returns
    -------
    bool
        True if the max_queries_dict is valid, otherwise raises a ValueError or TypeError.
    """
    # check max_queries_dict is a dictionary
    if not isinstance(max_queries_dict, dict):
        raise TypeError(
            f"max_queries_dict must be a dictionary, not {type(max_queries_dict)}"
        )

    for key, value in max_queries_dict.items():
        # check each key is a string
        if not isinstance(key, str):
            raise TypeError(f"max_queries_dict keys must be strings, not {type(key)}")

        # check each value is an integer or dictionary
        if not isinstance(value, int) and not isinstance(value, dict):
            raise TypeError(
                f"max_queries_dict values must be integers or dictionaries, not {type(value)}"
            )

        if isinstance(value, dict):
            for sub_key, sub_value in value.items():
                # check each sub_key is a string
                if not isinstance(sub_key, str):
                    raise TypeError(
                        "if a value of max_queries_dict is a dictionary, "
                        f"the sub-keys must be strings, not {type(sub_key)}"
                    )

                # check each sub_value is an integer
                if not isinstance(sub_value, int):
                    raise TypeError(
                        "if a value of max_queries_dict is a dictionary, "
                        f"the sub-values must be integers, not {type(sub_value)}"
                    )
                elif sub_value < 0:
                    raise ValueError(
                        "if a value of max_queries_dict is a dictionary, "
                        "the sub-values must be positive integers, not negative"
                    )
        elif value < 0:
            raise ValueError(
                "if a value of max_queries_dict is an integer, "
                "the value must be a positive integer, not negative"
            )

    return True
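
Example usage (a minimal sketch; the API, group and model names are illustrative):

from prompto.utils import check_max_queries_dict

# integer values set an API/group-level rate limit; a nested dictionary sets
# per-model rate limits for that API/group
check_max_queries_dict({"openai": 60, "ollama": {"llama3": 10, "mistral": 20}})  # True

try:
    check_max_queries_dict({"openai": "60"})
except TypeError as err:
    print(err)  # max_queries_dict values must be integers or dictionaries, not <class 'str'>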

check_optional_env_variables_set

check_optional_env_variables_set(
    optional_env_variables: list[str],
) -> list[Exception]

Check if optional environment variables are set.

A Warning is returned for each optional environment variable that is not set. If they are all set, an empty list is returned.

Parameters:

optional_env_variables : list[str], required
    List of environment variables that are optional to be set.

Returns:

list[Exception]
    List of exceptions (Warnings) for the optional environment variables that are not set.

Source code in src/prompto/utils.py
def check_optional_env_variables_set(
    optional_env_variables: list[str],
) -> list[Exception]:
    """
    Check if optional environment variables are set.

    A Warning is returned for each optional environment variable
    that is not set. If they are all set, an empty list is returned.

    Parameters
    ----------
    optional_env_variables : list[str]
        List of environment variables that are optional to be set.

    Returns
    -------
    list[Exception]
        List of exceptions for the optional environment variables that are not set.
    """
    return [
        Warning(f"Environment variable '{env_variable}' is not set")
        for env_variable in optional_env_variables
        if env_variable not in os.environ
    ]
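
Example usage (a minimal sketch; the variable name is a placeholder):

import os

from prompto.utils import check_optional_env_variables_set

os.environ.pop("HUGGINGFACE_TOKEN", None)
warnings = check_optional_env_variables_set(["HUGGINGFACE_TOKEN"])
# one Warning saying "Environment variable 'HUGGINGFACE_TOKEN' is not set"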

check_required_env_variables_set

check_required_env_variables_set(
    required_env_variables: list[str],
) -> list[Exception]

Check if required environment variables are set.

A KeyError is returned for each required environment variable that is not set. If they are all set, an empty list is returned.

Parameters:

required_env_variables : list[str], required
    List of environment variables that are required to be set.

Returns:

list[Exception]
    List of exceptions (KeyErrors) for the required environment variables that are not set.

Source code in src/prompto/utils.py
def check_required_env_variables_set(
    required_env_variables: list[str],
) -> list[Exception]:
    """
    Check if required environment variables are set.

    A KeyError is returned for each required environment variable
    that is not set. If they are all set, an empty list is returned.

    Parameters
    ----------
    required_env_variables : list[str]
        List of environment variables that are required to be set.

    Returns
    -------
    list[Exception]
        List of exceptions that are raised if the required environment variables are not set.
    """
    return [
        KeyError(f"Environment variable '{env_variable}' is not set")
        for env_variable in required_env_variables
        if env_variable not in os.environ
    ]
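
Example usage (a minimal sketch; the variable names are placeholders):

import os

from prompto.utils import check_required_env_variables_set

os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ.pop("OPENAI_API_ENDPOINT", None)

errors = check_required_env_variables_set(["OPENAI_API_KEY", "OPENAI_API_ENDPOINT"])
# a single KeyError for 'OPENAI_API_ENDPOINT'; an empty list means everything is set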

copy_file

copy_file(source: str, destination: str) -> None

Function to copy a file from one location to another.

Parameters:

source : str, required
    File path of the file to be copied.
destination : str, required
    File path of the destination of the file.

Source code in src/prompto/utils.py
def copy_file(source: str, destination: str) -> None:
    """
    Function to copy a file from one location to another.

    Parameters
    ----------
    source : str
        File path of the file to be copied.
    destination : str
        File path of the destination of the file.
    """
    if not os.path.exists(source):
        raise FileNotFoundError(f"File '{source}' does not exist")

    logging.info(f"Copying file from {source} to {destination}")
    shutil.copyfile(source, destination)

create_folder

create_folder(folder: str) -> None

Function to create a folder if it does not already exist.

Parameters:

folder : str, required
    Name of the folder to be created.

Source code in src/prompto/utils.py
def create_folder(folder: str) -> None:
    """
    Function to create a folder if it does not already exist.

    Parameters
    ----------
    folder : str
        Name of the folder to be created.
    """
    if not os.path.exists(folder):
        logging.info(f"Creating folder '{folder}'")
        os.makedirs(folder)
    else:
        logging.info(f"Folder '{folder}' already exists")

get_environment_variable

get_environment_variable(env_variable: str, model_name: str) -> str

Get the value of an environment variable for a specific model. We first check if the environment variable with the model name identifier exists. If it does, we return the value of that environment variable. If it does not exist, we return the value of the environment variable without the model name identifier. If neither environment variable exists, we raise a KeyError.

Parameters:

env_variable : str, required
    The name of the environment variable to get.
model_name : str, required
    The name of the model to get the environment variable for.

Returns:

str
    The value of the environment variable for the specific model. If no model-specific
    environment variable exists, the value of the environment variable without the
    model name identifier is returned.

Source code in src/prompto/utils.py
def get_environment_variable(env_variable: str, model_name: str) -> str:
    """
    Get the value of an environment variable for a specific model.
    We first check if the environment variable with the model name identifier
    exists. If it does, we return the value of that environment variable.
    If it does not exist, we return the value of the environment variable
    without the model name identifier.
    If neither environment variable exists, we raise a KeyError.

    Parameters
    ----------
    env_variable : str
        The name of the environment variable to get
    model_name : str
        The name of the model to get the environment variable for

    Returns
    -------
    str
        The value of the environment variable for the specific model.
        If no model-specific environment variable exists, the value of the
        environment variable without the model name identifier is returned.
    """
    # use the model specific environment variables if they exist
    # replace any invalid characters in the model name
    identifier = get_model_name_identifier(model_name)
    env_variable_with_identifier = f"{env_variable}_{identifier}"

    if env_variable_with_identifier in os.environ:
        return os.environ[env_variable_with_identifier]
    elif env_variable in os.environ:
        return os.environ[env_variable]
    else:
        raise KeyError(
            f"Neither '{env_variable}' nor '{env_variable_with_identifier}' environment variable is set"
        )
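
Example usage (a minimal sketch; the variable and model names are illustrative):

import os

from prompto.utils import get_environment_variable

os.environ["OPENAI_API_KEY"] = "generic-key"
os.environ["OPENAI_API_KEY_gpt_4o"] = "model-specific-key"

get_environment_variable("OPENAI_API_KEY", model_name="gpt-4o")
# -> 'model-specific-key', since 'gpt-4o' is normalised to the identifier 'gpt_4o'

get_environment_variable("OPENAI_API_KEY", model_name="other-model")
# -> 'generic-key', as no 'OPENAI_API_KEY_other_model' variable exists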

get_model_name_identifier

get_model_name_identifier(model_name: str) -> str

Helper function to get the model name identifier.

Some model names can contain characters that are not allowed in environment variable names. This function replaces those characters ("-", "/", ".", ":", " ") with underscores ("_").

Parameters:

model_name : str, required
    The model name.

Returns:

str
    The model name identifier with invalid characters replaced with underscores.

Source code in src/prompto/utils.py
def get_model_name_identifier(model_name: str) -> str:
    """
    Helper function to get the model name identifier.

    Some model names can contain characters that are not allowed in
    environment variable names. This function replaces those characters
    ("-", "/", ".", ":", " ") with underscores ("_").

    Parameters
    ----------
    model_name : str
        The model name

    Returns
    -------
    str
        The model name identifier with invalid characters replaced
        with underscores
    """
    model_name = model_name.replace("-", "_")
    model_name = model_name.replace("/", "_")
    model_name = model_name.replace(".", "_")
    model_name = model_name.replace(":", "_")
    model_name = model_name.replace(" ", "_")

    return model_name
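
Example usage:

from prompto.utils import get_model_name_identifier

get_model_name_identifier("gemini-1.5-pro")         # -> 'gemini_1_5_pro'
get_model_name_identifier("meta-llama/Llama-3-8B")  # -> 'meta_llama_Llama_3_8B'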

log_error_response_chat

log_error_response_chat(
    index: int | str,
    model: str,
    message_index: int,
    n_messages: int,
    message: str,
    responses_so_far: list[str],
    error_as_string: str,
    id: int | str = "NA",
) -> str

Log an error response from a model in a chat interaction.

Parameters:

index : int | str, required
    Identifier for the query/chat from the input file.
model : str, required
    Name of the model that generated the response.
message_index : int, required
    Index of the message in the chat interaction.
n_messages : int, required
    Total number of messages in the chat interaction.
message : str, required
    Message that was sent to the model.
responses_so_far : list[str], required
    List of responses that have been generated so far in the chat interaction.
error_as_string : str, required
    Error message that was generated by the model as a string.
id : int | str, default "NA"
    Identifier for the query/chat, included in the log message.

Returns:

str
    The log message that was written.

Source code in src/prompto/utils.py
def log_error_response_chat(
    index: int | str,
    model: str,
    message_index: int,
    n_messages: int,
    message: str,
    responses_so_far: list[str],
    error_as_string: str,
    id: int | str = "NA",
) -> str:
    """
    Log an error response from a model in a chat interaction.

    Parameters
    ----------
    index : int | str
        Identifier for the query/chat from the input file.
    model : str
        Name of the model that generated the response.
    message_index : int
        Index of the message in the chat interaction.
    n_messages : int
        Total number of messages in the chat interaction.
    message : str
        Message that was sent to the model.
    responses_so_far : list[str]
        List of responses that have been generated so far in the chat interaction.
    error_as_string : str
        Error message that was generated by the model as a string.

    Returns
    -------
    str
        The log message that was written.
    """
    log_message = (
        f"Error with model {model} (i={index}, id={id}, message={message_index+1}/{n_messages})\n"
        f"Prompt: {message[:50]}...\n"
        f"Responses so far: {responses_so_far}...\n"
        f"Error: {error_as_string}\n"
    )
    logging.info(log_message)
    return log_message
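
Example usage (a minimal sketch; the index, model and messages are illustrative):

from prompto.utils import log_error_response_chat

msg = log_error_response_chat(
    index=3,
    model="gpt-4o",
    message_index=1,
    n_messages=3,
    message="Now summarise the previous answer in one sentence.",
    responses_so_far=["The capital of France is Paris."],
    error_as_string="RateLimitError: too many requests",
)
# msg starts with "Error with model gpt-4o (i=3, id=NA, message=2/3)"
# and is also emitted via logging.info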

log_error_response_query

log_error_response_query(
    index: int | str,
    model: str,
    prompt: str,
    error_as_string: str,
    id: int | str = "NA",
) -> str

Log an error response from a model to a query.

Parameters:

index : int | str, required
    Identifier for the query from the input file.
model : str, required
    Name of the model that generated the response.
prompt : str, required
    Prompt that was used to generate the response.
error_as_string : str, required
    Error message that was generated by the model as a string.
id : int | str, default "NA"
    Identifier for the query, included in the log message.

Returns:

str
    The log message that was written.

Source code in src/prompto/utils.py
def log_error_response_query(
    index: int | str,
    model: str,
    prompt: str,
    error_as_string: str,
    id: int | str = "NA",
) -> str:
    """
    Log an error response from a model to a query.

    Parameters
    ----------
    index : int | str
        Identifier for the query from the input file.
    model : str
        Name of the model that generated the response.
    prompt : str
        Prompt that was used to generate the response.
    error_as_string : str
        Error message that was generated by the model as a string.

    Returns
    -------
    str
        The log message that was written.
    """
    log_message = (
        f"Error with model {model} (i={index}, id={id})\n"
        f"Prompt: {prompt[:50]}...\n"
        f"Error: {error_as_string}\n"
    )
    logging.info(log_message)
    return log_message

log_success_response_chat

log_success_response_chat(
    index: int | str,
    model: str,
    message_index: int,
    n_messages: int,
    message: str,
    response_text: str,
    id: int | str = "NA",
) -> str

Log a successful chat interaction with a model.

Parameters:

index : int | str, required
    Identifier for the query/chat from the input file.
model : str, required
    Name of the model that generated the response.
message_index : int, required
    Index of the message in the chat interaction.
n_messages : int, required
    Total number of messages in the chat interaction.
message : str, required
    Message that was sent to the model.
response_text : str, required
    Response text generated by the model.
id : int | str, default "NA"
    Identifier for the query/chat, included in the log message.

Returns:

str
    The log message that was written.

Source code in src/prompto/utils.py
def log_success_response_chat(
    index: int | str,
    model: str,
    message_index: int,
    n_messages: int,
    message: str,
    response_text: str,
    id: int | str = "NA",
) -> str:
    """
    Log a successful chat interaction with a model.

    Parameters
    ----------
    index : int | str
        Identifier for the query/chat from the input file.
    model : str
        Name of the model that generated the response.
    message_index : int
        Index of the message in the chat interaction.
    n_messages : int
        Total number of messages in the chat interaction.
    message : str
        Message that was sent to the model.
    response_text : str
        Response text generated by the model.

    Returns
    -------
    str
        The log message that was written.
    """
    log_message = (
        f"Response received for model {model} (i={index}, id={id}, message={message_index+1}/{n_messages})\n"
        f"Prompt: {message[:50]}...\n"
        f"Response: {response_text[:50]}...\n"
    )
    logging.info(log_message)
    return log_message

log_success_response_query

log_success_response_query(
    index: int | str,
    model: str,
    prompt: str,
    response_text: str,
    id: int | str = "NA",
) -> str

Log a successful response from a model to a query.

Parameters:

index : int | str, required
    Identifier for the query from the input file.
model : str, required
    Name of the model that generated the response.
prompt : str, required
    Prompt that was used to generate the response.
response_text : str, required
    Response text generated by the model.
id : int | str, default "NA"
    Identifier for the query, included in the log message.

Returns:

str
    The log message that was written.

Source code in src/prompto/utils.py
def log_success_response_query(
    index: int | str,
    model: str,
    prompt: str,
    response_text: str,
    id: int | str = "NA",
) -> str:
    """
    Log a successful response from a model to a query.

    Parameters
    ----------
    index : int | str
        Identifier for the query from the input file.
    model : str
        Name of the model that generated the response.
    prompt : str
        Prompt that was used to generate the response.
    response_text : str
        Response text generated by the model.

    Returns
    -------
    str
        The log message that was written.
    """
    log_message = (
        f"Response received for model {model} (i={index}, id={id})\n"
        f"Prompt: {prompt[:50]}...\n"
        f"Response: {response_text[:50]}...\n"
    )
    logging.info(log_message)
    return log_message
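
Example usage (a minimal sketch; the values are illustrative):

from prompto.utils import log_success_response_query

msg = log_success_response_query(
    index=0,
    model="gpt-4o",
    prompt="What is the capital of France?",
    response_text="The capital of France is Paris.",
    id="example-id",
)
# msg starts with "Response received for model gpt-4o (i=0, id=example-id)"
# and is also emitted via logging.info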

move_file

move_file(source: str, destination: str) -> None

Function to move a file from one location to another.

Parameters:

source : str, required
    File path of the file to be moved.
destination : str, required
    File path of the destination of the file.

Source code in src/prompto/utils.py
def move_file(source: str, destination: str) -> None:
    """
    Function to move a file from one location to another.

    Parameters
    ----------
    source : str
        File path of the file to be moved.
    destination : str
        File path of the destination of the file.
    """
    if not os.path.exists(source):
        raise FileNotFoundError(f"File '{source}' does not exist")

    logging.info(f"Moving file from {source} to {destination}")
    os.rename(source, destination)
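
Example usage of the file helpers together (a minimal sketch; the paths are illustrative):

from prompto.utils import copy_file, create_folder, move_file

create_folder("data/archive")
copy_file("data/input/experiment.jsonl", "data/archive/experiment.jsonl")
move_file("data/input/experiment.jsonl", "data/output/experiment.jsonl")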

parse_list_arg

parse_list_arg(argument: str) -> list[str]

Splits a string into a list by separating on commas, removing any whitespace and duplicates. Used for parsing CLI arguments that represent lists.

Parameters:

argument : str, required
    A comma-separated string, e.g. "judge1, judge2" or "judge1,judge2,judge1".
    Whitespace will be removed.

Returns:

list[str]
    A list of the comma-separated items in the input string, with duplicates
    and whitespace removed, e.g. ["judge1", "judge2"].

Source code in src/prompto/utils.py
def parse_list_arg(argument: str) -> list[str]:
    """
    Splits a string into a list by separating on commas,
    removing any whitespace and duplicates.
    Used for parsing CLI arguments that represent lists.

    Parameters
    ----------
    argument : str
        A string separated with commas, e.g.
        "judge1, judge2" or "judge1,judge2,judge1".
        Whitespace will be removed.

    Returns
    -------
    list[str]
        A list of the comma-separated items in the input string,
        with duplicates and whitespace removed, e.g. ["judge1", "judge2"].

    """
    x = argument.replace(" ", "").split(",")
    return list(sorted(set(x), key=x.index))
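
Example usage:

from prompto.utils import parse_list_arg

parse_list_arg("judge1, judge2,judge1")  # -> ['judge1', 'judge2']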

sort_jsonl_files_by_creation_time

sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]

Sorts the jsonl files in the given input folder by creation/change time.

Parameters:

input_folder : str, required
    Folder which contains the files to be processed.

Returns:

list[str]
    Ordered list of jsonl filenames in the input folder.

Source code in src/prompto/utils.py
def sort_jsonl_files_by_creation_time(input_folder: str) -> list[str]:
    """
    Sorts the jsonl files in the given input folder by creation/change time.

    Parameters
    ----------
    input_folder : str
        Folder which contains the files to be processed.

    Returns
    -------
    list[str]
        Ordered list of jsonl filenames in the input folder.
    """
    if not os.path.isdir(input_folder):
        raise ValueError(
            f"Input folder '{input_folder}' must be a valid path to a folder"
        )

    return sorted(
        [f for f in os.listdir(input_folder) if f.endswith(".jsonl")],
        key=lambda f: os.path.getctime(os.path.join(input_folder, f)),
    )
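
Example usage (a minimal sketch; the folder path is illustrative):

from prompto.utils import sort_jsonl_files_by_creation_time

for filename in sort_jsonl_files_by_creation_time("data/input"):
    print(filename)  # oldest jsonl file first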

sort_prompts_by_model_for_api

sort_prompts_by_model_for_api(prompt_dicts: list[dict], api: str) -> list[dict]

For a list of prompt dictionaries, sort the dictionaries with "api": api by the "model_name" key. The rest of the dictionaries are kept in the same order.

For Ollama API, if the model requested is not currently loaded, the model will be loaded on demand. This can take some time, so it is better to sort the prompts by the model name to reduce the time taken to load the models.

If no dictionaries with "api": api are present, the original list is returned.

Parameters:

prompt_dicts : list[dict], required
    List of dictionaries containing the prompt and other parameters to be sent
    to the API. Each dictionary must have the keys "prompt" and "api".
api : str, required
    The API name to sort the prompt dictionaries by the "model_name" key.

Returns:

list[dict]
    List of dictionaries containing the prompt and other parameters, where the
    dictionaries with "api": api are sorted by the "model_name" key.

Source code in src/prompto/utils.py
def sort_prompts_by_model_for_api(prompt_dicts: list[dict], api: str) -> list[dict]:
    """
    For a list of prompt dictionaries, sort the dictionaries with `"api": api`
    by the "model_name" key. The rest of the dictionaries are kept in the same order.

    For Ollama API, if the model requested is not currently loaded, the model will be
    loaded on demand. This can take some time, so it is better to sort the prompts
    by the model name to reduce the time taken to load the models.

    If no dictionaries with `"api": api` are present, the original list is returned.

    Parameters
    ----------
    prompt_dicts : list[dict]
        List of dictionaries containing the prompt and other parameters
        to be sent to the API. Each dictionary must have keys "prompt" and "api"
    api : str
        The API name to sort the prompt dictionaries by the "model_name" key

    Returns
    -------
    list[dict]
        List of dictionaries containing the prompt and other parameters
        where the dictionaries with `"api": api` are sorted by the "model_name" key
    """
    api_indices = [i for i, item in enumerate(prompt_dicts) if item.get("api") == api]
    if len(api_indices) == 0:
        return prompt_dicts

    # sort indices for dictionaries with "api": api
    sorted_api_indices = sorted(
        api_indices, key=lambda i: prompt_dicts[i].get("model_name", "")
    )

    # create map from original api index to sorted index
    api_index_map = {i: j for i, j in zip(api_indices, sorted_api_indices)}

    # sort data based on the combined indices
    return [
        (
            prompt_dicts[i]
            if i not in api_index_map.keys()
            else prompt_dicts[api_index_map[i]]
        )
        for i in range(len(prompt_dicts))
    ]
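
Example usage (a minimal sketch; the prompt dictionaries are illustrative):

from prompto.utils import sort_prompts_by_model_for_api

prompt_dicts = [
    {"prompt": "a", "api": "ollama", "model_name": "mistral"},
    {"prompt": "b", "api": "openai", "model_name": "gpt-4o"},
    {"prompt": "c", "api": "ollama", "model_name": "llama3"},
]
sort_prompts_by_model_for_api(prompt_dicts, api="ollama")
# the two "ollama" entries are reordered by model name (llama3 before mistral)
# while the "openai" entry keeps its position:
# [{"prompt": "c", ...}, {"prompt": "b", ...}, {"prompt": "a", ...}]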

write_log_message

write_log_message(log_file: str, log_message: str, log: bool = True) -> None

Helper function to write a log message to a log file with the current date and time of the log message.

Parameters:

log_file : str, required
    Path to the log file.
log_message : str, required
    Message to be written to the log file.
log : bool, default True
    Whether to also emit the message via logging.info before writing it to the file.

Source code in src/prompto/utils.py
def write_log_message(log_file: str, log_message: str, log: bool = True) -> None:
    """
    Helper function to write a log message to a log file
    with the current date and time of the log message.

    Parameters
    ----------
    log_file : str
        Path to the log file.
    log_message : str
        Message to be written to the log file.
    """
    if log:
        logging.info(log_message)

    now = datetime.now()
    with open(log_file, "a") as log:
        log.write(f"{now.strftime('%d-%m-%Y, %H:%M')}: {log_message}\n")
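
Example usage (a minimal sketch; the log file path and message are illustrative):

from prompto.utils import write_log_message

# appends a timestamped line to the log file and, because log=True,
# also emits the message via logging.info
write_log_message("data/experiment.log", "Experiment started", log=True)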