Skip to content

upload_media

check_uploads_by_api

check_uploads_by_api(api_name: str)

Check whether preemptive uploading of media files has been implemented for the given API.

Source code in src/prompto/upload_media.py
def check_uploads_by_api(api_name: str):
    """
    Confirm that preemptive media upload is available for the given API.

    Parameters
    ----------
    api_name : str
        Name of the API to look up among the keys of UPLOAD_APIS.

    Returns
    -------
    bool
        True when the API is listed in UPLOAD_APIS.

    Raises
    ------
    NotImplementedError
        If api_name is not one of the keys of UPLOAD_APIS.
    """
    is_supported = api_name in UPLOAD_APIS.keys()
    if is_supported:
        return True

    # Unsupported APIs are reported as an error rather than by returning False.
    raise NotImplementedError(
        f"Uploading media files to {api_name} is not supported yet."
    )

do_upload_media async

do_upload_media(input_file, media_folder, output_file)

Upload media files to the relevant API. The media files are uploaded and the experiment file is updated with the uploaded filenames.

Parameters:

Name Type Description Default
input_file str

Path to the experiment file.

required
media_folder str

Path to the folder containing the media files.

required
output_file str

Path to new or updated output file. This can be the same as the input file in which case the input file will be overwritten. No checking of this behaviour is included in this function. It is assumed that the overwrite logic has been implemented elsewhere.

required
Source code in src/prompto/upload_media.py
async def do_upload_media(input_file, media_folder, output_file):
    """
    Upload the media files referenced by an experiment file, then write an
    experiment file that records the uploaded filenames.

    Parameters
    ----------
    input_file : str
        Path to the experiment file.
    media_folder : str
        Path to the folder containing the media files.
    output_file : str
        Path to the new or updated output file. It may be the same as the input
        file, in which case the input file is overwritten. This function does not
        check for that; the overwrite logic is assumed to live elsewhere.
    """
    media_to_upload, prompts = _read_experiment_file(input_file, media_folder)

    # Only the Gemini API is supported at present, so its uploader is called
    # directly. Supporting bulk upload to other APIs will require refactoring
    # this call into a dispatch on the API.
    upload_results = await gemini_media.upload_media_files_async(media_to_upload)

    update_experiment_file(prompts, upload_results, output_file, media_folder)

update_experiment_file

update_experiment_file(
    prompt_dict_list: list[dict],
    uploaded_files: dict[str, str],
    output_path: str,
    media_location: str,
) -> None

Creates or updates the experiment file with the uploaded filenames. The uploaded filenames are added to the prompt dictionaries.

Parameters:

- prompt_dict_list (list[dict]): A list of prompt dictionaries containing the data from the original experiment file.
- uploaded_files (dict[str, str]): A dictionary mapping local file paths to their corresponding uploaded filenames.
- output_path (str): The path for the new/updated experiment file. No checking of the overwrite behaviour is included in this function. It is assumed that the overwrite logic has been implemented elsewhere.
- media_location (str): The location of the media files (e.g., "data/media").

Source code in src/prompto/upload_media.py
def update_experiment_file(
    prompt_dict_list: list[dict],
    uploaded_files: dict[str, str],
    output_path: str,
    media_location: str,
) -> None:
    """
    Create or update the experiment file with the uploaded filenames.

    Each media part found in the prompt dictionaries gets an
    "uploaded_filename" entry, and the dictionaries are then written to
    output_path as JSON Lines (one JSON object per line).

    The dictionaries in prompt_dict_list are modified in place.

    Parameters
    ----------
    prompt_dict_list : list[dict]
        A list of prompt dictionaries containing the data from the original
        experiment file. Only entries whose "prompt" value is a list are
        inspected; within that list, only dict items are searched for "parts".
    uploaded_files : dict[str, str]
        A dictionary mapping local file paths to their corresponding uploaded
        filenames. Keys are matched against
        os.path.join(media_location, part["media"]).
    output_path : str
        The path for the new/updated experiment file. No checking of the
        overwrite behaviour is included in this function. It is assumed that
        the overwrite logic has been implemented elsewhere.
    media_location : str
        The location of the media files (e.g., "data/media").
    """
    # Modify data to include uploaded filenames
    for data in prompt_dict_list:
        prompts = data.get("prompt")
        if not isinstance(prompts, list):
            continue

        for prompt in prompts:
            # Items that are not dicts (e.g. plain strings) cannot carry
            # "parts"; skip them instead of failing on a missing .get().
            if not isinstance(prompt, dict):
                continue

            # "or []" also covers an explicit "parts": None.
            for part in prompt.get("parts") or []:
                if not (isinstance(part, dict) and "media" in part):
                    continue

                file_path = os.path.join(media_location, part["media"])
                if file_path in uploaded_files:
                    part["uploaded_filename"] = uploaded_files[file_path]
                else:
                    # Best effort: a missing upload is reported, not fatal.
                    logger.warning(
                        f"Failed to find {file_path} in uploaded_files"
                    )

    # Write modified data back to the JSONL file. The encoding is pinned so the
    # result does not depend on the platform's default locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(data) + "\n" for data in prompt_dict_list)