Skip to content

API

arcgis_oriented_imagery.data

Utilities for creating and managing ArcGIS oriented imagery datasets and for downloading camera-info CSV tables from S3. Key public functions:

  • get_new_camera_info_tables: download new/updated CSV camera info tables from S3 into a local working directory, with a manifest file to track previously downloaded items. Supports paginated S3 listings and configurable retry/backoff behavior when continuation tokens are missing.

  • process_camera_info_table: orchestrates renaming and creation of oriented imagery datasets using the other helper functions.

The module is designed to avoid import-time heavy dependencies where possible and allows basic mocking of arcpy and boto3 for unit testing.

add_images_to_oriented_imagery_dataset(dataset_path, camera_info_table, imagery_category='360', camera_info_field_map=None, include_all_fields=False)

Add images to an existing Oriented Imagery Dataset.

Parameters:

Name Type Description Default
dataset_path Union[str, Path]

The full path to the Oriented Imagery Dataset.

required
camera_info_table Union[str, Path]

The path to the CSV table containing camera information.

required
imagery_category Optional[Literal['Horizontal', 'Oblique', 'Nadir', '360', 'Inspection']]

The category of imagery being added. Default is '360'.

'360'
camera_info_field_map Optional[dict[str, str]]

A dictionary mapping the expected field names to the actual field names in the camera info table.

None
include_all_fields Optional[bool]

Whether all fields from the input table, apart from the required schema, will be added to the dataset's attribute table. Default is False.

False
Source code in src/arcgis_oriented_imagery/data.py
def add_images_to_oriented_imagery_dataset(
    dataset_path: Union[str, Path],
    camera_info_table: Union[str, Path],
    imagery_category: Optional[
        Literal["Horizontal", "Oblique", "Nadir", "360", "Inspection"]
    ] = "360",
    camera_info_field_map: Optional[dict[str, str]] = None,
    include_all_fields: Optional[bool] = False,
) -> Path:
    """
    Add images to an existing Oriented Imagery Dataset.

    Args:
        dataset_path: The full path to the Oriented Imagery Dataset.
        camera_info_table: The path to the CSV table containing camera information.
        imagery_category: The category of imagery being added. Default is '360'.
        camera_info_field_map: A dictionary mapping the expected field names to the actual field names in the camera info table.
        include_all_fields: Whether all fields from the input table, apart from the required schema, will be added to the dataset's
            attribute table. Default is False.

    Returns:
        The path to the Oriented Imagery Dataset the images were added to.

    Raises:
        FileNotFoundError: If the dataset or the provided camera info table does not exist.
    """
    # normalize inputs to Path objects (Path(Path(...)) is harmless, so no isinstance guard needed)
    dataset_path = Path(dataset_path)
    if camera_info_table:
        camera_info_table = Path(camera_info_table)

    # ensure the oriented imagery dataset exists
    if not dataset_path.exists():
        raise FileNotFoundError(
            f"The specified Oriented Imagery Dataset does not exist: {dataset_path}"
        )

    # if the camera info table is provided, ensure it exists
    if camera_info_table and not camera_info_table.exists():
        raise FileNotFoundError(
            f"The specified camera info table does not exist: {camera_info_table}"
        )

    tmp_dir = None  # set only when a field map requires a temporary, renamed CSV

    try:
        # prepare camera info fields if a mapping is provided
        if camera_info_field_map is not None:
            # the temporary file must carry the .csv suffix so the geoprocessing
            # tool recognizes it as a table
            tmp_dir = tempfile.mkdtemp()
            tmp_csv_path = Path(tmp_dir) / "temp_camera_info.csv"

            # rename the columns in the camera info table to match expected names
            from .schema import (
                rename_csv_columns,
            )  # delayed import to avoid circular dependency

            camera_info_table = rename_csv_columns(
                input_csv_path=camera_info_table,
                output_csv_path=tmp_csv_path,
                column_mapping=camera_info_field_map,
                warn_if_extra=True,
            )

        # add images to the oriented imagery dataset
        arcpy.management.AddImagesToOrientedImageryDataset(
            in_oriented_imagery_dataset=str(dataset_path),
            imagery_category=imagery_category,
            input_data=str(camera_info_table),
            include_all_fields=include_all_fields,
        )
    finally:
        # clean up temporary files even when the geoprocessing call raises
        # (bug fix: cleanup previously ran only on the success path, leaking the
        # temp directory on failure)
        if tmp_dir is not None:
            try:
                if tmp_csv_path.exists():
                    tmp_csv_path.unlink()
                if Path(tmp_dir).exists():
                    Path(tmp_dir).rmdir()
            except Exception as e:
                logger.warning(f"Failed to clean up temporary files: {e}")

    return dataset_path

create_file_geodatabase(file_geodatabase_path)

Create the full path to an ArcGIS File Geodatabase if it does not already exist.

Note

If the full path does not exist, all necessary parent directories will be created.

Parameters:

Name Type Description Default
file_geodatabase_path Union[str, Path]

The full path to the File Geodatabase.

required
Source code in src/arcgis_oriented_imagery/data.py
def create_file_geodatabase(file_geodatabase_path: Union[str, Path]) -> Path:
    """
    Create the full path to an ArcGIS File Geodatabase if it does not already exist.

    !!! note
        If the full path does not exist, all necessary parent directories will be created.

    Args:
        file_geodatabase_path: The full path to the File Geodatabase.

    Returns:
        The path to the (possibly newly created) File Geodatabase.
    """
    # ensure the path is a Path object (Path(Path(...)) is harmless)
    file_geodatabase_path = Path(file_geodatabase_path)

    # create any missing parent directories; exist_ok avoids the race between
    # a separate exists() check and the mkdir call
    file_geodatabase_path.parent.mkdir(parents=True, exist_ok=True)

    # create the file geodatabase if it does not exist
    if not file_geodatabase_path.exists():
        arcpy.management.CreateFileGDB(
            str(file_geodatabase_path.parent), file_geodatabase_path.name
        )

    return file_geodatabase_path

create_oriented_imagery_dataset(dataset_path, spatial_reference=4326, has_z=True)

Create an Oriented Imagery Dataset at the specified path if it does not already exist.

Note

If the full path does not exist, all necessary parent directories will be created.

Parameters:

Name Type Description Default
dataset_path Union[str, Path]

The full path to the Oriented Imagery Dataset, including the File Geodatabase and Feature Dataset.

required
spatial_reference Union[int, SpatialReference]

The spatial reference for the Oriented Imagery Dataset. This can be an EPSG code (int) or an arcpy.SpatialReference object. Default is 4326 (WGS 84).

4326
has_z Optional[bool]

Whether the Oriented Imagery Dataset has a Z dimension. Default is True.

True

Returns:

Type Description
Path

The full path to the created Oriented Imagery Dataset.

Source code in src/arcgis_oriented_imagery/data.py
def create_oriented_imagery_dataset(
    dataset_path: Union[str, Path],
    spatial_reference: Union[int, arcpy.SpatialReference] = 4326,
    has_z: Optional[bool] = True,
) -> Path:
    """
    Create an Oriented Imagery Dataset at the specified path if it does not already exist.

    !!! note
        If the full path does not exist, all necessary parent directories will be created.

    Args:
        dataset_path: The full path to the Oriented Imagery Dataset, including the File Geodatabase and
            Feature Dataset.
        spatial_reference: The spatial reference for the Oriented Imagery Dataset. This can be an EPSG code
            (int) or an :class:`arcpy Spatial Reference<arcpy.SpatialReference>` object. Default is 4326 (WGS 84).
        has_z: Whether the Oriented Imagery Dataset has a Z dimension. Default is True.

    Returns:
        The full path to the created Oriented Imagery Dataset.

    Raises:
        TypeError: If spatial_reference is neither an integer EPSG code nor an arcpy SpatialReference.
    """
    # ensure the dataset path is a Path object
    dataset_path = Path(dataset_path)

    # ensure the parent geodatabase path carries the .gdb suffix
    gdb_pth = dataset_path.parent
    if gdb_pth.suffix.lower() != ".gdb":
        gdb_pth = gdb_pth.with_suffix(".gdb")
        # keep the returned path pointing at the geodatabase the dataset is
        # actually created in (bug fix: previously the original, un-normalized
        # path was returned even though creation used the corrected .gdb path)
        dataset_path = gdb_pth / dataset_path.name

    # get the file geodatabase path and create it if it does not exist
    create_file_geodatabase(file_geodatabase_path=gdb_pth)

    # ensure the spatial reference is an arcpy SpatialReference object, and is integer EPSG code if provided as int
    if isinstance(spatial_reference, int):
        spatial_reference = arcpy.SpatialReference(spatial_reference)
    elif not isinstance(spatial_reference, arcpy.SpatialReference):
        raise TypeError(
            "The spatial_reference parameter must be an integer EPSG code or an arcpy SpatialReference object."
        )

    # create the oriented imagery dataset if it does not exist
    if not dataset_path.exists():
        arcpy.management.CreateOrientedImageryDataset(
            # pass str for consistency with the other arcpy calls in this module
            out_dataset_path=str(gdb_pth),
            out_dataset_name=dataset_path.name,
            spatial_reference=spatial_reference,
            has_z=has_z,
        )

    return dataset_path

get_new_camera_info_tables(s3_bucket_path, local_working_directory, manifest_file=None, max_retries=None, backoff_initial=None)

Download new camera info tables from an S3 bucket.

Parameters:

Name Type Description Default
s3_bucket_path str

The S3 bucket path where camera info tables are stored.

required
local_working_directory Union[str, Path]

The local directory to download the camera info tables to.

required
manifest_file Optional[Union[str, Path]]

Optional path to a manifest file listing specific files to download. If not provided, one will be created in the local working directory.

None
max_retries Optional[int]

Optional maximum number of retries when S3 indicates more pages (IsTruncated=True) but NextContinuationToken is not provided. If None the value is read from the AOI_S3_MAX_RETRIES environment variable or defaults to 3.

None
backoff_initial Optional[float]

Optional initial backoff (seconds) for retry attempts. If None the value is read from AOI_S3_BACKOFF_INITIAL environment variable or defaults to 0.1.

None
Configuration via environment variables

AOI_S3_MAX_RETRIES (int): default retry attempts when continuation token missing. AOI_S3_BACKOFF_INITIAL (float): initial backoff seconds for retries.

Source code in src/arcgis_oriented_imagery/data.py
def get_new_camera_info_tables(
    s3_bucket_path: str,
    local_working_directory: Union[str, Path],
    manifest_file: Optional[Union[str, Path]] = None,
    max_retries: Optional[int] = None,
    backoff_initial: Optional[float] = None,
) -> list[Path]:
    """
    Download new camera info tables from an S3 bucket.

    Args:
        s3_bucket_path: The S3 bucket path where camera info tables are stored.
        local_working_directory: The local directory to download the camera info tables to.
        manifest_file: Optional path to a manifest file listing specific files to download. If not provided,
            one will be created in the local working directory.
        max_retries: Optional maximum number of retries when S3 indicates more pages (IsTruncated=True)
            but `NextContinuationToken` is not provided. If None the value is read from the
            AOI_S3_MAX_RETRIES environment variable or defaults to 3.
        backoff_initial: Optional initial backoff (seconds) for retry attempts. If None the value
            is read from AOI_S3_BACKOFF_INITIAL environment variable or defaults to 0.1.

    Configuration via environment variables:
        AOI_S3_MAX_RETRIES (int): default retry attempts when continuation token missing.
        AOI_S3_BACKOFF_INITIAL (float): initial backoff seconds for retries.

    Returns:
        A list of local paths to the newly downloaded/updated CSV tables.
    """
    # ensure local working directory is a Path object
    local_working_directory = Path(local_working_directory)

    # if manifest file is not provided, create one in the local working directory
    if manifest_file is None:
        manifest_file = local_working_directory / "s3_manifest.json"

    # ensure the local working directory exists
    local_working_directory.mkdir(parents=True, exist_ok=True)

    # resolve retry/backoff configuration: function args > env config > defaults
    if max_retries is None:
        max_retries = DEFAULT_S3_MAX_RETRIES
    if backoff_initial is None:
        backoff_initial = DEFAULT_S3_BACKOFF_INITIAL

    # use boto3 to check and download new camera info tables
    s3 = boto3.client("s3")
    bucket_name = re.match(r"s3://([^/]+)", s3_bucket_path).group(1)
    prefix = re.sub(
        r"s3://[^/]+/", "", s3_bucket_path
    )  # remove 's3://bucket-name/' to get the prefix
    new_tables = []

    try:
        # load existing manifest if it exists
        existing_manifest = {}
        if Path(manifest_file).exists():
            with open(manifest_file, "r") as mf:
                existing_manifest = json.load(mf)

        # page through results using ContinuationToken (if present)
        continuation_token = None
        while True:
            list_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
            if continuation_token:
                list_kwargs["ContinuationToken"] = continuation_token
            response = s3.list_objects_v2(**list_kwargs)

            if "Contents" not in response:
                # if the first page has no contents, exit early
                if continuation_token is None:
                    logger.info(f"No objects found in S3 bucket {s3_bucket_path}.")
                    return new_tables
                break

            # process every object on this page before considering pagination
            for obj in response["Contents"]:
                key = obj["Key"]
                last_modified = (
                    obj["LastModified"].replace(tzinfo=timezone.utc).isoformat()
                )
                filename = key.split("/")[-1]
                local_file_path = local_working_directory / filename

                # if a csv file, determine if the file is new or updated
                if filename.endswith(".csv") and (
                    (filename not in existing_manifest)
                    or (existing_manifest[filename] != last_modified)
                ):
                    # download the file and record it in the manifest
                    s3.download_file(bucket_name, key, str(local_file_path))
                    new_tables.append(local_file_path)
                    existing_manifest[filename] = last_modified
                    # bug fix: the message previously contained a literal
                    # "(unknown)" placeholder instead of the downloaded path
                    logger.info(
                        f"Downloaded new/updated camera info table: {local_file_path}"
                    )

            # pagination is handled AFTER the whole page is processed (bug fix:
            # this logic previously lived inside the per-object loop, so the loop
            # broke out after the first object and the outer while re-listed the
            # same page indefinitely)
            if not response.get("IsTruncated"):
                break

            next_token = response.get("NextContinuationToken")
            if not next_token:
                # if the response says truncated but lacks a continuation token,
                # attempt a small number of retries with backoff in case of
                # transient API shape issues; if still missing, log and stop to
                # avoid infinite loops
                retry = 0
                backoff = backoff_initial
                while retry < max_retries:
                    retry += 1
                    try:
                        time.sleep(backoff)
                    except Exception:
                        # in test environments time.sleep may be patched or raise
                        pass
                    # re-request the same page to see if the token appears
                    retry_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
                    if continuation_token:
                        retry_kwargs["ContinuationToken"] = continuation_token
                    response = s3.list_objects_v2(**retry_kwargs)
                    next_token = response.get("NextContinuationToken")
                    if next_token:
                        break
                    backoff *= 2

            if next_token:
                continuation_token = next_token
            else:
                logger.warning(
                    "S3 response indicated more pages (IsTruncated=True) but no NextContinuationToken was returned after retries; stopping pagination."
                )
                break

        # update the manifest file
        with open(manifest_file, "w") as mf:
            json.dump(existing_manifest, mf, indent=4)

    except (NoCredentialsError, PartialCredentialsError):
        logger.error(
            "AWS credentials not found or incomplete. Please configure your AWS credentials."
        )

    except Exception as e:
        logger.error(f"An error occurred while accessing S3: {e}")

    return new_tables

process_camera_info_table(camera_info_table, working_directory, camera_info_field_map=None, output_dataset=None)

Process data defined in a camera info table CSV file. This includes renaming columns based on a provided mapping, and creating an output oriented imagery dataset.

Parameters:

Name Type Description Default
camera_info_table Union[str, Path]

The path to the CSV table containing camera information.

required
working_directory Union[str, Path]

The directory where intermediate and output files will be stored.

required
camera_info_field_map Optional[dict[str, str]]

A dictionary mapping the expected field names to the actual field names in the camera info table.

None
output_dataset Optional[Union[str, Path]]

Optional full path to the output Oriented Imagery Dataset. If not provided, a default path will be created in the working directory based on the input camera info table name.

None

Returns:

Type Description
Path

The full path to the created Oriented Imagery Dataset.

Source code in src/arcgis_oriented_imagery/data.py
def process_camera_info_table(
    camera_info_table: Union[str, Path],
    working_directory: Union[str, Path],
    camera_info_field_map: Optional[dict[str, str]] = None,
    output_dataset: Optional[Union[str, Path]] = None,
) -> Path:
    """
    Process data defined in a camera info table CSV file. This includes renaming columns based on a provided mapping, and creating an output
    oriented imagery dataset.

    Args:
        camera_info_table: The path to the CSV table containing camera information.
        working_directory: The directory where intermediate and output files will be stored.
        camera_info_field_map: A dictionary mapping the expected field names to the actual field names in the camera info table.
        output_dataset: Optional full path to the output Oriented Imagery Dataset. If not provided, a default path will be created
            in the working directory based on the input camera info table name.

    Returns:
        The full path to the created Oriented Imagery Dataset.
    """
    # coerce both inputs to Path objects up front
    camera_info_table = Path(camera_info_table)
    working_directory = Path(working_directory)

    # the source table must already exist
    if not camera_info_table.exists():
        raise FileNotFoundError(
            f"The specified camera info table does not exist: {camera_info_table}"
        )

    # the working directory is created on demand
    if not working_directory.exists():
        working_directory.mkdir(parents=True)

    # derive a default output location from the table name when none was given
    if output_dataset is None:
        slug = _slugify(camera_info_table.stem)
        output_dataset = working_directory / f"{slug}.gdb" / f"{slug}_oriented_imagery"

    # normalize whatever path form was supplied or derived
    output_dataset = Path(output_dataset)

    # create the dataset, then load the imagery records into it
    create_oriented_imagery_dataset(dataset_path=output_dataset)
    add_images_to_oriented_imagery_dataset(
        dataset_path=output_dataset,
        camera_info_table=camera_info_table,
        camera_info_field_map=camera_info_field_map,
    )

    return output_dataset

arcgis_oriented_imagery.schema

Helpers for validating and renaming CSV/Tabular schemas used by the oriented imagery pipeline. Main public functions:

  • validate_csv_schema: check that required columns are present in a CSV file.
  • rename_dataframe_columns: rename columns in a pandas DataFrame using a mapping.
  • rename_csv_columns: read a CSV, rename columns, and write the result.

The module uses local pandas imports and a logger to remain testable in environments without heavy GIS dependencies.

rename_csv_columns(input_csv_path, output_csv_path, column_mapping, warn_if_extra=True)

Rename columns in a CSV file based on the provided mapping.

Parameters:

Name Type Description Default
input_csv_path Union[str, Path]

The path to the input CSV file.

required
output_csv_path Union[str, Path]

The path to the output CSV file.

required
column_mapping dict[str, str]

Dictionary mapping old column names to new column names.

required
warn_if_extra Optional[bool]

Whether to log a warning if extra columns are found in the input data.

True
Source code in src/arcgis_oriented_imagery/schema.py
def rename_csv_columns(
    input_csv_path: Union[str, Path],
    output_csv_path: Union[str, Path],
    column_mapping: dict[str, str],
    warn_if_extra: Optional[bool] = True,
) -> Path:
    """
    Rename columns in a CSV file based on the provided mapping.

    Args:
        input_csv_path: The path to the input CSV file.
        output_csv_path: The path to the output CSV file.
        column_mapping: Dictionary mapping old column names to new column names.
        warn_if_extra: Whether to log a warning if extra columns are found in the input data.

    Returns:
        The path to the written output CSV file.
    """
    # coerce both endpoints to Path objects
    input_csv_path = Path(input_csv_path)
    output_csv_path = Path(output_csv_path)

    # fail fast when the source file is missing
    if not input_csv_path.exists():
        raise FileNotFoundError(f"Input CSV file not found: {input_csv_path}")

    # load the table, apply the column mapping, and persist the result
    source_frame = pd.read_csv(input_csv_path)
    renamed_frame = rename_dataframe_columns(source_frame, column_mapping, warn_if_extra)
    renamed_frame.to_csv(output_csv_path, index=False)
    logger.info(f"Renamed CSV saved to: {output_csv_path}")

    return output_csv_path

rename_dataframe_columns(input_data, column_mapping, warn_if_extra=True)

Rename columns in a Pandas DataFrame based on the provided mapping.

Parameters:

Name Type Description Default
input_data DataFrame

The input DataFrame.

required
column_mapping dict[str, str]

Dictionary mapping old column names to new column names.

required
warn_if_extra Optional[bool]

Whether to log a warning if extra columns are found in the input data.

True

Returns:

Type Description
Path

Pandas DataFrame with renamed columns.

Source code in src/arcgis_oriented_imagery/schema.py
def rename_dataframe_columns(
    input_data: pd.DataFrame,
    column_mapping: dict[str, str],
    warn_if_extra: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Rename columns in a Pandas DataFrame based on the provided mapping.

    Args:
        input_data: The input DataFrame.
        column_mapping: Dictionary mapping old column names to new column names.
        warn_if_extra: Whether to log a warning if extra columns are found in the input data.

    Returns:
        Pandas DataFrame with renamed columns. The input DataFrame is not mutated.
    """
    # see and notify if there are any extra columns in the input data
    # (bug fix: the return annotation previously said Path, but this function
    # returns a DataFrame)
    extra_columns = [col for col in input_data.columns if col not in column_mapping]
    if warn_if_extra and extra_columns:
        logger.warning(f"Extra columns detected: {extra_columns}")

    # restrict the mapping to columns actually present so logging reflects reality
    renamed_columns = {
        old: new for old, new in column_mapping.items() if old in input_data.columns
    }
    if not renamed_columns:
        logger.info("No columns to rename based on the provided mapping.")
    else:
        logger.info(f"Renaming columns: {renamed_columns}")

    # perform renaming; rename() returns a new DataFrame
    renamed_data = input_data.rename(columns=renamed_columns)
    return renamed_data

validate_csv_schema(csv_path, required_columns, fail_if_extra=False)

Validate that the CSV file contains the required columns.

Parameters:

Name Type Description Default
csv_path Union[str, Path]

The path to the CSV file.

required
required_columns list[str]

A list of required column names.

required
fail_if_extra bool

Whether to fail if extra columns are found.

False

Returns:

Type Description
bool

True if the CSV schema is valid, False otherwise.

Source code in src/arcgis_oriented_imagery/schema.py
def validate_csv_schema(
    csv_path: Union[str, Path], required_columns: list[str], fail_if_extra: bool = False
) -> bool:
    """
    Validate that the CSV file contains the required columns.

    Args:
        csv_path: The path to the CSV file.
        required_columns: A list of required column names.
        fail_if_extra: Whether to fail if extra columns are found.

    Returns:
        True if the CSV schema is valid, False otherwise.
    """
    # read only the header row using the csv module
    with open(csv_path, mode="r", newline="") as f:
        reader = csv.reader(f)
        # an empty file has no header row; treat it as missing every required column
        # (bug fix: next(reader) previously raised StopIteration on an empty file)
        csv_columns = next(reader, [])
        missing_columns = [col for col in required_columns if col not in csv_columns]

    if missing_columns:
        # use the module logger instead of print so failures reach configured handlers,
        # consistent with the rest of this module
        logger.error(f"Missing columns in CSV: {missing_columns}")
        return False

    if fail_if_extra:
        extra_columns = [col for col in csv_columns if col not in required_columns]
        if extra_columns:
            logger.warning(f"Extra columns in CSV: {extra_columns}")
            # bug fix: previously extra columns only produced a warning while the
            # function still returned True; honor the flag's documented contract
            return False

    return True