Skip to content

API

Module h3_od.config

h3_od.config

Configuration loader for the project.

Reads settings from YAML configuration files in the config/ directory using a singleton pattern so the files are parsed once and reused across modules.

The YAML config supports environment-specific sections defined under the environments key in config.yml. Add, rename, or remove environments by editing that YAML block — no Python changes required. Change the :pydata:ENVIRONMENT constant below — or set the PROJECT_ENV environment variable — to select the active environment.

Usage::

from h3_od.config import config, secrets, ENVIRONMENT

# dot-notation access
log_level = config.logging.level

# dict-style access
input_path = config["data"]["aoi_polygon"]

# secrets (loaded from config/secrets.yml)
gis_url = secrets.esri.gis_url

# check current environment
print(f"Running in {ENVIRONMENT} mode")

ENVIRONMENT = os.environ.get('PROJECT_ENV', 'dev') module-attribute

ConfigNode

Immutable, attribute-accessible wrapper around nested dictionaries.

Supports both dot-notation (cfg.logging.level) and dict-style (cfg["logging"]["level"]) access for convenience.

Source code in src/h3_od/config.py
class ConfigNode:
    """Immutable, attribute-accessible wrapper around nested dictionaries.

    Supports both dot-notation (``cfg.logging.level``) and dict-style
    (``cfg["logging"]["level"]``) access for convenience.
    """

    def __init__(self, data: dict[str, Any] | None = None) -> None:
        data = data or {}
        for key, value in data.items():
            # Recursively wrap dicts and lists
            if isinstance(value, dict):
                value = ConfigNode(value)
            elif isinstance(value, list):
                value = [ConfigNode(v) if isinstance(v, dict) else v for v in value]
            object.__setattr__(self, key, value)

    # dict-style access -------------------------------------------------------
    def __getitem__(self, key: str) -> Any:
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def __contains__(self, key: str) -> bool:
        return key in self.__dict__

    def __iter__(self) -> Iterator[str]:
        return iter(self.__dict__)

    # convenience --------------------------------------------------------------
    def get(self, key: str, default: Any = None) -> Any:
        """Return the value for *key* if present, else *default*."""
        return self.__dict__.get(key, default)

    def to_dict(self) -> dict[str, Any]:
        """Recursively convert back to a plain dictionary."""
        out: dict[str, Any] = {}
        for key, value in self.__dict__.items():
            if isinstance(value, ConfigNode):
                out[key] = value.to_dict()
            elif isinstance(value, list):
                out[key] = [
                    v.to_dict() if isinstance(v, ConfigNode) else v for v in value
                ]
            else:
                out[key] = value
        return out

    def __getattr__(self, key: str) -> Any:
        # Fallback for missing attributes
        raise AttributeError(f"ConfigNode has no attribute '{key}'")

    def __repr__(self) -> str:
        return f"ConfigNode({self.__dict__})"

get(key, default=None)

Return the value for key if present, else default.

Source code in src/h3_od/config.py
def get(self, key: str, default: Any = None) -> Any:
    """Return the value for *key* if present, else *default*."""
    return self.__dict__.get(key, default)

to_dict()

Recursively convert back to a plain dictionary.

Source code in src/h3_od/config.py
def to_dict(self) -> dict[str, Any]:
    """Recursively convert back to a plain dictionary."""
    out: dict[str, Any] = {}
    for key, value in self.__dict__.items():
        if isinstance(value, ConfigNode):
            out[key] = value.to_dict()
        elif isinstance(value, list):
            out[key] = [
                v.to_dict() if isinstance(v, ConfigNode) else v for v in value
            ]
        else:
            out[key] = value
    return out

get_available_environments(config_path=None)

Return the environment names defined in config.yml.

Parameters:

Name Type Description Default
config_path Path | str | None

Explicit path to a YAML file. Defaults to config/config.yml.

None

Returns:

Type Description
list[str]

Sorted list of environment keys found under the environments section (e.g. ``["dev", "olympia_drive", "olympia_walk"]``).

Source code in src/h3_od/config.py
def get_available_environments(
    config_path: Path | str | None = None,
) -> list[str]:
    """Return the environment names defined in ``config.yml``.

    Args:
        config_path: Explicit path to a YAML file.  Defaults to
            ``config/config.yml``.

    Returns:
        list[str]: Sorted list of environment keys found under the
        ``environments`` section (e.g. ``["dev", "olympia_drive",
        "olympia_walk"]``).
    """
    # resolve which YAML file to read; default to the project config
    if config_path is None:
        yaml_path = CONFIG_DIR / _CONFIG_FILE
    else:
        yaml_path = Path(config_path)

    # a missing environments block yields an empty list rather than an error
    contents = _load_yaml(yaml_path)
    return sorted(contents.get("environments", {}))

load_config(config_path=None, environment=None)

Load the main project configuration for a given environment.

Top-level keys (e.g. project) are always loaded. Then the environment-specific section (environments.<env>) is deep-merged on top, so environment values override any shared defaults.

Available environments are introspected from the environments key in config.yml — add or remove sections there to define your own.

Parameters:

Name Type Description Default
config_path Path | str | None

Explicit path to a YAML file. Defaults to config/config.yml relative to the project root.

None
environment str | None

One of the keys under environments in config.yml. Defaults to the module-level :pydata:ENVIRONMENT constant.

None

Returns:

Name Type Description
ConfigNode ConfigNode

A recursively accessible configuration object.

Raises:

Type Description
ValueError

If the requested environment is not defined in config.yml.

Source code in src/h3_od/config.py
def load_config(
    config_path: Path | str | None = None,
    environment: str | None = None,
) -> ConfigNode:
    """Load the main project configuration for a given environment.

    Top-level keys (e.g. ``project``) are always loaded.  Then the
    environment-specific section (``environments.<env>``) is deep-merged on
    top, so environment values override any shared defaults.

    Available environments are introspected from the ``environments`` key in
    ``config.yml`` — add or remove sections there to define your own.

    Args:
        config_path: Explicit path to a YAML file.  Defaults to
            ``config/config.yml`` relative to the project root.
        environment: One of the keys under ``environments`` in
            ``config.yml``.  Defaults to the module-level
            :pydata:`ENVIRONMENT` constant.

    Returns:
        ConfigNode: A recursively accessible configuration object.

    Raises:
        ValueError: If the requested environment is not defined in
            ``config.yml``.
    """
    env = environment or ENVIRONMENT

    path = Path(config_path) if config_path else CONFIG_DIR / _CONFIG_FILE
    raw = _load_yaml(path)

    # pull out the environments block and the active env section
    environments = raw.pop("environments", {})

    if env not in environments:
        available = ", ".join(sorted(environments.keys())) or "(none)"
        raise ValueError(
            f"Invalid environment '{env}'. "
            f"Available environments in config.yml: {available}"
        )

    env_settings = environments[env]

    # deep-merge environment-specific settings onto the shared base
    merged = _deep_merge(raw, env_settings)
    # lazy %-formatting: the key list is only built if DEBUG is enabled
    logger.debug("Merged config keys: %s", list(merged.keys()))
    return _wrap_config(merged)

load_secrets(secrets_path=None)

Load project secrets.

Parameters:

Name Type Description Default
secrets_path Path | str | None

Explicit path to a YAML file. Defaults to config/secrets.yml relative to the project root.

None

Returns:

Name Type Description
ConfigNode ConfigNode

A recursively accessible secrets object.

Raises:

Type Description
FileNotFoundError

If the secrets file does not exist. Copy config/secrets_template.yml to config/secrets.yml and fill in your values.

Source code in src/h3_od/config.py
def load_secrets(
    secrets_path: Path | str | None = None,
) -> ConfigNode:
    """Load project secrets.

    Args:
        secrets_path: Explicit path to a YAML file.  Defaults to
            ``config/secrets.yml`` relative to the project root.

    Returns:
        ConfigNode: A recursively accessible secrets object.

    Raises:
        FileNotFoundError: If the secrets file does not exist. Copy
            ``config/secrets_template.yml`` to ``config/secrets.yml`` and
            fill in your values.
    """
    # fall back to the standard secrets location when no path is given
    if secrets_path is None:
        yaml_path = CONFIG_DIR / _SECRETS_FILE
    else:
        yaml_path = Path(secrets_path)

    raw_secrets = _load_yaml(yaml_path)
    return ConfigNode(raw_secrets)

Module h3_od.proximity

Proximity streamlines the process of calculating distance metrics using Esri Network Analyst.

get_aoi_h3_origin_destination_distance_parquet(area_of_interest, h3_resolution, parquet_path, network_dataset=None, travel_mode='Walking Distance', max_distance=5.0, search_distance=1.0, origin_batch_size=250, output_batch_size=250, append=True)

Create an origin-destination matrix and save it to parquet.

Parameters:

Name Type Description Default
area_of_interest Union[str, Path, Geometry, List[Geometry]]

Feature Class or Geometry object describing the area of interest to generate an origin-destination matrix for using H3 indices.

required
h3_resolution int

H3 resolution to use when generating an origin-destination matrix.

required
parquet_path Union[str, Path]

Path where the origin-destination table will be saved as Parquet.

required
network_dataset Optional[Path]

Optional path to network dataset to use.

None
travel_mode Optional[str]

Travel mode to use with the network dataset. Default is Walking Distance.

'Walking Distance'
max_distance Optional[float]

Maximum distance (in miles) to search from the origin to the destinations. Default is 5.0.

5.0
search_distance Optional[float]

Distance to search from the origin or destination locations to find a routable edge. Default is 1.0.

1.0
origin_batch_size Optional[int]

Number of origins to look up per batch. Default is 250.

250
output_batch_size Optional[int]

Number of origin locations to output from a solve at a time. This frequently is the bottleneck causing memory overruns, so if troubleshooting, try reducing this size first. The default is 250.

250
append Optional[bool]

Whether to append or replace existing results. The default is True.

True

.. note::

If encountering memory errors during solves, try reducing the ``output_batch_size``. This reduces the number
of records saved to memory before being written to disk. If still encountering issues, also try reducing the
``origin_batch_size``. This is the number of origins submitted to each solve operation.

Returns:

Type Description
Path

Path to where Parquet dataset is saved.

Source code in src/h3_od/proximity.py
def get_aoi_h3_origin_destination_distance_parquet(
    area_of_interest: Union[str, Path, arcpy.Geometry, List[arcpy.Geometry]],
    h3_resolution: int,
    parquet_path: Union[str, Path],
    network_dataset: Optional[Path] = None,
    travel_mode: Optional[str] = "Walking Distance",
    max_distance: Optional[float] = 5.0,
    search_distance: Optional[float] = 1.0,
    origin_batch_size: Optional[int] = 250,
    output_batch_size: Optional[int] = 250,
    append: Optional[bool] = True,
) -> Path:
    """
    Create an origin-destination matrix and save it to parquet.

    Args:
        area_of_interest: Feature Class or Geometry object describing the area of interest to generate
            an origin-destination matrix for using H3 indices.
        h3_resolution: H3 resolution to use when generating an origin-destination matrix.
        parquet_path: Path where the origin-destination table will be saved as Parquet.
        network_dataset: Optional path to network dataset to use.
        travel_mode: Travel mode to use with the network dataset. Default is ``Walking Distance``.
        max_distance: Maximum distance (in miles) to search from the origin to the destinations. Default is ``5.0``.
        search_distance: Distance to search from the origin or destination locations to find a routable edge.
            Default is ``1.0``.
        origin_batch_size: Number of origins to look up per batch. Default is ``250``.
        output_batch_size: Number of origin locations to output from a solve at a time. This frequently is the
            bottleneck causing memory overruns, so if troubleshooting, try reducing this size first. The default is
            ``250``.
        append: Whether to append or replace existing results. The default is ``True``.

    .. note::

        If encountering memory errors during solves, try reducing the ``output_batch_size``. This reduces the number
        of records saved to memory before being written to disk. If still encountering issues, also try reducing the
        ``origin_batch_size``. This is the number of origins submitted to each solve operation.

    Returns:
        Path to where Parquet dataset is saved.
    """
    # if the AOI is described with a feature class, standardize the path to a string
    if isinstance(area_of_interest, Path):
        area_of_interest = str(area_of_interest)

    # if a path to a feature class, get an iterable of geometries to work with
    if isinstance(area_of_interest, str):
        # make sure multiple runs do not cause problems
        arcpy.env.overwriteOutput = True

        # ensure the area of interest is NOT multipart
        area_of_interest = arcpy.management.MultipartToSinglepart(
            area_of_interest, "memory/aoi"
        )

        # get a list of geometries (single comprehension; the original
        # double-nested comprehension was redundant)
        area_of_interest = [
            row[0] for row in arcpy.da.SearchCursor(area_of_interest, "SHAPE@")
        ]

    # if just a single geometry, make into a list
    if not isinstance(area_of_interest, Iterable):
        area_of_interest = [area_of_interest]

    logger.debug("Getting H3 origin indices for the area of interest.")

    # iterate the geometries getting nested iterable (generator) of h3 indices
    h3_idx_gen = (
        list(h3_arcpy.get_h3_indices_for_esri_polygon(geom, resolution=h3_resolution))
        for geom in area_of_interest
    )

    # iterate the generators into single iterable
    h3_origin_tpl = tuple(itertools.chain(*h3_idx_gen))

    logger.info(
        f"{len(h3_origin_tpl):,} origins H3 indices retrieved for the area of interest."
    )

    # solve the batch and save the incremental result
    pqt_pth = get_origin_destination_parquet(
        origin_h3_indices=h3_origin_tpl,
        parquet_path=parquet_path,
        network_dataset=network_dataset,
        travel_mode=travel_mode,
        max_distance=max_distance,
        search_distance=search_distance,
        origin_batch_size=origin_batch_size,
        output_batch_size=output_batch_size,
        append=append,
    )

    return pqt_pth

get_h3_neighbors(origin_destination_dataset, h3_origin, distance=3.75, warn_on_fail=False)

Get neighbor H3 indices with distance from an origin H3 index.

Parameters:

Name Type Description Default
origin_destination_dataset Union[Dataset, str, Path]

Origin-destination PyArrow dataset or path to Parquet dataset.

required
h3_origin str

Origin H3 index.

required
distance float

Distance around origin to search for. Default is 3.75.

3.75
warn_on_fail bool

Whether to warn if no results found. Default is False.

False

Returns:

Type Description
DataFrame

Pandas dataframe with destination indices and distance.

Source code in src/h3_od/proximity.py
def get_h3_neighbors(
    origin_destination_dataset: Union[ds.Dataset, str, Path],
    h3_origin: str,
    distance: float = 3.75,
    warn_on_fail: bool = False,
) -> Optional[pd.DataFrame]:
    """
    Get neighbor H3 indices with distance from an origin H3 index.

    Args:
        origin_destination_dataset: Origin-destination PyArrow dataset or path to Parquet dataset.
        h3_origin: Origin H3 index.
        distance: Distance around origin to search for. Default is ``3.75``.
        warn_on_fail: Whether to warn if no results found. Default is ``False``.

    Returns:
        Pandas dataframe with destination indices and distance, or ``None``
        when no destinations are found for the origin.
    """
    # handle various ways origin-destination dataset can be provided, and ensure is PyArrow Dataset
    if not isinstance(origin_destination_dataset, ds.Dataset):
        origin_destination_dataset = ds.dataset(
            origin_destination_dataset, format="parquet"
        )

    # create the filter for retrieving data
    fltr = (pc.field("origin_id") == h3_origin) & (
        pc.field("distance_miles") <= distance
    )

    # read in the table with the filter
    od_tbl = origin_destination_dataset.to_table(filter=fltr)

    # handle if no matches found
    if od_tbl.num_rows == 0:
        # provide status message if warning user
        if warn_on_fail:
            warn(
                f"Cannot find destinations for origin {h3_origin}. This potentially is due to the origin being "
                f"unroutable, or searching using the wrong H3 resolution."
            )

        od_df = None

    else:
        od_df = od_tbl.to_pandas()

    return od_df

get_h3_origin_destination_distance(origin_destination_dataset, h3_origin, h3_destination=None, add_geometry=False, warn_on_fail=True)

Given an origin and destination H3 index, get the distance between the indices.

Parameters:

Name Type Description Default
origin_destination_dataset Union[str, Path]

Origin-destination PyArrow dataset or path to Parquet dataset.

required
h3_origin str

Origin H3 index.

required
h3_destination Optional[str]

Destination H3 index. Optional. If not provided, all potential H3 destinations will be returned.

None
add_geometry Optional[bool]

Whether to add a destination indices' geometry. This will return a Spatially Enabled Dataframe.

False
warn_on_fail Optional[bool]

Whether to warn if no results found.

True

Returns:

Type Description
DataFrame

Distance and travel time between H3 indices.

Source code in src/h3_od/proximity.py
def get_h3_origin_destination_distance(
    origin_destination_dataset: Union[ds.Dataset, str, Path],
    h3_origin: str,
    h3_destination: Optional[str] = None,
    add_geometry: Optional[bool] = False,
    warn_on_fail: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Given an origin and destination H3 index, get the distance between the indices.

    Args:
        origin_destination_dataset: Origin-destination PyArrow dataset or path to Parquet dataset.
        h3_origin: Origin H3 index.
        h3_destination: Destination H3 index. Optional. If not provided, all potential H3 destinations will be returned.
        add_geometry: Whether to add a destination indices' geometry. This will return a Spatially Enabled Dataframe.
        warn_on_fail: Whether to warn if no results found.

    Returns:
        Distance and travel time between H3 indices.
    """
    # get the resolution for the origin index
    origin_res = h3.get_resolution(h3_origin)

    # ensure the resolution matches for the origin and destination indices if the destination is provided
    if h3_destination is not None:
        dest_res = h3.get_resolution(h3_destination)
        if origin_res != dest_res:
            raise ValueError(
                f"Origin H3 resolution {origin_res} does not match destination H3 resolution {dest_res}."
            )

    # report the H3 resolution being used
    logger.debug(f"Using H3 resolution {origin_res} for origin-destination lookup.")

    # handle various ways origin-destination dataset can be provided, and ensure is PyArrow Dataset
    if not isinstance(origin_destination_dataset, ds.Dataset):
        origin_destination_dataset = ds.dataset(
            origin_destination_dataset,
            format="parquet",
            partitioning="hive",
        )

    # start building the filter to find destinations
    fltr = pc.field("origin_id") == h3_origin

    # only add the destination filter if provided
    if h3_destination is not None:
        dest_fltr = pc.field("destination_id") == h3_destination
        fltr = fltr & dest_fltr

    # read in the table with the filter
    od_tbl = origin_destination_dataset.filter(fltr).to_table()

    # handle contingency of not finding a match, but if found, provide the distance
    if od_tbl.num_rows == 0 and warn_on_fail:
        warn(
            f"Cannot route between {h3_origin} and {h3_destination}. This may be due to the origin, destination "
            f"or both being un-routable, simply too far apart, or possibly not using the correct resolution "
            f"indices."
        )

    # convert the table to a data frame and return at least the correct object even if there aren't any rows
    od_df = od_tbl.to_pandas()

    # enforce consistent schema
    od_df = od_df[["origin_id", "destination_id", "distance_miles", "time"]]

    # if desired to return spatially enabled dataframe
    if add_geometry:
        od_df["geometry"] = od_df["destination_id"].apply(
            get_arcgis_polygon_for_h3_index
        )

        # set the geometry so valid SeDF
        od_df.spatial.set_geometry("geometry", inplace=True)

    return od_df

get_h3_origin_destination_distance_using_coordinates(origin_destination_dataset, origin_coordinates, destination_coordinates, h3_resolution=10, warn_on_fail=True)

Given origin and destination coordinates, get the distance between using an H3 lookup.

Parameters:

Name Type Description Default
origin_destination_dataset Union[Dataset, str, Path]

Origin-destination PyArrow dataset or path to Parquet dataset.

required
origin_coordinates Union[Tuple[float], List[float]]

Origin coordinates in WGS84.

required
destination_coordinates Union[Tuple[float], List[float]]

Destination coordinates in WGS84.

required
h3_resolution int

H3 resolution origin-destination dataset is using.

10
warn_on_fail bool

Whether to warn if no results found.

True

Returns:

Type Description
DataFrame

Distance between origin and destination locations.

Source code in src/h3_od/proximity.py
def get_h3_origin_destination_distance_using_coordinates(
    origin_destination_dataset: Union[ds.Dataset, str, Path],
    origin_coordinates: Union[Tuple[float], List[float]],
    destination_coordinates: Union[Tuple[float], List[float]],
    h3_resolution: int = 10,
    warn_on_fail: bool = True,
) -> pd.DataFrame:
    """
    Given origin and destination coordinates, get the distance between using an H3 lookup.

    Args:
        origin_destination_dataset: Origin-destination PyArrow dataset or path to Parquet dataset.
        origin_coordinates: Origin coordinates in WGS84.
        destination_coordinates: Destination coordinates in WGS84.
        h3_resolution: H3 resolution origin-destination dataset is using.
        warn_on_fail: Whether to warn if no results found.

    Returns:
        Distance between origin and destination locations.
    """
    # get the indices for the origin and destination locations
    # NOTE: coordinates are (x, y) = (lon, lat); h3 expects (lat, lng)
    h3_origin = h3.latlng_to_cell(
        origin_coordinates[1], origin_coordinates[0], h3_resolution
    )
    h3_dest = h3.latlng_to_cell(
        destination_coordinates[1], destination_coordinates[0], h3_resolution
    )

    # get the distance between the locations
    # BUGFIX: warn_on_fail must be passed by keyword — positionally it
    # previously bound to the add_geometry parameter instead.
    dist = get_h3_origin_destination_distance(
        origin_destination_dataset,
        h3_origin,
        h3_dest,
        warn_on_fail=warn_on_fail,
    )

    return dist

get_nearest_origin_destination_neighbor(origin_destination_dataset, origin_id, distance=0.5, warn_on_fail=False)

Get nearest neighbor unique identifier to an origin identifier.

Parameters:

Name Type Description Default
origin_destination_dataset Union[Dataset, str, Path]

Origin-destination PyArrow dataset or path to Parquet dataset.

required
origin_id int

Unique identifier for origin.

required
distance float

Distance around origin to search for. Default is 0.5.

0.5
warn_on_fail bool

Whether to warn if no results found. Default is False.

False

Returns:

Type Description
Union[str, int]

Unique identifier for the destination.

Source code in src/h3_od/proximity.py
def get_nearest_origin_destination_neighbor(
    origin_destination_dataset: Union[ds.Dataset, str, Path],
    origin_id: int,
    distance: float = 0.5,
    warn_on_fail: bool = False,
) -> Union[str, int]:
    """
    Get nearest neighbor unique identifier to an origin identifier.

    Args:
        origin_destination_dataset: Origin-destination PyArrow dataset or path to Parquet dataset.
        origin_id: Unique identifier for origin.
        distance: Distance around origin to search for. Default is ``0.5``.
        warn_on_fail: Whether to warn if no results found. Default is ``False``.

    Returns:
        Unique identifier for the destination.
    """
    # fetch every destination within the search distance of the origin
    neighbors_df = get_origin_destination_neighbors(
        origin_destination_dataset,
        origin_id=origin_id,
        distance=distance,
        warn_on_fail=warn_on_fail,
    )

    # nothing within range -> no nearest neighbor to report
    if len(neighbors_df.index) == 0:
        return None

    # discard the zero-distance origin-to-self record
    neighbors_df = neighbors_df.loc[
        neighbors_df["origin_id"] != neighbors_df["destination_id"]
    ]

    # locate the row with the smallest remaining distance
    nearest_distance = neighbors_df["distance_miles"].min()
    nearest_row = neighbors_df[
        neighbors_df["distance_miles"] == nearest_distance
    ].iloc[0]
    nearest_id = nearest_row["destination_id"]

    # pandas frequently surfaces integer identifiers as floats; normalize back
    if isinstance(nearest_id, float):
        nearest_id = int(nearest_id)

    return nearest_id

get_network_dataset_layer(network_dataset=None)

Get a network dataset layer, optionally using default.

Parameters:

Name Type Description Default
network_dataset Optional[Path]

Optional path to network dataset being used.

None

Note

If not specified, uses network solver set in Environment settings.

Returns:

Type Description
Layer

NAX Layer.

Source code in src/h3_od/proximity.py
def get_network_dataset_layer(
    network_dataset: Optional[Path] = None,
) -> arcpy._mp.Layer:
    """
    Get a network dataset layer, optionally using default.

    Args:
        network_dataset: Optional path to network dataset being used.

    !!! note

        If not specified, uses network solver set in Environment settings.

    Returns:
        NAX Layer.
    """
    # fall back to the country-level network dataset when none is supplied
    if network_dataset is None:
        network_dataset = Country(COUNTRY_ISO2).properties.network_path

    # geoprocessing tools expect a string path, not a Path object
    if isinstance(network_dataset, Path):
        network_dataset = str(network_dataset)

    # fail fast if the dataset cannot be found or accessed
    if not arcpy.Exists(network_dataset):
        raise FileNotFoundError(
            f"Cannot locate or access network dataset at {network_dataset}."
        )

    # the layer itself is the first element of the tool result
    return arcpy.nax.MakeNetworkDatasetLayer(network_dataset)[0]

get_network_travel_modes(network_dataset=None)

Get the travel modes, which can be used when solving for a network.

Note

If not specified, uses network solver set in Environment settings.

Parameters:

Name Type Description Default
network_dataset Optional[Path]

Optional path to network dataset being used.

None

Returns:

Type Description
List[str]

List of travel mode name strings available for the network dataset.

Source code in src/h3_od/proximity.py
def get_network_travel_modes(
    network_dataset: Optional[Path] = None,
) -> List[str]:
    """
    Get the travel modes, which can be used when solving for a network.

    !!! note

        If not specified, uses network solver set in Environment settings.

    Args:
        network_dataset: Optional path to network dataset being used.

    Returns:
        List of travel mode name strings available for the network dataset.
    """
    # resolve the network dataset layer (falls back to the default dataset)
    layer = get_network_dataset_layer(network_dataset)

    # travel modes come back as a mapping keyed by mode name
    travel_modes = arcpy.nax.GetTravelModes(layer)

    return list(travel_modes.keys())

get_origin_destination_cost_matrix_solver(network_dataset=None, travel_mode='Walking Distance', max_distance=5.0, search_distance=0.25)

Create and configure an ArcPy NAX Origin-Destination Cost Matrix solver.

Parameters:

Name Type Description Default
network_dataset Optional[Path]

Optional path to network dataset to use.

None
travel_mode Optional[str]

Travel mode to use with the network dataset. Default is Walking Distance.

'Walking Distance'
max_distance Optional[float]

Maximum distance (in miles) to search from the origin to the destinations. Default is 5.0.

5.0
search_distance Optional[float]

Distance to search from the origin or destination locations to find a routable edge. Default is 0.25.

0.25

Returns:

Type Description
OriginDestinationCostMatrix

ArcPy NAX Origin-Destination Matrix Solver.

Source code in src/h3_od/proximity.py
def get_origin_destination_cost_matrix_solver(
    network_dataset: Optional[Path] = None,
    travel_mode: Optional[str] = "Walking Distance",
    max_distance: Optional[float] = 5.0,
    search_distance: Optional[float] = 0.25,
) -> arcpy._na._odcms.OriginDestinationCostMatrix:
    """
    Create and configure an ArcPy NAX Origin-Destination Cost Matrix solver.

    Args:
        network_dataset: Optional path to network dataset to use. If not provided, the
            network dataset is resolved by ``get_network_dataset_layer``, matching the
            behavior of ``get_network_travel_modes``.
        travel_mode: Travel mode to use with the network dataset. Default is ``Walking Distance``.
        max_distance: Maximum distance (in miles) to search from the origin to the destinations. Default is `5.0`.
        search_distance: Distance to search from the origin or destination locations to find a routable edge.
            Default is `0.25`.

    Returns:
        ArcPy NAX Origin-Destination Matrix Solver.
    """
    # create a network dataset layer; using get_network_dataset_layer (like
    # get_network_travel_modes) handles the default None case, whereas passing
    # str(None) == "None" to MakeNetworkDatasetLayer would fail
    nds_lyr = get_network_dataset_layer(network_dataset)
    logger.debug("Created network dataset layer.")

    # instantiate origin-destination cost matrix solver object
    odcm = arcpy.nax.OriginDestinationCostMatrix(nds_lyr)
    logger.debug("Created origin-destination cost matrix object.")

    # set the desired travel mode for analysis
    nd_travel_modes = arcpy.nax.GetTravelModes(nds_lyr)
    odcm.travelMode = nd_travel_modes[travel_mode]
    logger.info(f'Origin-destination cost matrix travel mode is "{travel_mode}"')

    # use miles for the distance units
    odcm.distanceUnits = arcpy.nax.DistanceUnits.Miles
    logger.debug("Origin-destination cost matrix distance units set to miles.")

    # maximum distance to solve for based on the distance units above
    odcm.defaultImpedanceCutoff = max_distance
    logger.info(
        f"Origin-destination cost matrix maximum solve distance (defaultImpedanceCutoff) set to "
        f"{max_distance} miles."
    )

    # use miles for the search distance - how far to "snap" points to nearest routable network edge
    odcm.searchToleranceUnits = arcpy.nax.DistanceUnits.Miles
    logger.debug(
        "Origin-destination cost matrix search tolerance (snap distance) units set to miles."
    )

    # set the search distance
    odcm.searchTolerance = search_distance
    logger.info(
        f"Origin-destination cost matrix search tolerance (snap distance) set to {search_distance} miles."
    )

    # don't need geometry, just the origin, destination and output
    odcm.lineShapeType = arcpy.nax.LineShapeType.NoLine
    logger.debug("Origin-destination cost matrix set to not return line geometry.")

    return odcm

get_origin_destination_distance_parquet_from_arcgis_features(h3_features, parquet_path, h3_index_column='GRID_ID', network_dataset=None, travel_mode='Walking Distance', max_distance=5.0, search_distance=1.0)

Create an origin-destination matrix from ArcGIS features, a Feature Class with H3 indices, and save it to parquet.

Parameters:

Name Type Description Default
h3_features Union[str, Path]

Path to H3 feature class created for area of interest using ArcGIS Pro.

required
parquet_path Union[str, Path]

Path where the origin-destination table will be saved as Parquet.

required
h3_index_column str

Column in H3 feature class containing the H3 indices. Default is GRID_ID.

'GRID_ID'
network_dataset Optional[Path]

Optional path to network dataset to use.

None
travel_mode str

Travel mode to use with the network dataset. Default is Walking Distance.

'Walking Distance'
max_distance float

Maximum distance (in miles) to search from the origin to the destinations. Default is 5.0.

5.0
search_distance float

Distance to search from the origin or destination locations to find a routable edge. Default is 1.0.

1.0

Returns:

Type Description
Path

Path to where Parquet dataset is saved.

Source code in src/h3_od/proximity.py
def get_origin_destination_distance_parquet_from_arcgis_features(
    h3_features: Union[str, Path],
    parquet_path: Union[str, Path],
    h3_index_column: str = "GRID_ID",
    network_dataset: Optional[Path] = None,
    travel_mode: str = "Walking Distance",
    max_distance: float = 5.0,
    search_distance: float = 1.0,
) -> Path:
    """
    Create an origin-destination matrix from ArcGIS features, a Feature Class with H3 indices, and save it to parquet.

    Args:
        h3_features: Path to H3 feature class created for area of interest using ArcGIS Pro.
        parquet_path: Path where the origin-destination table will be saved as Parquet.
        h3_index_column: Column in H3 feature class containing the H3 indices. Default is ``GRID_ID``.
        network_dataset: Optional path to network dataset to use.
        travel_mode: Travel mode to use with the network dataset. Default is ``Walking Distance``.
        max_distance: Maximum distance (in miles) to search from the origin to the destinations. Default is `5.0`.
        search_distance: Distance to search from the origin or destination locations to find a routable edge.
            Default is `1.0`.

    Returns:
        Path to where Parquet dataset is saved.
    """
    # geoprocessing tools expect string paths rather than pathlib objects
    h3_features = str(h3_features)

    # collect the distinct H3 indices from the feature class via a set comprehension
    h3_index_lst = list(
        {row[0] for row in arcpy.da.SearchCursor(h3_features, h3_index_column)}
    )

    # solve the H3 indices against themselves and persist the result as Parquet
    return get_origin_destination_parquet(
        origin_h3_indices=h3_index_lst,
        destination_h3_indices=h3_index_lst,
        network_dataset=network_dataset,
        parquet_path=parquet_path,
        travel_mode=travel_mode,
        max_distance=max_distance,
        search_distance=search_distance,
    )

get_origin_destination_neighbors(origin_destination_dataset, origin_id, distance=0.5, warn_on_fail=False)

Get neighbor unique identifiers surrounding an origin identifier.

Parameters:

Name Type Description Default
origin_destination_dataset Union[Dataset, str, Path]

Origin-destination PyArrow dataset or path to Parquet dataset.

required
origin_id int

Unique identifier for origin identifier.

required
distance float

Distance around origin to search for. Default is 0.5.

0.5
warn_on_fail bool

Whether to warn if no results found. Default is False.

False

Returns:

Type Description
DataFrame

Pandas dataframe with destination indices and distance.

Source code in src/h3_od/proximity.py
def get_origin_destination_neighbors(
    origin_destination_dataset: Union[ds.Dataset, str, Path],
    origin_id: int,
    distance: float = 0.5,
    warn_on_fail: bool = False,
) -> pd.DataFrame:
    """
    Get neighbor unique identifiers surrounding an origin identifier.

    Args:
        origin_destination_dataset: Origin-destination PyArrow dataset or path to Parquet dataset.
        origin_id: Unique identifier for origin identifier.
        distance: Distance around origin to search for. Default is ``0.5``.
        warn_on_fail: Whether to warn if no results found. Default is ``False``.

    Returns:
        Pandas dataframe with destination indices and distance.
    """
    # coerce a string or Path into a hive-partitioned PyArrow Dataset
    if not isinstance(origin_destination_dataset, ds.Dataset):
        origin_destination_dataset = ds.dataset(
            origin_destination_dataset,
            format="parquet",
            partitioning="hive",
        )

    # restrict to this origin and to destinations within the requested distance
    origin_filter = pc.field("origin_id") == origin_id
    distance_filter = pc.field("distance_miles") <= distance

    # materialize the filtered rows as a PyArrow table
    od_tbl = origin_destination_dataset.filter(
        origin_filter & distance_filter
    ).to_table()

    # optionally surface a warning when nothing matched
    if warn_on_fail and od_tbl.num_rows == 0:
        warn(
            f'Cannot find destinations for OriginID, "{origin_id}". This likely is due to the origin being '
            f"unroutable."
        )

    # convert to pandas and enforce a consistent column order/schema
    neighbors_df = od_tbl.to_pandas()
    neighbors_df = neighbors_df[
        ["origin_id", "destination_id", "distance_miles", "time"]
    ]

    return neighbors_df

get_origin_destination_oid_col(input_features, id_column)

Determine the unique identifier column for origin or destination features.

Resolves the appropriate identifier column name depending on the input type. For feature classes and layers, falls back to the OID field when no column is explicitly provided. For DataFrames the first column is used, and for other iterables the default "oid" is returned.

Parameters:

Name Type Description Default
input_features Union[Layer, str, Path, Iterable, DataFrame]

The origin or destination features as a layer, feature class path, iterable, or Spatially Enabled DataFrame.

required
id_column str

Explicit column name to use as the unique identifier. When None, a sensible default is inferred from the input type.

required

Returns:

Type Description
str

Name of the unique identifier column for the given input features.

Raises:

Type Description
ValueError

If the explicitly provided id_column does not exist in the feature class schema.

Source code in src/h3_od/proximity.py
def get_origin_destination_oid_col(
    input_features: Union[arcpy._mp.Layer, str, Path, Iterable, pd.DataFrame],
    id_column: str,
) -> str:
    """
    Determine the unique identifier column for origin or destination features.

    Resolves the appropriate identifier column name depending on the input type.
    An explicitly provided ``id_column`` is always honored (and, for feature
    classes and layers, validated against the schema).  When ``id_column`` is
    ``None``, a sensible default is inferred: the OID field for feature classes
    and layers, the first column for DataFrames, and ``"oid"`` for other
    iterables.

    Args:
        input_features: The origin or destination features as a layer, feature
            class path, iterable, or Spatially Enabled DataFrame.
        id_column: Explicit column name to use as the unique identifier.  When
            ``None``, a sensible default is inferred from the input type.

    Returns:
        Name of the unique identifier column for the given input features.

    Raises:
        ValueError: If the explicitly provided ``id_column`` does not exist in
            the feature class schema.
    """
    # if working with a feature class or layer
    if isinstance(input_features, (arcpy._mp.Layer, str, Path)):
        # get unique id column if not explicitly provided for feature classes
        if id_column is None:
            id_column = arcpy.Describe(str(input_features)).OIDFieldName

        # otherwise, make sure provided column exists
        elif id_column not in [f.name for f in arcpy.ListFields(str(input_features))]:
            raise ValueError(
                f'The provided destination_id_column "{id_column}" does not appear to be in the '
                f"destination_features schema."
            )

    # only infer a default when no column was explicitly provided; previously an
    # explicit id_column was silently discarded for DataFrame and iterable inputs,
    # contradicting the documented contract
    elif id_column is None:
        # if working with a data frame, it will be the first column
        if isinstance(input_features, pd.DataFrame):
            id_column = input_features.columns[0]

        # otherwise, just use the oid
        else:
            id_column = "oid"

    return id_column

get_origin_destination_parquet(origin_h3_indices, parquet_path, destination_h3_indices=None, network_dataset=None, travel_mode='Walking Distance', max_distance=5.0, search_distance=0.25, origin_batch_size=250, output_batch_size=250, append=True)

Create an origin-destination matrix between two lists of H3 indices and save to parquet.

Parameters:

Name Type Description Default
origin_h3_indices Union[list, tuple]

Origin H3 indices, the starting locations, for the origin-destination solve.

required
parquet_path Union[str, Path]

Path where the origin-destination table will be saved as Parquet.

required
destination_h3_indices Optional[Iterable]

Destination H3 indices, the ending locations, for the origin-destination solve.

None
network_dataset Optional[Path]

Optional path to network dataset to use.

None
travel_mode Optional[str]

Travel mode to use with the network dataset. Default is Walking Distance.

'Walking Distance'
max_distance Optional[float]

Maximum distance (in miles) to search from the origin to the destinations. Default is 5.0.

5.0
search_distance Optional[float]

Distance to search from the origin or destination locations to find a routable edge. Default is 0.25.

0.25
origin_batch_size Optional[int]

Number of origin locations to use per origin-destination solve. If experiencing memory overruns, reduce the batch size. The default is 250.

250
output_batch_size Optional[int]

Number of origin locations to output from a solve at a time. This frequently is the bottleneck causing memory overruns, so if troubleshooting, try reducing this size first. The default is 250.

250
append Optional[bool]

Whether to append or replace existing results. The default is True.

True

Returns:

Type Description
Path

Path to where Parquet dataset is saved.

Source code in src/h3_od/proximity.py
def get_origin_destination_parquet(
    origin_h3_indices: Union[list, tuple],
    parquet_path: Union[str, Path],
    destination_h3_indices: Optional[Iterable] = None,
    network_dataset: Optional[Path] = None,
    travel_mode: Optional[str] = "Walking Distance",
    max_distance: Optional[float] = 5.0,
    search_distance: Optional[float] = 0.25,
    origin_batch_size: Optional[int] = 250,
    output_batch_size: Optional[int] = 250,
    append: Optional[bool] = True,
) -> Path:
    """
    Create an origin-destination matrix between two lists of H3 indices and save to parquet.

    Origins are solved in batches of ``origin_batch_size`` to bound memory use. When
    ``append`` is ``True`` and output already exists at ``parquet_path``, previously
    solved origin (and destination) ids are filtered out so the solve resumes where
    it left off.

    Args:
        origin_h3_indices: Origin H3 indices, the starting locations, for the origin-destination solve.
        parquet_path: Path where the origin-destination table will be saved as Parquet. A path ending
            in ``.parquet`` is treated as a single file; otherwise it is treated as a dataset directory.
        destination_h3_indices: Destination H3 indices, the ending locations, for the origin-destination solve.
        network_dataset: Optional path to network dataset to use.
        travel_mode: Travel mode to use with the network dataset. Default is ``Walking Distance``.
        max_distance: Maximum distance (in miles) to search from the origin to the destinations. Default is ``5.0``.
        search_distance: Distance to search from the origin or destination locations to find a routable edge.
            Default is `0.25`.
        origin_batch_size: Number of origin locations to use per origin-destination solve. If experiencing memory
            overruns, reduce the batch size. The default is ``250``.
        output_batch_size: Number of origin locations to output from a solve at a time. This frequently is the
            bottleneck causing memory overruns, so if troubleshooting, try reducing this size first. The default is
            ``250``.
        append: Whether to append or replace existing results. The default is ``True``.

    Returns:
        Path to where Parquet dataset is saved.
    """
    # ensure parquet_path is a Path object
    parquet_path = (
        Path(parquet_path) if not isinstance(parquet_path, Path) else parquet_path
    )

    # determine if parquet_path refers to a single file or a directory
    is_single_file = parquet_path.suffix == ".parquet" or (
        parquet_path.exists() and parquet_path.is_file()
    )

    # make sure the location to save the parquet exists (only create directory for dataset-style output)
    if not is_single_file and not parquet_path.exists():
        parquet_path.mkdir(parents=True)

    # get the resolution of the input H3 indices (assumes all indices share one resolution
    # — only the first index is inspected)
    h3_resolution = h3_arcpy.get_h3_resolution(origin_h3_indices[0])

    # check if existing parquet data is available for append filtering
    has_existing_data = False
    if append and parquet_path.exists():
        if is_single_file:
            has_existing_data = parquet_path.is_file()
        else:
            has_existing_data = any(parquet_path.iterdir())

    # if appending, use pyarrow dataset filtering to find preexisting origin (and optionally destination) ids
    if has_existing_data:
        try:
            # open the existing parquet as a dataset — works for both single files and directories
            ds_kwargs = {"source": parquet_path, "format": "parquet"}
            if not is_single_file:
                ds_kwargs["partitioning"] = "hive"
            existing_ds = ds.dataset(**ds_kwargs)

            # get existing origin ids using pyarrow dataset filtering
            existing_origin_id_set = set(
                pc.unique(
                    existing_ds.to_table(columns=["origin_id"]).column("origin_id")
                ).to_pylist()
            )
            logger.debug(
                f"{len(existing_origin_id_set):,} origins already solved for in output parquet data."
            )

            original_len = len(origin_h3_indices)
            if existing_origin_id_set:
                # keep only origins not already present in the output
                origin_h3_indices = [
                    idx
                    for idx in origin_h3_indices
                    if idx not in existing_origin_id_set
                ]
                logger.debug(
                    f"Only have to solve for {len(origin_h3_indices):,} origins instead of {original_len:,}."
                )

            # optionally filter destination indices already present in the dataset
            if destination_h3_indices is not None:
                existing_dest_id_set = set(
                    pc.unique(
                        existing_ds.to_table(columns=["destination_id"]).column(
                            "destination_id"
                        )
                    ).to_pylist()
                )
                destination_h3_indices = [
                    idx
                    for idx in destination_h3_indices
                    if idx not in existing_dest_id_set
                ]
                logger.debug(
                    f"{len(existing_dest_id_set):,} destinations already present; "
                    f"{len(destination_h3_indices):,} new destinations remaining."
                )

        except Exception as e:
            # best-effort resume: fall back to a full solve rather than failing
            logger.warning(
                f"Unable to read existing parquet dataset for append filtering: {e}. "
                f"Proceeding without filtering."
            )

    # get the count of input features for batching
    origin_cnt = len(origin_h3_indices)

    # batch the solve based on the input feature count
    origin_batch_cnt = math.ceil(origin_cnt / origin_batch_size)

    logger.info(
        f"The origin-destination matrix solution will require {origin_batch_cnt:,} iterations."
    )

    # create the schema to use for converting the list to a pyarrow table, required for saving to parquet
    pa_schema = pa.schema(
        [
            pa.field(
                "h3_resolution",
                pa.string() if isinstance(h3_resolution, str) else pa.int64(),
            ),
            pa.field("origin_id", pa.string()),
            pa.field("destination_id", pa.string()),
            pa.field("distance_miles", pa.float64()),
            pa.field("time", pa.float64()),
        ]
    )

    # iterate the number of times it takes to process all the input features
    for batch_idx in range(origin_batch_cnt):
        logger.info(
            f"Starting the origin-destination cost matrix batch {(batch_idx + 1):,} of {origin_batch_cnt:,}."
        )

        # create a list of the object identifiers in the input data for this batch
        start_idx = batch_idx * origin_batch_size
        end_idx = start_idx + origin_batch_size
        batch_origin_lst = origin_h3_indices[start_idx:end_idx]

        # solve the OD cost matrix for this batch
        # NOTE(review): destination_h3_indices is filtered above but never passed to
        # _solve_batch — confirm the solver derives destinations elsewhere (e.g. uses
        # the origins as destinations)
        result, _batch_dest_lst = _solve_batch(
            batch_origin_lst,
            max_distance,
            network_dataset,
            travel_mode,
            search_distance,
        )

        # export solve results to parquet
        _export_results_to_parquet(
            result,
            batch_origin_lst,
            h3_resolution,
            pa_schema,
            parquet_path,
            output_batch_size,
            batch_idx,
        )

    logger.info(
        f"Successfully created origin-destination cost matrix and saved parquet result to {parquet_path}"
    )

    return parquet_path

validate_h3_index_list(input_features)

Validate and normalize a list of H3 indices into tuples of index strings and point geometries.

Handles H3 indices provided as hexadecimal strings, integer strings, or native integers, converting them to a consistent list of (h3_index, PointGeometry) tuples suitable for use with ArcPy network analysis workflows.

Parameters:

Name Type Description Default
input_features List[Union[str, int]]

List of H3 indices as hexadecimal strings, numeric strings, or integers.

required

Returns:

Type Description
List[Tuple[str, PointGeometry]]

List of tuples where each element is an H3 index string paired with its corresponding

List[Tuple[str, PointGeometry]]

arcpy.PointGeometry in WGS 84 (WKID 4326).

Source code in src/h3_od/proximity.py
def validate_h3_index_list(
    input_features: List[Union[str, int]],
) -> List[Tuple[str, arcpy.PointGeometry]]:
    """
    Validate and normalize a list of H3 indices into tuples of index strings and point geometries.

    Handles H3 indices provided as hexadecimal strings, integer strings, or native integers,
    converting them to a consistent list of ``(h3_index, PointGeometry)`` tuples suitable for
    use with ArcPy network analysis workflows.

    Args:
        input_features: List of H3 indices as hexadecimal strings, numeric strings, or integers.

    Returns:
        List of tuples where each element is an H3 index string paired with its corresponding
        ``arcpy.PointGeometry`` in WGS 84 (WKID 4326).
    """
    # peek at the first element (if any) to determine how the indices are encoded
    if input_features:
        sample = input_features[0]

        # native integers -> hexadecimal strings
        if isinstance(sample, int):
            input_features = [h3.int_to_str(h3_idx) for h3_idx in input_features]

        # integers serialized as digit-only strings -> hexadecimal strings
        elif isinstance(sample, str) and sample.isdigit():
            input_features = [
                h3.int_to_str(int(h3_idx)) for h3_idx in input_features
            ]

    # pair each index with its centroid as an ArcPy PointGeometry; h3 returns
    # (latitude, longitude), so the coordinates are swapped for Point(x, y)
    normalized = []
    for h3_idx in input_features:
        lat, lng = h3.cell_to_latlng(h3_idx)
        point_geom = arcpy.PointGeometry(
            arcpy.Point(lng, lat), spatial_reference=4326
        )
        normalized.append((h3_idx, point_geom))

    return normalized

validate_origin_destination_inputs(input_features)

Validate and normalize origin or destination input features for network analysis.

Accepts a variety of input formats — H3 index lists, identifier-geometry tuples, ArcPy layers, Spatially Enabled DataFrames, or paths to feature classes — and converts them into a consistent format (layer path or feature set) that can be consumed by the ArcPy NAX origin-destination cost matrix solver.

Parameters:

Name Type Description Default
input_features Union[List[Union[int, str]], Iterable[Tuple[Union[int, str], Geometry]], Layer, DataFrame, str, Path]

Input features in one of the following forms:

  • A list of H3 indices (as int or str).
  • An iterable of (id, arcpy.Geometry) tuples.
  • An arcpy._mp.Layer reference.
  • A Spatially Enabled pd.DataFrame with Point or Polygon geometry.
  • A str or Path pointing to a feature class on disk.
required

Returns:

Type Description
Union[Layer, str, FeatureSet]

The validated input features as an arcpy._mp.Layer path string,

Union[Layer, str, FeatureSet]

layer reference, or arcpy.FeatureSet.

Raises:

Type Description
ValueError

If geometry type is not Point or Polygon, or if a DataFrame fails spatial validation.

Source code in src/h3_od/proximity.py
def validate_origin_destination_inputs(
    input_features: Union[
        List[Union[int, str]],
        Iterable[Tuple[Union[int, str], arcpy.Geometry]],
        arcpy._mp.Layer,
        pd.DataFrame,
        str,
        Path,
    ],
) -> Union[arcpy._mp.Layer, str, arcpy.FeatureSet]:
    """
    Validate and normalize origin or destination input features for network analysis.

    Accepts a variety of input formats — H3 index lists, identifier-geometry tuples,
    ArcPy layers, Spatially Enabled DataFrames, or paths to feature classes — and
    converts them into a consistent format (layer path or feature set) that can be
    consumed by the ArcPy NAX origin-destination cost matrix solver.

    Args:
        input_features: Input features in one of the following forms:

            - A list of H3 indices (as ``int`` or ``str``).
            - An iterable of ``(id, arcpy.Geometry)`` tuples.
            - An ``arcpy._mp.Layer`` reference.
            - A Spatially Enabled ``pd.DataFrame`` with Point or Polygon geometry.
            - A ``str`` or ``Path`` pointing to a feature class on disk.

    Returns:
        The validated input features as an ``arcpy._mp.Layer`` path string,
        layer reference, or ``arcpy.FeatureSet``.

    Raises:
        ValueError: If geometry type is not Point or Polygon, or if a DataFrame
            fails spatial validation.
    """
    # first check if the dataframe is spatially enabled AND point or polygon
    # (must come before the Iterable branch, since a DataFrame is also iterable)
    if isinstance(input_features, pd.DataFrame):
        if not input_features.spatial.validate():
            raise ValueError(
                "Input features' data frame does not appear to be validated. Please try "
                "df.spatial.set_geometry"
            )

        if (
            input_features.spatial.geometry_type != "Point"
            and input_features.spatial.geometry_type != "Polygon"
        ):
            raise ValueError(
                "Input features' geometry must be either Point or Polygon."
            )

    # if a list, can be just h3 indices, but also list of unique ids and geometries
    elif isinstance(input_features, Iterable):
        # peek at the first item to determine the element shape; the loop body
        # reassigns input_features and then breaks immediately, so only one
        # element is ever inspected
        for first_itm in input_features:
            # if just a list of identifiers, process as H3 indices
            if isinstance(first_itm, (str, int)):
                input_features = validate_h3_index_list(input_features)

            # otherwise, make sure geometries are valid, and convert geometries to Python API geometries
            else:
                # element assumed to be an (id, geometry) tuple — geometry second
                geom = first_itm[1]

                # ensure point is PointGeometry
                if isinstance(geom, arcpy.Point):
                    geom = arcpy.PointGeometry(geom)

                # ensure is correct geometry type (only the first element is checked)
                if not isinstance(geom, (arcpy.PointGeometry, arcpy.Polygon)):
                    raise ValueError(
                        "Input features' geometry must be either PointGeometry or Polygon."
                    )

                # convert ArcPy geometries to Python API geometries
                input_features = [
                    (oid, arcgis.geometry.Geometry.from_arcpy(arcpy_geom))
                    for oid, arcpy_geom in input_features
                ]

            break

    # if a layer or path to data, use describe to check the geometry type
    else:
        if isinstance(input_features, Path):
            input_features = str(input_features)

        geom_typ = arcpy.Describe(input_features).shapeType
        if geom_typ != "Point" and geom_typ != "Polygon":
            raise ValueError(
                "Input features' geometry type must be either Point or Polygon."
            )

    # now, if a list, is tuples of oid and geometry...convert to spatially enabled data frame
    if isinstance(input_features, list):
        input_features = pd.DataFrame(input_features, columns=["oid", "SHAPE"])
        input_features.spatial.set_geometry("SHAPE")

    # finally, if spatially enabled data frame, convert to temporary feature class
    if isinstance(input_features, pd.DataFrame):
        # check and set the spatial reference if not already set for the features
        if input_features.spatial.sr["wkid"] is None:
            input_features.spatial.sr = arcgis.geometry.SpatialReference(4326)

        # converting SeDF to Feature Set and passing through Copy Features since much more reliable;
        # a uuid-suffixed name in the scratch GDB avoids collisions between runs
        input_features = arcpy.management.CopyFeatures(
            arcpy.FeatureSet(input_features.spatial.to_featureset()),
            os.path.join(arcpy.env.scratchGDB, f"f_{uuid.uuid4().hex}"),
        )[0]

    return input_features

Module h3_od.distance_decay

Functions enabling calculation of distance decay, applying a S-shaped sigmoid curve to model distance decay.

get_bus_stop_distance_decay_index(distance)

Get the distance decay coefficient for a bus stop.

Parameters:

Name Type Description Default
distance Union[float, int]

Walking distance in miles to the bus stop.

required

Returns:

Type Description
float

Sigmoid distance decay index between 0.0 and 1.0 for the given bus stop distance.

Source code in src/h3_od/distance_decay.py
def get_bus_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a bus stop.

    Args:
        distance: Walking distance in miles to the bus stop.

    Returns:
        Sigmoid distance decay index between 0.0 and 1.0 for the given bus stop distance.
    """
    # sigmoid tuned for bus stops: steepness 5.8, midpoint at 0.65 miles
    return get_sigmoid_distance_decay_index(distance, 5.8, 0.65)

get_light_rail_stop_distance_decay_index(distance)

Get the distance decay coefficient for a light rail stop.

Parameters:

Name Type Description Default
distance Union[float, int]

Walking distance in miles to the light rail stop or station.

required

Returns:

Type Description
float

Sigmoid distance decay index between 0.0 and ~0.98 for the given light rail stop distance.

Source code in src/h3_od/distance_decay.py
def get_light_rail_stop_distance_decay_index(distance: Union[float, int]) -> float:
    """
    Get the distance decay coefficient for a light rail stop.

    Args:
        distance: Walking distance in miles to the light rail stop or station.

    Returns:
        Sigmoid distance decay index between 0.0 and ~0.98 for the given light rail stop distance.
    """
    # sigmoid tuned for light rail (steepness 4.8, midpoint 1.3 miles),
    # scaled so the index tops out at ~0.98 rather than 1.0
    return get_sigmoid_distance_decay_index(distance, 4.8, 1.3) * 0.98

get_sigmoid_distance_decay_index(distance, steepness, offset)

Get sigmoid distance decay index.

Parameters:

Name Type Description Default
distance Union[float, int]

Distance to calculate decay for.

required
steepness Union[float, int]

Controls how sharply the decay curve transitions from 1 to 0. Higher values produce a steeper drop-off.

required
offset Union[float, int]

Distance value at which the decay index equals 0.5, shifting the midpoint of the sigmoid curve along the distance axis.

required

Returns:

Type Description
float

Sigmoid distance decay index between 0.0 and 1.0.

Source code in src/h3_od/distance_decay.py
def get_sigmoid_distance_decay_index(
    distance: Union[float, int], steepness: Union[float, int], offset: Union[float, int]
) -> float:
    """
    Get sigmoid distance decay index.

    Args:
        distance: Distance to calculate decay for.
        steepness: Controls how sharply the decay curve transitions from 1 to 0.
            Higher values produce a steeper drop-off.
        offset: Distance value at which the decay index equals 0.5, shifting
            the midpoint of the sigmoid curve along the distance axis.

    Returns:
        Sigmoid distance decay index between 0.0 and 1.0.
    """
    # shift the curve so its midpoint (value 0.5) sits at ``offset``, then
    # apply the logistic function 1 / (1 + e^x)
    exponent = steepness * (distance - offset)
    return 1.0 / (1.0 + np.exp(exponent))

Module h3_od.utils

get_arcgis_polygon_for_h3_index(h3_index)

For a single H3 index, get the ArcGIS Polygon geometry for the index.

Parameters:

Name Type Description Default
h3_index Union[str, int]

H3 index.

required

Returns:

Type Description
Polygon

ArcGIS Polygon geometry for the index.

Source code in src/h3_od/utils/h3_arcgis.py
def get_arcgis_polygon_for_h3_index(h3_index: Union[str, int]) -> Polygon:
    """
    For a single H3 index, get the ArcGIS Polygon geometry for the index.

    Args:
        h3_index: H3 index.

    Returns:
        ArcGIS Polygon geometry for the index.
    """
    # normalize the index into the representation the h3 int API expects
    cell = preprocess_h3_index(h3_index)

    # h3 returns boundary vertices as (lat, lng) pairs; ArcGIS rings use
    # [x, y] == [lng, lat] order, so swap each pair while building the ring
    ring = [[lng, lat] for lat, lng in h3_int.cell_to_boundary(cell)]

    # WGS84 geographic coordinates (WKID 4326)
    return Polygon({"rings": [ring], "spatialReference": {"wkid": 4326}})

get_logger(logger_name=None, level='INFO', logfile_path=None, log_format='%(asctime)s | %(name)s | %(levelname)s | %(message)s', propagate=True, add_stream_handler=True, add_arcpy_handler=False)

Get Python :class:Logger<logging.Logger> configured to provide stream, file or, if available, ArcPy output. The way the method is set up, logging will be routed through ArcPy messaging using :class:ArcpyHandler if ArcPy is available. If ArcPy is not available, messages will be sent to the console using a :class:StreamHandler<logging.StreamHandler>. Next, if the logfile_path is provided, log messages will also be written to the provided path to a logfile using a :class:FileHandler<logging.FileHandler>.

Valid log_level inputs include: * DEBUG - Detailed information, typically of interest only when diagnosing problems. * INFO - Confirmation that things are working as expected. * WARNING or WARN - An indication that something unexpected happened, or indicative of some problem in the near future (e.g. "disk space low"). The software is still working as expected. * ERROR - Due to a more serious problem, the software has not been able to perform some function. * CRITICAL - A serious error, indicating that the program itself may be unable to continue running.

Note

Logging levels can be provided as strings (e.g. 'DEBUG'), corresponding integer values or using the logging module constants (e.g. logging.DEBUG).

Parameters:

Name Type Description Default
logger_name Optional[str]

Name of the logger. If None, the root logger is used.

None
level Optional[Union[str, int]]

Logging level to use. Default is INFO.

'INFO'
log_format Optional[str]

Format string for the logging messages. Default is '%(asctime)s | %(name)s | %(levelname)s | %(message)s'.

'%(asctime)s | %(name)s | %(levelname)s | %(message)s'
propagate bool

If True, log messages are passed to the handlers of ancestor loggers. Default is True.

True
logfile_path Union[Path, str]

Where to save the logfile if file output is desired.

None
add_stream_handler bool

If True, add a StreamHandler to route logging to the console. Default is True.

True
add_arcpy_handler bool

If True and ArcPy is available, add the ArcpyHandler to route logging through ArcPy messaging. Default is False.

False
logger = get_logger(level='DEBUG')
logger.debug('nauseatingly detailed debugging message')
logger.info('something actually useful to know')
logger.warning('The sky may be falling')
logger.error('The sky is falling.')
logger.critical('The sky appears to be falling because a giant meteor is colliding with the earth.')
Source code in src/h3_od/utils/_logging.py
def get_logger(
    logger_name: Optional[str] = None,
    level: Optional[Union[str, int]] = "INFO",
    logfile_path: Union[Path, str] = None,
    log_format: Optional[str] = "%(asctime)s | %(name)s | %(levelname)s | %(message)s",
    propagate: bool = True,
    add_stream_handler: bool = True,
    add_arcpy_handler: bool = False,
) -> logging.Logger:
    """
    Get Python :class:`Logger<logging.Logger>` configured to provide stream, file or, if available, ArcPy output.
    The way the method is set up, logging will be routed through ArcPy messaging using :class:`ArcpyHandler` if
    ArcPy is available. If ArcPy is *not* available, messages will be sent to the console using a
    :class:`StreamHandler<logging.StreamHandler>`. Next, if the `logfile_path` is provided, log messages will also
    be written to the provided path to a logfile using a :class:`FileHandler<logging.FileHandler>`.

    Valid `log_level` inputs include:
    * `DEBUG` - Detailed information, typically of interest only when diagnosing problems.
    * `INFO` - Confirmation that things are working as expected.
    * `WARNING` or ``WARN`` -  An indication that something unexpected happened, or indicative of some problem in the
        near future (e.g. "disk space low"). The software is still working as expected.
    * `ERROR` - Due to a more serious problem, the software has not been able to perform some function.
    * `CRITICAL` - A serious error, indicating that the program itself may be unable to continue running.

    !!! note

        Logging levels can be provided as strings (e.g. `'DEBUG'`, case-insensitive), corresponding integer values
        or using the logging module constants (e.g. `logging.DEBUG`).

    Args:
        logger_name: Name of the logger. If `None`, the root logger is used.
        level: Logging level to use. Default is INFO.
        log_format: Format string for the logging messages. Default is `'%(asctime)s | %(name)s | %(levelname)s | %(message)s'`.
        propagate: If `True`, log messages are passed to the handlers of ancestor loggers. Default is `True`.
        logfile_path: Where to save the logfile if file output is desired.
        add_stream_handler: If `True`, add a `StreamHandler` to route logging to the console. Default is `True`.
        add_arcpy_handler: If `True` and ArcPy is available, add the `ArcpyHandler` to route logging through
            ArcPy messaging. Default is `False`.

    ``` python
    logger = get_logger(level='DEBUG')
    logger.debug('nauseatingly detailed debugging message')
    logger.info('something actually useful to know')
    logger.warning('The sky may be falling')
    logger.error('The sky is falling.')
    logger.critical('The sky appears to be falling because a giant meteor is colliding with the earth.')
    ```

    """
    # ensure valid logging level
    log_str_lst = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "WARN", "FATAL"]
    log_int_lst = [0, 10, 20, 30, 40, 50]

    if not isinstance(level, (str, int)):
        raise ValueError(
            "You must define a specific logging level for log_level as a string or integer."
        )

    # accept lowercase level strings; logging.setLevel only recognizes uppercase names
    if isinstance(level, str):
        level = level.upper()

    if isinstance(level, str) and level not in log_str_lst:
        raise ValueError(
            f'The log_level must be one of {log_str_lst}. You provided "{level}".'
        )
    elif isinstance(level, int) and level not in log_int_lst:
        raise ValueError(
            f"If providing an integer for log_level, it must be one of the following, {log_int_lst}."
        )

    # get default logger and set logging level at the same time
    logger = logging.getLogger(logger_name)
    logger.setLevel(level=level)

    # configure formatting
    log_frmt = logging.Formatter(log_format)

    # set propagation
    logger.propagate = propagate

    # make sure at least a stream handler is present
    if add_stream_handler:
        # create and add the stream handler
        sh = logging.StreamHandler()
        sh.setFormatter(log_frmt)
        logger.addHandler(sh)

    # if in an environment with ArcPy, add handler to bubble logging up to ArcGIS through ArcPy
    if find_spec("arcpy") is not None and add_arcpy_handler:
        ah = ArcpyHandler()
        ah.setFormatter(log_frmt)
        logger.addHandler(ah)

    # if a path for the logfile is provided, log results to the file
    if logfile_path is not None:
        # coerce to Path so string inputs (allowed by the signature) work too
        logfile_path = Path(logfile_path)

        # ensure the full path exists (exist_ok avoids a check-then-create race)
        logfile_path.parent.mkdir(parents=True, exist_ok=True)

        # create and add the file handler
        fh = logging.FileHandler(str(logfile_path))
        fh.setFormatter(log_frmt)
        logger.addHandler(fh)

    return logger

get_pyarrow_dataset_from_parquet(dataset_path)

Load a Parquet dataset from disk into a PyArrow Dataset.

Convenience wrapper around pyarrow.dataset.dataset that applies Hive-style partitioning, which is the partitioning scheme used by the origin-destination Parquet outputs in this project.

Parameters:

Name Type Description Default
dataset_path Path

Path to the Parquet file or directory containing the partitioned Parquet dataset.

required

Returns:

Type Description
Dataset

A pyarrow.dataset.Dataset backed by the Parquet data at the given path.

Source code in src/h3_od/utils/_pyarrow.py
def get_pyarrow_dataset_from_parquet(dataset_path: Path) -> ds.Dataset:
    """
    Load a Parquet dataset from disk into a PyArrow Dataset.

    Thin wrapper around ``pyarrow.dataset.dataset`` preconfigured for the
    Hive-style partitioning scheme used by the origin-destination Parquet
    outputs in this project.

    Args:
        dataset_path: Path to the Parquet file or directory containing the
            partitioned Parquet dataset.

    Returns:
        A ``pyarrow.dataset.Dataset`` backed by the Parquet data at the given path.
    """
    return ds.dataset(dataset_path, format="parquet", partitioning="hive")