Skip to content

Utils API

Utility functions for visualization, compliance checking, and data processing.

utils

Utilities for batch processing, multi-level analysis, and publication-quality plotting.

Functions

get_custom_constraints

get_custom_constraints()

GET_CUSTOM_CONSTRAINTS: Get custom constraints for common structures. :return: DataFrame with custom constraints for common structures.

Source code in src/dosemetrics/utils/compliance.py
def get_custom_constraints():
    """
    GET_CUSTOM_CONSTRAINTS: Get custom constraints for common structures.
    :return: DataFrame indexed by "Structure" with "Constraint Type" and
        "Level" columns, one row per structure.
    """
    column_names = ["Structure", "Constraint Type", "Level"]
    # One (structure, constraint type, level) triple per row.
    limits = [
        ("Brain", "mean", 30),
        ("BrainStem", "max", 56),
        ("Chiasm", "max", 55),
        ("Cochlea_L", "max", 45),
        ("Cochlea_R", "max", 45),
        ("LacrimalGland_L", "max", 40),
        ("LacrimalGland_R", "max", 40),
        ("OpticNerve_L", "max", 56),
        ("OpticNerve_R", "max", 56),
        ("GTV", "nmean", 60),
        ("CTV", "nmean", 60),
        ("PTV", "nmean", 60),
    ]

    table = pd.DataFrame(limits, columns=column_names)
    return table.set_index("Structure")

get_default_constraints

get_default_constraints()

GET_DEFAULT_CONSTRAINTS: Get default constraints for common structures. :return: DataFrame with default constraints for common structures.

Source code in src/dosemetrics/utils/compliance.py
def get_default_constraints():
    """
    GET_DEFAULT_CONSTRAINTS: Get default constraints for common structures.
    :return: DataFrame indexed by "Structure" with "Constraint Type" and
        "Level" columns, one row per structure.
    """
    column_names = ["Structure", "Constraint Type", "Level"]
    # One (structure, constraint type, level) triple per row.
    limits = [
        ("Brain", "mean", 30),
        ("BrainStem", "max", 54),
        ("Chiasm", "max", 54),
        ("Cochlea_L", "mean", 45),
        ("Cochlea_R", "mean", 45),
        ("Eye_L", "max", 10),
        ("Eye_R", "max", 10),
        ("Hippocampus_L", "mean", 30),
        ("Hippocampus_R", "mean", 30),
        ("LacrimalGland_L", "mean", 25),
        ("LacrimalGland_R", "mean", 25),
        ("OpticNerve_L", "max", 54),
        ("OpticNerve_R", "max", 54),
        ("Pituitary", "mean", 45),
        ("Target", "min", 60),
    ]

    table = pd.DataFrame(limits, columns=column_names)
    return table.set_index("Structure")

check_compliance

check_compliance(df, constraint)

CHECK_COMPLIANCE: Check compliance of dose metrics with constraints. :param df: DataFrame indexed by structure name with the dose-metric columns "Max Dose", "Mean Dose" and "Min Dose". :param constraint: DataFrame indexed by structure name with "Constraint Type" and "Level" columns, e.g. as constructed by get_default_constraints(). :return: DataFrame with compliance status and failure reason for each structure.

Source code in src/dosemetrics/utils/compliance.py
def check_compliance(df, constraint):
    """
    CHECK_COMPLIANCE: Check compliance of dose metrics with constraints.

    Only structures present in both ``constraint`` and ``df`` are evaluated.
    Dose values are coerced with ``pd.to_numeric`` for every constraint type,
    so metrics stored as numeric strings are handled the same way in all
    branches. Structures with a "volume" constraint are not evaluated: a
    warning is emitted and they are left out of the result. Structures with
    any other unrecognised constraint type are left out silently.

    :param df: DataFrame indexed by structure name with "Max Dose",
        "Mean Dose" and "Min Dose" columns (only the column required by a
        structure's constraint type is read).
    :param constraint: DataFrame indexed by structure name with
        "Constraint Type" and "Level" columns, e.g. constructed using
        get_default_constraints().
    :return: DataFrame indexed by structure name with "Compliance" and
        "Reason" columns, one row per evaluated structure.
    """
    import warnings

    evaluated = []
    verdicts = []

    for structure in constraint.index:
        if structure not in df.index:
            continue

        kind = constraint.loc[structure, "Constraint Type"]
        level = constraint.loc[structure, "Level"]

        if kind == "max":
            dose = float(pd.to_numeric(df.loc[structure, "Max Dose"]))
            if dose > level:
                verdict = (
                    "❌ No",
                    f"Max dose constraint: {float(level)}, exceeded: {dose:.2f}",
                )
            else:
                verdict = ("✅ Yes", "Max dose is within constraint! ")
        elif kind == "min":
            raw_dose = df.loc[structure, "Min Dose"]
            if float(pd.to_numeric(raw_dose)) < level:
                # The raw (unformatted) value is reported, as before.
                verdict = (
                    "❌ No",
                    "Min dose constraint: "
                    + str(level)
                    + " not met: "
                    + str(raw_dose),
                )
            else:
                verdict = ("✅ Yes", "Min dose is within constraint! ")
        elif kind == "mean":
            dose = float(pd.to_numeric(df.loc[structure, "Mean Dose"]))
            if dose > level:
                verdict = (
                    "❌ No",
                    f"Mean dose constraint: {level}, exceeded: {dose:.2f}",
                )
            else:
                verdict = ("✅ Yes", "Mean dose is within constraint! ")
        elif kind == "nmean":
            # "Negative" mean constraint, used for targets: the mean dose
            # must not fall below the level.
            dose = float(pd.to_numeric(df.loc[structure, "Mean Dose"]))
            if dose < level:
                verdict = (
                    "❌ No",
                    f"Target mean dose constraint: {level},"
                    f" higher than: {dose:.2f}",
                )
            else:
                verdict = ("✅ Yes", "Target mean dose is achieved! ")
        elif kind == "volume":
            # Previously an exception object was built here and discarded,
            # so the structure vanished without any signal. Warn instead of
            # raising so existing callers keep working.
            warnings.warn("Volume constraint not implemented yet!", stacklevel=2)
            continue
        else:
            continue

        evaluated.append(structure)
        verdicts.append(verdict)

    # Build the frame once instead of growing it cell by cell.
    return pd.DataFrame(verdicts, index=evaluated, columns=["Compliance", "Reason"])

quality_index

quality_index(dose: Dose, structure: Structure, constraint_type: str, constraint_level: float) -> float

Compute the quality index of a dose distribution relative to a constraint.

Quality index interpretation:
- Positive values: the constraint is met (higher is better, 1.0 is ideal).
- Negative values: the constraint is violated (magnitude indicates severity).

Parameters:

Name Type Description Default
dose Dose

Dose distribution object

required
structure Structure

Structure to evaluate

required
constraint_type str

Type of constraint ('max', 'mean', or 'min')

required
constraint_level float

Constraint value in Gy

required

Returns:

Type Description
float

Quality index (-1 to 1)

Examples:

>>> from dosemetrics.dose import Dose
>>> from dosemetrics.utils.compliance import quality_index
>>>
>>> dose = Dose.from_dicom("rtdose.dcm")
>>> brainstem = structures.get_structure("Brainstem")
>>>
>>> # Check max dose constraint
>>> qi = quality_index(dose, brainstem, "max", 54.0)
>>> if qi < 0:
...     print("Constraint violated!")
Source code in src/dosemetrics/utils/compliance.py
def quality_index(
    dose: Dose,
    structure: Structure,
    constraint_type: str,
    constraint_level: float,
) -> float:
    """
    Score how well a structure's dose satisfies a single constraint.

    Sign convention:
    - Positive values: constraint is met (higher is better, 1.0 is ideal)
    - Negative values: constraint is violated (magnitude indicates severity)

    Args:
        dose: Dose distribution object
        structure: Structure to evaluate
        constraint_type: Type of constraint ('max', 'mean', or 'min');
            any other value yields 0.0
        constraint_level: Constraint value in Gy

    Returns:
        Quality index (-1 to 1)

    Examples:
        >>> from dosemetrics.dose import Dose
        >>> from dosemetrics.utils.compliance import quality_index
        >>>
        >>> dose = Dose.from_dicom("rtdose.dcm")
        >>> brainstem = structures.get_structure("Brainstem")
        >>>
        >>> # Check max dose constraint
        >>> qi = quality_index(dose, brainstem, "max", 54.0)
        >>> if qi < 0:
        ...     print("Constraint violated!")
    """
    from ..metrics import dvh, statistics

    dose_bins, volumes = dvh.compute_dvh(dose, structure)

    if constraint_type in ("mean", "max"):
        # NOTE(review): 'mean' uses the same DVH test as 'max' -- any volume
        # in a bin above the level counts as a violation. Confirm intended.
        over_limit = np.nonzero(dose_bins > constraint_level)[0]
        volume_over = np.max(volumes[over_limit]) if len(over_limit) > 0 else 0.0

        if volume_over > 0:
            # Violation: negated volume, scaled by 100 (presumably percent).
            return -volume_over / 100.0

        # Constraint met: relative head-room between level and observed dose.
        if constraint_type == "mean":
            observed = statistics.compute_mean_dose(dose, structure)
        else:
            observed = statistics.compute_max_dose(dose, structure)
        return float((constraint_level - observed) / constraint_level)

    if constraint_type == "min":
        # For targets: inspect the DVH bins below the level.
        under_limit = np.nonzero(dose_bins < constraint_level)[0]
        volume_kept = np.min(volumes[under_limit]) if len(under_limit) > 0 else 0.0

        if volume_kept < 100:
            # Violation: negated shortfall from 100.
            return -(100 - volume_kept) / 100.0
        return 1.0

    # Unrecognised constraint type.
    return 0.0

load_dataset

load_dataset(root_path: Union[str, Path], subject_pattern: str = '*', dose_pattern: str = 'dose*', structures_pattern: str = '*.nii.gz', auto_detect: bool = True) -> Dict[str, Dict[str, Union[Dose, StructureSet]]]

Load an entire dataset with multiple subjects.

Automatically detects folder structure and loads all doses and structure sets. Supports both DICOM and NIfTI formats with automatic detection.

Parameters

root_path : str or Path — Root directory containing subject folders.
subject_pattern : str — Glob pattern for subject folder names (default: "*").
dose_pattern : str — Pattern to identify dose files/folders (default: "dose*").
structures_pattern : str — Pattern to identify structure files (default: "*.nii.gz").
auto_detect : bool — Automatically detect DICOM vs NIfTI format (default: True).

Returns

dataset : Dict[str, Dict[str, Union[Dose, StructureSet]]] Nested dictionary: {subject_id: {'dose': Dose, 'structures': StructureSet}}

Examples

dataset = load_dataset('/data/clinical_study') for subject_id, data in dataset.items(): ... dose = data['dose'] ... structures = data['structures'] ... print(f"Subject {subject_id}: {len(structures)} structures")

Source code in src/dosemetrics/utils/batch.py
def load_dataset(
    root_path: Union[str, Path],
    subject_pattern: str = "*",
    dose_pattern: str = "dose*",
    structures_pattern: str = "*.nii.gz",
    auto_detect: bool = True
) -> Dict[str, Dict[str, Union[Dose, StructureSet]]]:
    """
    Load every subject folder found under a dataset root.

    Each directory matching ``subject_pattern`` is handed to
    ``load_from_folder``; non-directories are ignored. A subject whose
    loading raises is reported on stdout and skipped.

    Parameters
    ----------
    root_path : str or Path
        Root directory containing subject folders
    subject_pattern : str
        Glob pattern for subject folder names (default: "*")
    dose_pattern : str
        Pattern to identify dose files/folders
        (accepted but not referenced in this function body)
    structures_pattern : str
        Pattern to identify structure files
        (accepted but not referenced in this function body)
    auto_detect : bool
        When true, ``detect_folder_format`` is called on each subject folder
        before loading

    Returns
    -------
    dataset : Dict[str, Dict[str, Union[Dose, StructureSet]]]
        Nested dictionary: {subject_id: {'dose': Dose, 'structures': StructureSet}}

    Examples
    --------
    >>> dataset = load_dataset('/data/clinical_study')
    >>> for subject_id, data in dataset.items():
    ...     dose = data['dose']
    ...     structures = data['structures']
    ...     print(f"Subject {subject_id}: {len(structures)} structures")
    """
    base_dir = Path(root_path)
    loaded = {}

    # Sorted so subjects are visited in a stable, name-based order.
    for candidate in sorted(base_dir.glob(subject_pattern)):
        if candidate.is_dir():
            subject_id = candidate.name
            try:
                if auto_detect:
                    # Result is unused; the call stays because an exception
                    # raised here skips the subject.
                    detect_folder_format(str(candidate))

                payload = load_from_folder(str(candidate))
                if payload:
                    loaded[subject_id] = payload
            except Exception as e:
                print(f"Warning: Could not load subject {subject_id}: {e}")

    return loaded

load_multiple_doses

load_multiple_doses(folder_paths: List[Union[str, Path]], dose_names: Optional[List[str]] = None) -> Dict[str, Dose]

Load multiple dose distributions from different folders.

Useful for comparing different treatment plans (e.g., TPS vs predicted).

Parameters

folder_paths : List[str or Path] List of folders, each containing a dose distribution dose_names : List[str], optional Names for each dose (default: uses folder names)

Returns

doses : Dict[str, Dose] Dictionary mapping dose names to Dose objects

Examples

doses = load_multiple_doses([ ... '/data/subject01/tps', ... '/data/subject01/predicted' ... ], dose_names=['TPS', 'Predicted'])

Source code in src/dosemetrics/utils/batch.py
def load_multiple_doses(
    folder_paths: List[Union[str, Path]],
    dose_names: Optional[List[str]] = None
) -> Dict[str, Dose]:
    """
    Collect one dose per folder into a name-keyed dictionary.

    Useful for comparing different treatment plans (e.g., TPS vs predicted).
    Folders that fail to load are reported on stdout and skipped; folders
    whose loaded result has no 'dose' entry are skipped silently.

    Parameters
    ----------
    folder_paths : List[str or Path]
        List of folders, each containing a dose distribution
    dose_names : List[str], optional
        Names for each dose, matched to ``folder_paths`` by position;
        folders without a matching name use their folder name

    Returns
    -------
    doses : Dict[str, Dose]
        Dictionary mapping dose names to Dose objects

    Examples
    --------
    >>> doses = load_multiple_doses([
    ...     '/data/subject01/tps',
    ...     '/data/subject01/predicted'
    ... ], dose_names=['TPS', 'Predicted'])
    """
    collected = {}
    labels = dose_names or []

    for position, raw_path in enumerate(folder_paths):
        location = Path(raw_path)
        # Positional label when one was supplied, otherwise the folder name.
        label = labels[position] if position < len(labels) else location.name

        try:
            loaded = load_from_folder(str(location))
            if loaded and 'dose' in loaded:
                collected[label] = loaded['dose']
        except Exception as e:
            print(f"Warning: Could not load dose from {location}: {e}")

    return collected

process_dataset_with_metric

process_dataset_with_metric(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], metric_func: Callable, structure_names: Optional[List[str]] = None, **metric_kwargs) -> pd.DataFrame

Apply a metric function across an entire dataset.

Computes metrics for all subjects and all structures, returning results in a structured DataFrame.

Parameters

dataset : Dict Dataset dictionary from load_dataset() metric_func : Callable Metric function that takes (dose, structure) and returns a value or dict structure_names : List[str], optional Specific structures to analyze (default: all structures) **metric_kwargs Additional keyword arguments passed to metric_func

Returns

results : pd.DataFrame DataFrame with columns: subject_id, structure_name, metric values

Examples

from dosemetrics.metrics import dvh dataset = load_dataset('/data/study') results = process_dataset_with_metric( ... dataset, ... dvh.compute_mean_dose, ... structure_names=['PTV', 'Heart'] ... )

Source code in src/dosemetrics/utils/batch.py
def process_dataset_with_metric(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    metric_func: Callable,
    structure_names: Optional[List[str]] = None,
    **metric_kwargs
) -> pd.DataFrame:
    """
    Run one metric over every subject/structure pair of a dataset.

    Subjects lacking a 'dose' or 'structures' entry are skipped. A metric call
    that raises is reported on stdout and that pair is skipped.

    Parameters
    ----------
    dataset : Dict
        Dataset dictionary from load_dataset()
    metric_func : Callable
        Metric function that takes (dose, structure) and returns a value or dict
    structure_names : List[str], optional
        Specific structures to analyze (default: all structures); names a
        subject does not have are ignored for that subject
    **metric_kwargs
        Additional keyword arguments passed to metric_func

    Returns
    -------
    results : pd.DataFrame
        One row per evaluated pair with 'subject_id' and 'structure' columns,
        plus either the keys of the returned dict or a single 'value' column

    Examples
    --------
    >>> from dosemetrics.metrics import dvh
    >>> dataset = load_dataset('/data/study')
    >>> results = process_dataset_with_metric(
    ...     dataset,
    ...     dvh.compute_mean_dose,
    ...     structure_names=['PTV', 'Heart']
    ... )
    """
    rows = []

    for subject_id, entry in dataset.items():
        if not ('dose' in entry and 'structures' in entry):
            continue

        dose, structure_set = entry['dose'], entry['structures']

        # Resolve which structures to evaluate for this subject.
        if structure_names:
            selected = []
            for wanted in structure_names:
                if wanted in structure_set.structure_names:
                    selected.append(structure_set.get_structure(wanted))
        else:
            selected = list(structure_set.structures.values())

        for structure in selected:
            try:
                outcome = metric_func(dose, structure, **metric_kwargs)

                row = {'subject_id': subject_id, 'structure': structure.name}
                if isinstance(outcome, dict):
                    # Dict results contribute one column per key.
                    row.update(outcome)
                else:
                    row['value'] = outcome

                rows.append(row)
            except Exception as e:
                print(f"Warning: Error processing {subject_id}/{structure.name}: {e}")

    return pd.DataFrame(rows)

batch_compute_dvh

batch_compute_dvh(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], structure_names: Optional[List[str]] = None, max_dose: Optional[float] = None, step_size: float = 0.1) -> Dict[str, Dict[str, Tuple[np.ndarray, np.ndarray]]]

Compute DVHs for all subjects and structures in a dataset.

Parameters

dataset : Dict Dataset dictionary from load_dataset() structure_names : List[str], optional Specific structures to analyze max_dose : float, optional Maximum dose for DVH bins step_size : float DVH bin width in Gy

Returns

dvhs : Dict[str, Dict[str, Dict[str, np.ndarray]]] Nested dict: {subject_id: {structure_name: {'dose_bins': dose_bins, 'volumes': volumes}}}

Examples

from dosemetrics.utils import batch dataset = batch.load_dataset('/data/study') dvhs = batch.batch_compute_dvh(dataset, structure_names=['PTV', 'Heart'])

Source code in src/dosemetrics/utils/batch.py
def batch_compute_dvh(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    structure_names: Optional[List[str]] = None,
    max_dose: Optional[float] = None,
    step_size: float = 0.1
) -> Dict[str, Dict[str, Dict[str, np.ndarray]]]:
    """
    Compute DVHs for all subjects and structures in a dataset.

    Subjects lacking a 'dose' or 'structures' entry are skipped, as are
    subjects for which no DVH could be computed. A DVH computation that
    raises is reported on stdout and that structure is skipped.

    Parameters
    ----------
    dataset : Dict
        Dataset dictionary from load_dataset()
    structure_names : List[str], optional
        Specific structures to analyze (default: all structures); names a
        subject does not have are ignored for that subject
    max_dose : float, optional
        Maximum dose for DVH bins (forwarded to compute_dvh)
    step_size : float
        DVH bin width in Gy (forwarded to compute_dvh)

    Returns
    -------
    dvhs : Dict[str, Dict[str, Dict[str, np.ndarray]]]
        Nested dict:
        {subject_id: {structure_name: {'dose_bins': ..., 'volumes': ...}}}

    Examples
    --------
    >>> from dosemetrics.utils import batch
    >>> dataset = batch.load_dataset('/data/study')
    >>> dvhs = batch.batch_compute_dvh(dataset, structure_names=['PTV', 'Heart'])
    >>> ptv_bins = dvhs['subject01']['PTV']['dose_bins']
    """
    from ..metrics import dvh as dvh_module

    dvhs = {}

    for subject_id, data in dataset.items():
        if 'dose' not in data or 'structures' not in data:
            continue

        dose = data['dose']
        structures = data['structures']
        subject_dvhs = {}

        # Determine which structures to process
        if structure_names:
            struct_list = [
                structures.get_structure(name)
                for name in structure_names
                if name in structures.structure_names
            ]
        else:
            struct_list = list(structures.structures.values())

        for structure in struct_list:
            try:
                dose_bins, volumes = dvh_module.compute_dvh(
                    dose, structure, max_dose=max_dose, step_size=step_size
                )
                # Stored as a keyed dict (not a tuple) -- the annotation and
                # docstring above now describe this actual shape.
                subject_dvhs[structure.name] = {
                    'dose_bins': dose_bins,
                    'volumes': volumes
                }
            except Exception as e:
                print(f"Warning: Error computing DVH for {subject_id}/{structure.name}: {e}")

        # Subjects with no successful DVH are omitted entirely.
        if subject_dvhs:
            dvhs[subject_id] = subject_dvhs

    return dvhs

compare_doses_batch

compare_doses_batch(dataset1: Dict[str, Dict[str, Union[Dose, StructureSet]]], dataset2: Dict[str, Dict[str, Union[Dose, StructureSet]]], comparison_func: Callable, structure_names: Optional[List[str]] = None, **kwargs) -> pd.DataFrame

Compare two datasets (e.g., TPS vs predicted doses).

Parameters

dataset1, dataset2 : Dict Dataset dictionaries to compare comparison_func : Callable Function that takes (dose1, dose2, structure) and returns metrics structure_names : List[str], optional Specific structures to compare **kwargs Additional arguments for comparison_func

Returns

comparison : pd.DataFrame Comparison results for all subjects and structures

Examples

from dosemetrics.metrics import dose_comparison tps_data = load_dataset('/data/tps') pred_data = load_dataset('/data/predicted') comparison = compare_doses_batch( ... tps_data, pred_data, ... dose_comparison.compute_mae ... )

Source code in src/dosemetrics/utils/batch.py
def compare_doses_batch(
    dataset1: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    dataset2: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    comparison_func: Callable,
    structure_names: Optional[List[str]] = None,
    **kwargs
) -> pd.DataFrame:
    """
    Compare two datasets (e.g., TPS vs predicted doses).

    Only subjects present in both datasets are compared, visited in the order
    they appear in ``dataset1`` so the output row order is reproducible.
    Structures are always taken from ``dataset1``. A comparison that raises is
    reported on stdout and that subject/structure pair is skipped.

    Parameters
    ----------
    dataset1, dataset2 : Dict
        Dataset dictionaries to compare
    comparison_func : Callable
        Function that takes (dose1, dose2, structure) and returns metrics
    structure_names : List[str], optional
        Specific structures to compare (default: all structures of the
        subject in dataset1)
    **kwargs
        Additional arguments for comparison_func

    Returns
    -------
    comparison : pd.DataFrame
        One row per compared pair with 'subject_id' and 'structure' columns,
        plus either the keys of the returned dict or a single 'value' column

    Examples
    --------
    >>> from dosemetrics.metrics import dose_comparison
    >>> tps_data = load_dataset('/data/tps')
    >>> pred_data = load_dataset('/data/predicted')
    >>> comparison = compare_doses_batch(
    ...     tps_data, pred_data,
    ...     dose_comparison.compute_mae
    ... )
    """
    results = []

    # Common subjects in dataset1's order. A set intersection was used before,
    # whose iteration order for string ids varies between interpreter runs.
    common_subjects = [sid for sid in dataset1 if sid in dataset2]

    for subject_id in common_subjects:
        data1 = dataset1[subject_id]
        data2 = dataset2[subject_id]

        if 'dose' not in data1 or 'dose' not in data2:
            continue

        # Structures come from dataset1 only.
        if 'structures' not in data1:
            continue

        dose1 = data1['dose']
        dose2 = data2['dose']
        structures = data1['structures']

        # Determine which structures to process
        if structure_names:
            struct_list = [
                structures.get_structure(name)
                for name in structure_names
                if name in structures.structure_names
            ]
        else:
            struct_list = list(structures.structures.values())

        for structure in struct_list:
            try:
                result = comparison_func(dose1, dose2, structure, **kwargs)

                row = {'subject_id': subject_id, 'structure': structure.name}
                if isinstance(result, dict):
                    # Dict results contribute one column per key.
                    row.update(result)
                else:
                    row['value'] = result

                results.append(row)

            except Exception as e:
                print(f"Warning: Error comparing {subject_id}/{structure.name}: {e}")

    return pd.DataFrame(results)

aggregate_results

aggregate_results(results: DataFrame, group_by: Union[str, List[str]] = 'structure', agg_funcs: Optional[Dict[str, Union[str, List[str]]]] = None) -> pd.DataFrame

Aggregate batch processing results.

Compute summary statistics across subjects, structures, or other groupings.

Parameters

results : pd.DataFrame Results from process_dataset_with_metric or similar group_by : str or List[str] Column(s) to group by (e.g., 'structure', 'subject_id') agg_funcs : Dict, optional Aggregation functions for each column. Default: ['mean', 'std', 'min', 'max', 'median'] for every numeric column; if the results contain no numeric columns, a per-group row count is returned instead.

Returns

summary : pd.DataFrame Aggregated statistics

Examples

results = process_dataset_with_metric(dataset, compute_mean_dose) summary = aggregate_results(results, group_by='structure') print(summary) # Mean dose statistics per structure

Source code in src/dosemetrics/utils/batch.py
def aggregate_results(
    results: pd.DataFrame,
    group_by: Union[str, List[str]] = 'structure',
    agg_funcs: Optional[Dict[str, Union[str, List[str]]]] = None
) -> pd.DataFrame:
    """
    Aggregate batch processing results.

    Compute summary statistics across subjects, structures, or other groupings.

    Parameters
    ----------
    results : pd.DataFrame
        Results from process_dataset_with_metric or similar
    group_by : str or List[str]
        Column(s) to group by (e.g., 'structure', 'subject_id')
    agg_funcs : Dict, optional
        Aggregation functions for each column.
        Default: ['mean', 'std', 'min', 'max', 'median'] for every numeric
        column that is not itself a grouping column.

    Returns
    -------
    summary : pd.DataFrame
        Aggregated statistics. When ``agg_funcs`` is omitted and there is no
        numeric column left to aggregate, a single 'count' column holding the
        number of rows per group is returned instead.

    Examples
    --------
    >>> results = process_dataset_with_metric(dataset, compute_mean_dose)
    >>> summary = aggregate_results(results, group_by='structure')
    >>> print(summary)  # Mean dose statistics per structure
    """
    if agg_funcs is None:
        # Normalise to a list so grouping columns are excluded for both the
        # single-column and the multi-column form of ``group_by``.
        group_keys = list(group_by) if isinstance(group_by, list) else [group_by]

        numeric_cols = [
            col
            for col in results.select_dtypes(include=[np.number]).columns
            if col not in group_keys
        ]
        if not numeric_cols:
            # Nothing numeric to summarise: fall back to group sizes.
            return results.groupby(group_by).size().to_frame('count')

        agg_funcs = {
            col: ['mean', 'std', 'min', 'max', 'median'] for col in numeric_cols
        }

    return results.groupby(group_by).agg(agg_funcs)

export_batch_results

export_batch_results(results: DataFrame, output_path: Union[str, Path], format: str = 'csv', **kwargs) -> None

Export batch processing results to file.

Parameters

results : pd.DataFrame Results dataframe to export output_path : str or Path Output file path format : str Output format: 'csv', 'excel', 'json', 'parquet' **kwargs Additional arguments for the export function

Examples

results = process_dataset_with_metric(dataset, compute_mean_dose) export_batch_results(results, 'results/mean_dose.csv')

Source code in src/dosemetrics/utils/batch.py
def export_batch_results(
    results: pd.DataFrame,
    output_path: Union[str, Path],
    format: str = 'csv',
    **kwargs
) -> None:
    """
    Export batch processing results to file.

    The format is validated before anything is written, so an unsupported
    format no longer leaves freshly created parent directories behind.
    Missing parent directories of ``output_path`` are created.

    Parameters
    ----------
    results : pd.DataFrame
        Results dataframe to export
    output_path : str or Path
        Output file path
    format : str
        Output format: 'csv', 'excel', 'json', 'parquet'
    **kwargs
        Additional arguments for the export function
        (``to_csv`` / ``to_excel`` / ``to_json`` / ``to_parquet``)

    Raises
    ------
    ValueError
        If ``format`` is not one of the supported formats

    Examples
    --------
    >>> results = process_dataset_with_metric(dataset, compute_mean_dose)
    >>> export_batch_results(results, 'results/mean_dose.csv')
    """
    # Supported format -> name of the DataFrame writer method.
    writer_names = {
        'csv': 'to_csv',
        'excel': 'to_excel',
        'json': 'to_json',
        'parquet': 'to_parquet',
    }

    # Tuple membership compares with ==, so odd (unhashable) inputs still end
    # up in the ValueError below rather than a TypeError.
    if format not in tuple(writer_names):
        raise ValueError(f"Unsupported format: {format}")

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    getattr(results, writer_names[format])(output_path, **kwargs)

analyze_by_structure

analyze_by_structure(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], structure_name: str, metrics: Dict[str, callable]) -> pd.DataFrame

Analyze a single structure across all subjects.

Computes specified metrics for one structure across the entire dataset, useful for population-level structure analysis (e.g., PTV coverage across cohort).

Parameters

dataset : Dict Dataset dictionary from batch.load_dataset() structure_name : str Name of structure to analyze metrics : Dict[str, callable] Dictionary of {metric_name: metric_function} Each function should take (dose, structure) and return a value

Returns

results : pd.DataFrame DataFrame with subject_id and computed metrics

Examples

from dosemetrics.metrics import dvh
from dosemetrics.utils import analysis

metrics = {
    'mean_dose': dvh.compute_mean_dose,
    'max_dose': dvh.compute_max_dose,
    'D95': lambda d, s: dvh.compute_dose_at_volume(d, s, 95)
}
results = analysis.analyze_by_structure(dataset, 'PTV', metrics)
print(results.describe())  # Summary statistics for PTV across subjects

Source code in src/dosemetrics/utils/analysis.py
def analyze_by_structure(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    structure_name: str,
    metrics: Dict[str, callable]
) -> pd.DataFrame:
    """
    Evaluate a set of metrics for one named structure in every subject.

    Subjects whose entry lacks a 'dose' or 'structures' key are skipped, as
    are subjects for which ``get_structure`` raises ``ValueError`` or
    ``KeyError``. A metric that raises is reported with a printed warning
    and stored as NaN for that subject.

    Parameters
    ----------
    dataset : Dict
        Mapping of subject id to a dict holding 'dose' and 'structures'
    structure_name : str
        Name of the structure to look up in each subject
    metrics : Dict[str, callable]
        Mapping of {metric_name: metric_function}; every function is
        called as ``metric_function(dose, structure)``

    Returns
    -------
    results : pd.DataFrame
        One row per analysed subject: 'subject_id' plus one column per metric

    Examples
    --------
    >>> from dosemetrics.metrics import dvh
    >>> from dosemetrics.utils import analysis
    >>>
    >>> metrics = {'mean_dose': dvh.compute_mean_dose}
    >>> results = analysis.analyze_by_structure(dataset, 'PTV', metrics)
    >>> print(results.describe())
    """
    rows = []

    for subject_id, entry in dataset.items():
        # Both pieces are required before anything can be computed.
        if not ('dose' in entry and 'structures' in entry):
            continue

        dose = entry['dose']
        structure_set = entry['structures']

        # A subject without the requested structure contributes no row.
        try:
            target = structure_set.get_structure(structure_name)
        except (ValueError, KeyError):
            continue

        record = {'subject_id': subject_id}
        for metric_name, metric_func in metrics.items():
            try:
                record[metric_name] = metric_func(dose, target)
            except Exception as e:
                print(f"Warning: Error computing {metric_name} for {subject_id}/{structure_name}: {e}")
                record[metric_name] = np.nan

        rows.append(record)

    return pd.DataFrame(rows)

analyze_by_subject

analyze_by_subject(dose: Dose, structures: StructureSet, metrics: Dict[str, callable], structure_names: Optional[List[str]] = None) -> pd.DataFrame

Analyze all structures for a single subject.

Computes metrics for all (or selected) structures in a single subject's dataset.

Parameters

dose : Dose Subject's dose distribution structures : StructureSet Subject's structure set metrics : Dict[str, callable] Dictionary of {metric_name: metric_function} structure_names : List[str], optional Specific structures to analyze (default: all)

Returns

results : pd.DataFrame DataFrame with structure names and computed metrics

Examples

from dosemetrics.metrics import dvh from dosemetrics.utils import analysis

dose = Dose.from_dicom('rtdose.dcm') structures = StructureSet.from_dicom('rtstruct.dcm')

metrics = { ... 'mean_dose': dvh.compute_mean_dose, ... 'V20': lambda d, s: dvh.compute_volume_at_dose(d, s, 20) ... } results = analysis.analyze_by_subject(dose, structures, metrics)

Source code in src/dosemetrics/utils/analysis.py
def analyze_by_subject(
    dose: Dose,
    structures: StructureSet,
    metrics: Dict[str, callable],
    structure_names: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Evaluate a set of metrics for the structures of one subject.

    When ``structure_names`` is given (and non-empty), only the names that
    appear in ``structures.structure_names`` are analysed; unknown names
    are ignored. Otherwise every value of ``structures.structures`` is
    analysed. A metric that raises is reported with a printed warning and
    stored as NaN.

    Parameters
    ----------
    dose : Dose
        Subject's dose distribution
    structures : StructureSet
        Subject's structure set
    metrics : Dict[str, callable]
        Mapping of {metric_name: metric_function}; every function is
        called as ``metric_function(dose, structure)``
    structure_names : List[str], optional
        Specific structures to analyze (default: all)

    Returns
    -------
    results : pd.DataFrame
        One row per structure: 'structure', 'type' and one column per metric

    Examples
    --------
    >>> from dosemetrics.metrics import dvh
    >>> from dosemetrics.utils import analysis
    >>>
    >>> metrics = {'mean_dose': dvh.compute_mean_dose}
    >>> results = analysis.analyze_by_subject(dose, structures, metrics)
    """
    # Resolve the structures to process.
    if not structure_names:
        selected = list(structures.structures.values())
    else:
        selected = []
        for name in structure_names:
            if name in structures.structure_names:
                selected.append(structures.get_structure(name))

    rows = []
    for structure in selected:
        record = {'structure': structure.name, 'type': structure.structure_type.value}

        for metric_name, metric_func in metrics.items():
            try:
                record[metric_name] = metric_func(dose, structure)
            except Exception as e:
                print(f"Warning: Error computing {metric_name} for {structure.name}: {e}")
                record[metric_name] = np.nan

        rows.append(record)

    return pd.DataFrame(rows)

analyze_by_dataset

analyze_by_dataset(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], metrics: Dict[str, callable], structure_names: Optional[List[str]] = None, summary_stats: bool = True) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]

Analyze entire dataset with population-level statistics.

Computes metrics across all subjects and structures, with optional summary statistics grouped by structure.

Parameters

dataset : Dict Dataset dictionary metrics : Dict[str, callable] Metrics to compute structure_names : List[str], optional Specific structures to analyze summary_stats : bool If True, return both detailed and summary dataframes

Returns

results : pd.DataFrame or Tuple[pd.DataFrame, pd.DataFrame] If summary_stats=False: detailed results If summary_stats=True: (detailed_results, summary_stats)

Examples

from dosemetrics.metrics import dvh from dosemetrics.utils import analysis

metrics = { ... 'mean_dose': dvh.compute_mean_dose, ... 'D95': lambda d, s: dvh.compute_dose_at_volume(d, s, 95) ... } detailed, summary = analysis.analyze_by_dataset( ... dataset, metrics, structure_names=['PTV', 'Heart', 'Lung_L'] ... ) print(summary) # Mean ± std for each metric per structure

Source code in src/dosemetrics/utils/analysis.py
def analyze_by_dataset(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    metrics: Dict[str, callable],
    structure_names: Optional[List[str]] = None,
    summary_stats: bool = True
) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.DataFrame]]:
    """
    Analyze entire dataset with population-level statistics.

    Computes metrics across all subjects and structures, with optional
    summary statistics grouped by structure. Subjects lacking a 'dose' or
    'structures' entry are skipped. A metric that raises is reported with
    a printed warning and stored as NaN.

    Parameters
    ----------
    dataset : Dict
        Dataset dictionary
    metrics : Dict[str, callable]
        Metrics to compute; each is called as ``metric(dose, structure)``
    structure_names : List[str], optional
        Specific structures to analyze (names missing from a subject's
        structure set are ignored for that subject)
    summary_stats : bool
        If True, return both detailed and summary dataframes

    Returns
    -------
    results : pd.DataFrame or Tuple[pd.DataFrame, pd.DataFrame]
        If summary_stats=False: detailed results
        If summary_stats=True: (detailed_results, summary_stats)
        When no rows were produced or no metrics were requested, the
        summary is an empty DataFrame.

    Examples
    --------
    >>> from dosemetrics.metrics import dvh
    >>> from dosemetrics.utils import analysis
    >>>
    >>> metrics = {
    ...     'mean_dose': dvh.compute_mean_dose,
    ...     'D95': lambda d, s: dvh.compute_dose_at_volume(d, s, 95)
    ... }
    >>> detailed, summary = analysis.analyze_by_dataset(
    ...     dataset, metrics, structure_names=['PTV', 'Heart', 'Lung_L']
    ... )
    >>> print(summary)  # Mean ± std for each metric per structure
    """
    results = []

    for subject_id, data in dataset.items():
        if 'dose' not in data or 'structures' not in data:
            continue

        dose = data['dose']
        structures = data['structures']

        # Determine which structures to analyze
        if structure_names:
            struct_list = [structures.get_structure(name) for name in structure_names
                           if name in structures.structure_names]
        else:
            struct_list = list(structures.structures.values())

        for structure in struct_list:
            row = {
                'subject_id': subject_id,
                'structure': structure.name,
                'type': structure.structure_type.value
            }

            # Compute all metrics; a failing metric must not abort the batch
            for metric_name, metric_func in metrics.items():
                try:
                    row[metric_name] = metric_func(dose, structure)
                except Exception as e:
                    print(f"Warning: Error computing {metric_name} for {subject_id}/{structure.name}: {e}")
                    row[metric_name] = np.nan

            results.append(row)

    detailed_df = pd.DataFrame(results)

    if not summary_stats:
        return detailed_df

    metric_cols = list(metrics.keys())

    # An empty frame has no 'structure' column, so grouping would raise
    # KeyError; with no metrics there is nothing to aggregate either.
    if detailed_df.empty or not metric_cols:
        return detailed_df, pd.DataFrame()

    # Compute summary statistics grouped by structure
    summary = detailed_df.groupby('structure')[metric_cols].agg(
        ['mean', 'std', 'min', 'max', 'median']
    )

    return detailed_df, summary

analyze_subset

analyze_subset(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], metrics: Dict[str, callable], subject_filter: Optional[callable] = None, structure_filter: Optional[callable] = None, **filter_kwargs) -> pd.DataFrame

Analyze a filtered subset of the dataset.

Apply custom filters to subjects and/or structures before analysis.

Parameters

dataset : Dict Dataset dictionary metrics : Dict[str, callable] Metrics to compute subject_filter : callable, optional Function that takes (subject_id, data) and returns bool structure_filter : callable, optional Function that takes (structure) and returns bool **filter_kwargs Additional filter parameters

Returns

results : pd.DataFrame Analysis results for filtered subset

Examples
# Analyze only target structures
def target_only(structure):
    return structure.structure_type == StructureType.TARGET

results = analysis.analyze_subset(
    dataset,
    metrics={'mean_dose': compute_mean_dose},
    structure_filter=target_only
)

Source code in src/dosemetrics/utils/analysis.py
def analyze_subset(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    metrics: Dict[str, callable],
    subject_filter: Optional[callable] = None,
    structure_filter: Optional[callable] = None,
    **filter_kwargs
) -> pd.DataFrame:
    """
    Run metrics on the subjects and structures that pass optional filters.

    The subject filter is applied first; subjects that pass but lack a
    'dose' or 'structures' entry are then skipped. A metric that raises is
    reported with a printed warning and stored as NaN.

    Parameters
    ----------
    dataset : Dict
        Dataset dictionary
    metrics : Dict[str, callable]
        Metrics to compute; each is called as ``metric(dose, structure)``
    subject_filter : callable, optional
        Called as ``subject_filter(subject_id, data)``; a falsy result
        drops the subject
    structure_filter : callable, optional
        Called as ``structure_filter(structure)``; a falsy result drops
        the structure
    **filter_kwargs
        Additional filter parameters. NOTE: accepted for interface
        compatibility; this function does not currently forward them to
        either filter.

    Returns
    -------
    results : pd.DataFrame
        One row per (subject, structure): 'subject_id', 'structure',
        'type' and one column per metric

    Examples
    --------
    >>> # Analyze only target structures
    >>> def target_only(structure):
    ...     return structure.structure_type == StructureType.TARGET
    >>>
    >>> results = analysis.analyze_subset(
    ...     dataset,
    ...     metrics={'mean_dose': compute_mean_dose},
    ...     structure_filter=target_only
    ... )
    """
    rows = []

    for subject_id, entry in dataset.items():
        # Subject-level filter runs before the completeness check.
        if subject_filter:
            if not subject_filter(subject_id, entry):
                continue

        if not ('dose' in entry and 'structures' in entry):
            continue

        dose = entry['dose']
        candidates = entry['structures'].structures.values()

        # Structure-level filter (keep everything when none is supplied).
        if structure_filter:
            selected = list(filter(structure_filter, candidates))
        else:
            selected = list(candidates)

        for structure in selected:
            record = {
                'subject_id': subject_id,
                'structure': structure.name,
                'type': structure.structure_type.value,
            }

            for metric_name, metric_func in metrics.items():
                try:
                    record[metric_name] = metric_func(dose, structure)
                except Exception as e:
                    print(f"Warning: Error computing {metric_name} for {subject_id}/{structure.name}: {e}")
                    record[metric_name] = np.nan

            rows.append(record)

    return pd.DataFrame(rows)

compute_cohort_statistics

compute_cohort_statistics(results: DataFrame, metric_cols: Optional[List[str]] = None, group_by: str = 'structure') -> pd.DataFrame

Compute cohort-level summary statistics.

Parameters

results : pd.DataFrame Results from analyze_by_dataset or similar metric_cols : List[str], optional Columns to summarize (default: all numeric) group_by : str Column to group by (default: 'structure')

Returns

statistics : pd.DataFrame Summary statistics (mean, std, CI, etc.)

Examples

results = analyze_by_dataset(dataset, metrics)
stats = compute_cohort_statistics(results[0])
print(stats)  # Population statistics per structure

Source code in src/dosemetrics/utils/analysis.py
def compute_cohort_statistics(
    results: pd.DataFrame,
    metric_cols: Optional[List[str]] = None,
    group_by: str = 'structure'
) -> pd.DataFrame:
    """
    Compute cohort-level summary statistics.

    For every group and metric column this reports count, mean, std, min,
    25th percentile, median, 75th percentile, max and the half-width of a
    normal-approximation 95% confidence interval (1.96 * std / sqrt(count)).
    All statistics, including the quartiles, ignore NaN values, so a metric
    that failed for some subjects does not blank out the quartiles.

    Parameters
    ----------
    results : pd.DataFrame
        Results from analyze_by_dataset or similar
    metric_cols : List[str], optional
        Columns to summarize (default: all numeric columns except the
        grouping column)
    group_by : str
        Column to group by (default: 'structure')

    Returns
    -------
    statistics : pd.DataFrame
        Summary statistics with two-level columns (metric, statistic);
        the 'ci_95' columns are appended after the other statistics

    Examples
    --------
    >>> results = analyze_by_dataset(dataset, metrics)
    >>> stats = compute_cohort_statistics(results[0])
    >>> print(stats)  # Population statistics per structure
    """
    if metric_cols is None:
        # Summarising the grouping key against itself is meaningless.
        metric_cols = [
            col
            for col in results.select_dtypes(include=[np.number]).columns
            if col != group_by
        ]

    summary = results.groupby(group_by)[metric_cols].agg([
        'count',
        'mean',
        'std',
        'min',
        # Series.quantile skips NaN, unlike np.percentile, keeping the
        # quartiles consistent with the other NaN-skipping aggregations.
        ('q25', lambda x: x.quantile(0.25)),
        'median',
        ('q75', lambda x: x.quantile(0.75)),
        'max'
    ])

    # Add confidence intervals
    for col in metric_cols:
        if (col, 'count') not in summary.columns:
            continue
        n = summary[(col, 'count')]
        std = summary[(col, 'std')]
        summary[(col, 'ci_95')] = 1.96 * std / np.sqrt(n)

    return summary

compare_cohorts

compare_cohorts(results1: DataFrame, results2: DataFrame, metric_cols: Optional[List[str]] = None, cohort_names: Tuple[str, str] = ('Cohort1', 'Cohort2')) -> pd.DataFrame

Compare two cohorts statistically.

Performs t-tests and computes effect sizes between two groups.

Parameters

results1, results2 : pd.DataFrame Results from two different cohorts metric_cols : List[str], optional Metrics to compare cohort_names : Tuple[str, str] Names for the cohorts

Returns

comparison : pd.DataFrame Statistical comparison results

Examples

pre_treatment = analyze_by_dataset(pre_data, metrics) post_treatment = analyze_by_dataset(post_data, metrics) comparison = compare_cohorts( ... pre_treatment[0], post_treatment[0], ... cohort_names=('Pre', 'Post') ... )

Source code in src/dosemetrics/utils/analysis.py
def compare_cohorts(
    results1: pd.DataFrame,
    results2: pd.DataFrame,
    metric_cols: Optional[List[str]] = None,
    cohort_names: Tuple[str, str] = ('Cohort1', 'Cohort2')
) -> pd.DataFrame:
    """
    Compare two cohorts statistically.

    For every structure present in both cohorts and every metric, runs an
    independent two-sample t-test (``scipy.stats.ttest_ind``) and computes
    Cohen's d from the pooled standard deviation. NaN values are dropped
    per metric; a structure/metric pair is skipped when either cohort has
    fewer than two remaining values. Rows are emitted in sorted structure
    order so the output is reproducible between runs.

    Parameters
    ----------
    results1, results2 : pd.DataFrame
        Results from two different cohorts; each needs a 'structure' column
    metric_cols : List[str], optional
        Metrics to compare (default: numeric columns of ``results1``)
    cohort_names : Tuple[str, str]
        Names for the cohorts, used as column prefixes

    Returns
    -------
    comparison : pd.DataFrame
        Statistical comparison results; empty when either input has no
        'structure' column (e.g. an empty results frame) or nothing could
        be compared

    Examples
    --------
    >>> pre_treatment = analyze_by_dataset(pre_data, metrics)
    >>> post_treatment = analyze_by_dataset(post_data, metrics)
    >>> comparison = compare_cohorts(
    ...     pre_treatment[0], post_treatment[0],
    ...     cohort_names=('Pre', 'Post')
    ... )
    """
    from scipy import stats

    # Empty analysis results carry no columns at all; nothing to compare.
    if 'structure' not in results1.columns or 'structure' not in results2.columns:
        return pd.DataFrame()

    if metric_cols is None:
        metric_cols = results1.select_dtypes(include=[np.number]).columns.tolist()

    comparison_results = []

    # Get common structures
    structures1 = set(results1['structure'].unique())
    structures2 = set(results2['structure'].unique())
    common_structures = structures1 & structures2

    # Sets iterate in arbitrary order; sort for a stable row order.
    for structure in sorted(common_structures, key=str):
        data1 = results1[results1['structure'] == structure]
        data2 = results2[results2['structure'] == structure]

        for metric in metric_cols:
            if metric not in data1.columns or metric not in data2.columns:
                continue

            values1 = data1[metric].dropna()
            values2 = data2[metric].dropna()
            n1, n2 = len(values1), len(values2)

            # std and the t-test are undefined for fewer than two samples
            if n1 < 2 or n2 < 2:
                continue

            mean1, mean2 = values1.mean(), values2.mean()
            std1, std2 = values1.std(), values2.std()

            # T-test
            t_stat, p_value = stats.ttest_ind(values1, values2)

            # Effect size (Cohen's d); reported as 0 when both cohorts
            # have zero variance to avoid dividing by zero
            pooled_std = np.sqrt(
                ((n1 - 1) * std1 ** 2 + (n2 - 1) * std2 ** 2) / (n1 + n2 - 2)
            )
            cohens_d = (mean1 - mean2) / pooled_std if pooled_std > 0 else 0

            comparison_results.append({
                'structure': structure,
                'metric': metric,
                f'{cohort_names[0]}_mean': mean1,
                f'{cohort_names[0]}_std': std1,
                f'{cohort_names[1]}_mean': mean2,
                f'{cohort_names[1]}_std': std2,
                'difference': mean1 - mean2,
                't_statistic': t_stat,
                'p_value': p_value,
                'cohens_d': cohens_d,
                'significant': p_value < 0.05
            })

    return pd.DataFrame(comparison_results)

plot_dvh

plot_dvh(dose: Dose, structure: Structure, bins: int = 1000, relative_volume: bool = True, ax: Optional[Axes] = None, label: Optional[str] = None, color: Optional[str] = None, **plot_kwargs) -> plt.Axes

Plot dose-volume histogram for a single structure.

Parameters

dose : Dose Dose distribution structure : Structure Structure to plot DVH for bins : int Number of bins for DVH computation relative_volume : bool If True, plot relative volume (%), else absolute volume (cc) ax : plt.Axes, optional Axis to plot on (creates new if None) label : str, optional Label for the curve (default: structure name) color : str, optional Color for the curve **plot_kwargs Additional arguments passed to plt.plot()

Returns

ax : plt.Axes The plot axis

Examples

import matplotlib.pyplot as plt from dosemetrics.utils import plot

fig, ax = plt.subplots() plot.plot_dvh(dose, ptv, ax=ax, label='PTV', color='red') plot.plot_dvh(dose, heart, ax=ax, label='Heart', color='blue') plt.legend() plt.show()

Source code in src/dosemetrics/utils/plot.py
def plot_dvh(
    dose: Dose,
    structure: Structure,
    bins: int = 1000,
    relative_volume: bool = True,
    ax: Optional[plt.Axes] = None,
    label: Optional[str] = None,
    color: Optional[str] = None,
    **plot_kwargs
) -> plt.Axes:
    """
    Draw the dose-volume histogram of one structure on an axis.

    The DVH comes from ``dvh.compute_dvh`` with a step size of
    ``dose.max_dose / bins`` (0.1 when ``bins`` is not positive).

    Parameters
    ----------
    dose : Dose
        Dose distribution
    structure : Structure
        Structure to plot DVH for
    bins : int
        Number of bins used to derive the DVH step size
    relative_volume : bool
        If True, plot relative volume (%); otherwise the curve is scaled
        by ``structure.volume_cc`` when the structure has that attribute
    ax : plt.Axes, optional
        Axis to plot on (a new 8x6 figure is created if None)
    label : str, optional
        Label for the curve (default: structure name)
    color : str, optional
        Color for the curve; overrides any 'color' in ``plot_kwargs``
    **plot_kwargs
        Additional arguments passed to ``ax.plot``; 'linewidth' defaults
        to 2

    Returns
    -------
    ax : plt.Axes
        The plot axis

    Examples
    --------
    >>> fig, ax = plt.subplots()
    >>> plot.plot_dvh(dose, ptv, ax=ax, label='PTV', color='red')
    >>> plt.legend()
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 6))

    # Translate the requested bin count into a dose step.
    peak_dose = dose.max_dose
    if bins > 0:
        step = peak_dose / bins
    else:
        step = 0.1
    dose_axis, volume_axis = dvh.compute_dvh(dose, structure, step_size=step)

    # The code treats the DVH values as percentages; rescale to cc on request.
    if not relative_volume and hasattr(structure, 'volume_cc'):
        volume_axis = volume_axis / 100.0 * structure.volume_cc

    curve_label = structure.name if label is None else label

    plot_kwargs.setdefault('linewidth', 2)
    if color:
        plot_kwargs['color'] = color

    ax.plot(dose_axis, volume_axis, label=curve_label, **plot_kwargs)

    # Axis cosmetics
    ax.set_xlabel('Dose (Gy)', fontsize=12)
    if not relative_volume:
        ax.set_ylabel('Volume (cc)', fontsize=12)
    else:
        ax.set_ylabel('Volume (%)', fontsize=12)
        ax.set_ylim(0, 105)

    ax.grid(True, alpha=0.3)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    return ax

plot_subject_dvhs

plot_subject_dvhs(dose: Dose, structures: StructureSet, structure_names: Optional[List[str]] = None, bins: int = 1000, relative_volume: bool = True, color_by_type: bool = True, figsize: Tuple[float, float] = (10, 7)) -> Tuple[plt.Figure, plt.Axes]

Plot DVHs for all structures of a subject.

Parameters

dose : Dose Dose distribution structures : StructureSet Structure set structure_names : List[str], optional Specific structures to plot (default: all) bins : int Number of bins relative_volume : bool Plot relative vs absolute volume color_by_type : bool Use different colors for targets vs OARs figsize : Tuple[float, float] Figure size

Returns

fig, ax : Figure and Axes

Examples

from dosemetrics.utils import plot fig, ax = plot.plot_subject_dvhs(dose, structures) plt.savefig('subject_dvhs.png', dpi=300, bbox_inches='tight')

Source code in src/dosemetrics/utils/plot.py
def plot_subject_dvhs(
    dose: Dose,
    structures: StructureSet,
    structure_names: Optional[List[str]] = None,
    bins: int = 1000,
    relative_volume: bool = True,
    color_by_type: bool = True,
    figsize: Tuple[float, float] = (10, 7)
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Draw the DVHs of a subject's structures on one new figure.

    Parameters
    ----------
    dose : Dose
        Dose distribution
    structures : StructureSet
        Structure set
    structure_names : List[str], optional
        Specific structures to plot (default: all); names not found in
        ``structures.structure_names`` are ignored
    bins : int
        Number of bins, forwarded to ``plot_dvh``
    relative_volume : bool
        Plot relative vs absolute volume
    color_by_type : bool
        If True, targets use TARGET_COLOR and everything else OAR_COLOR;
        otherwise colors cycle through DEFAULT_COLORS
    figsize : Tuple[float, float]
        Figure size

    Returns
    -------
    fig, ax : Figure and Axes

    Examples
    --------
    >>> from dosemetrics.utils import plot
    >>> fig, ax = plot.plot_subject_dvhs(dose, structures)
    >>> plt.savefig('subject_dvhs.png', dpi=300, bbox_inches='tight')
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Resolve the structures to draw.
    if not structure_names:
        struct_list = list(structures.structures.values())
    else:
        struct_list = []
        for name in structure_names:
            if name in structures.structure_names:
                struct_list.append(structures.get_structure(name))

    # Build a name -> color lookup.
    if color_by_type:
        from ..structures import StructureType
        palette = {}
        for s in struct_list:
            is_target = s.structure_type == StructureType.TARGET
            palette[s.name] = TARGET_COLOR if is_target else OAR_COLOR
    else:
        palette = {}
        for index, s in enumerate(struct_list):
            palette[s.name] = DEFAULT_COLORS[index % len(DEFAULT_COLORS)]

    # One curve per structure, all on the shared axis.
    for structure in struct_list:
        plot_dvh(
            dose,
            structure,
            bins=bins,
            relative_volume=relative_volume,
            ax=ax,
            color=palette[structure.name],
        )

    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_title('Dose-Volume Histograms', fontsize=14, fontweight='bold')

    fig.tight_layout()

    return fig, ax

plot_dvh_comparison

plot_dvh_comparison(dose1: Dose, dose2: Dose, structure: Structure, labels: Tuple[str, str] = ('Dose 1', 'Dose 2'), bins: int = 1000, relative_volume: bool = True, figsize: Tuple[float, float] = (8, 6)) -> Tuple[plt.Figure, plt.Axes]

Compare DVHs from two different dose distributions.

Useful for comparing TPS vs predicted, or different treatment plans.

Parameters

dose1, dose2 : Dose Dose distributions to compare structure : Structure Structure to analyze labels : Tuple[str, str] Labels for the two doses bins : int Number of bins relative_volume : bool Plot relative vs absolute volume figsize : Tuple[float, float] Figure size

Returns

fig, ax : Figure and Axes

Examples

fig, ax = plot.plot_dvh_comparison( ... tps_dose, pred_dose, ptv, ... labels=('TPS', 'Predicted') ... )

Source code in src/dosemetrics/utils/plot.py
def plot_dvh_comparison(
    dose1: Dose,
    dose2: Dose,
    structure: Structure,
    labels: Tuple[str, str] = ('Dose 1', 'Dose 2'),
    bins: int = 1000,
    relative_volume: bool = True,
    figsize: Tuple[float, float] = (8, 6)
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Overlay the DVHs of one structure under two dose distributions.

    Useful for comparing TPS vs predicted, or different treatment plans.
    The first dose is drawn as a solid blue line, the second as a dashed
    orange line.

    Parameters
    ----------
    dose1, dose2 : Dose
        Dose distributions to compare
    structure : Structure
        Structure to analyze
    labels : Tuple[str, str]
        Legend labels for the two doses
    bins : int
        Number of bins, forwarded to ``plot_dvh``
    relative_volume : bool
        Plot relative vs absolute volume
    figsize : Tuple[float, float]
        Figure size

    Returns
    -------
    fig, ax : Figure and Axes

    Examples
    --------
    >>> fig, ax = plot.plot_dvh_comparison(
    ...     tps_dose, pred_dose, ptv,
    ...     labels=('TPS', 'Predicted')
    ... )
    """
    fig, ax = plt.subplots(figsize=figsize)

    # (dose, index into labels, line color, line style) for each curve
    curves = (
        (dose1, 0, '#1f77b4', '-'),
        (dose2, 1, '#ff7f0e', '--'),
    )
    for dose_obj, label_index, line_color, line_style in curves:
        plot_dvh(
            dose_obj,
            structure,
            bins=bins,
            relative_volume=relative_volume,
            ax=ax,
            label=labels[label_index],
            color=line_color,
            linestyle=line_style,
        )

    ax.legend()
    ax.set_title(f'DVH Comparison: {structure.name}', fontsize=14, fontweight='bold')

    fig.tight_layout()

    return fig, ax

plot_dvh_band

plot_dvh_band(dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]], structure_name: str, bins: int = 1000, relative_volume: bool = True, percentiles: Tuple[float, float] = (25, 75), show_median: bool = True, show_individual: bool = False, ax: Optional[Axes] = None, color: Optional[str] = None, label: Optional[str] = None) -> plt.Axes

Plot DVH band showing population statistics.

Creates a band plot showing median and interquartile range across multiple subjects for a single structure.

Parameters

dataset : Dict Dataset dictionary from batch.load_dataset() structure_name : str Structure to plot bins : int Number of bins relative_volume : bool Plot relative vs absolute volume percentiles : Tuple[float, float] Lower and upper percentiles for band show_median : bool Whether to show median curve show_individual : bool Whether to show individual DVHs with transparency ax : plt.Axes, optional Axis to plot on color : str, optional Color for the band label : str, optional Label for the legend

Returns

ax : plt.Axes

Examples

fig, ax = plt.subplots() plot.plot_dvh_band(dataset, 'PTV', ax=ax, color='red', label='PTV') plot.plot_dvh_band(dataset, 'Heart', ax=ax, color='blue', label='Heart') plt.legend()

Source code in src/dosemetrics/utils/plot.py
def plot_dvh_band(
    dataset: Dict[str, Dict[str, Union[Dose, StructureSet]]],
    structure_name: str,
    bins: int = 1000,
    relative_volume: bool = True,
    percentiles: Tuple[float, float] = (25, 75),
    show_median: bool = True,
    show_individual: bool = False,
    ax: Optional[plt.Axes] = None,
    color: Optional[str] = None,
    label: Optional[str] = None
) -> plt.Axes:
    """
    Plot DVH band showing population statistics.

    Creates a band plot showing median and a percentile range across
    multiple subjects for a single structure. Each subject's DVH is
    computed with ``dvh.compute_dvh``, resampled onto a common dose axis
    by linear interpolation, and the statistics are taken point-wise.

    Parameters
    ----------
    dataset : Dict
        Dataset dictionary from batch.load_dataset()
    structure_name : str
        Structure to plot
    bins : int
        Number of bins; also the number of points on the common dose
        axis. When not positive, a DVH step of 0.1 is used and the common
        axis gets as many points as the longest individual DVH.
    relative_volume : bool
        Plot relative vs absolute volume. When False, each curve is
        scaled by ``structure.volume_cc`` (if the structure has that
        attribute), matching ``plot_dvh``.
    percentiles : Tuple[float, float]
        Lower and upper percentiles for band
    show_median : bool
        Whether to show median curve
    show_individual : bool
        Whether to show individual DVHs with transparency
    ax : plt.Axes, optional
        Axis to plot on (a new 10x7 figure is created if None)
    color : str, optional
        Color for the band (default: first entry of DEFAULT_COLORS)
    label : str, optional
        Label for the legend (default: structure name)

    Returns
    -------
    ax : plt.Axes
        The plot axis; returned unchanged apart from any individual
        curves when no valid DVH was found

    Examples
    --------
    >>> fig, ax = plt.subplots()
    >>> plot.plot_dvh_band(dataset, 'PTV', ax=ax, color='red', label='PTV')
    >>> plot.plot_dvh_band(dataset, 'Heart', ax=ax, color='blue', label='Heart')
    >>> plt.legend()
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 7))

    # Collect DVHs from all subjects
    all_dvhs = []
    max_dose = 0

    for subject_id, data in dataset.items():
        if 'dose' not in data or 'structures' not in data:
            continue

        dose = data['dose']
        structures = data['structures']
        structure = structures.get_structure(structure_name) if structure_name in structures else None

        if structure is None:
            continue

        try:
            step_size = dose.max_dose / bins if bins > 0 else 0.1
            dose_bins, volumes = dvh.compute_dvh(dose, structure, step_size=step_size)

            # The DVH values are treated as percentages (0-100). Convert to
            # absolute volume on request so the data agrees with the
            # 'Volume (cc)' axis label, the same way plot_dvh does.
            if not relative_volume and hasattr(structure, 'volume_cc'):
                volumes = volumes / 100.0 * structure.volume_cc

            all_dvhs.append((dose_bins, volumes))
            max_dose = max(max_dose, dose_bins[-1])

            # Plot individual if requested
            if show_individual:
                ax.plot(dose_bins, volumes, alpha=0.1, color=color or 'gray', linewidth=1)

        except Exception as e:
            # One bad subject must not prevent the population plot.
            print(f"Warning: Error computing DVH for {subject_id}/{structure_name}: {e}")

    if not all_dvhs:
        print(f"No valid DVHs found for {structure_name}")
        return ax

    # Create common dose axis. np.linspace needs a positive sample count,
    # so mirror the step-size fallback when bins is not positive.
    if bins > 0:
        n_points = bins
    else:
        n_points = max(len(dose_bins) for dose_bins, _ in all_dvhs)
    common_doses = np.linspace(0, max_dose, n_points)

    # Interpolate all DVHs to common dose axis
    dvh_array = np.array([
        np.interp(common_doses, dose_bins, volumes)
        for dose_bins, volumes in all_dvhs
    ])

    # Compute point-wise statistics across subjects
    median_dvh = np.median(dvh_array, axis=0)
    lower_percentile = np.percentile(dvh_array, percentiles[0], axis=0)
    upper_percentile = np.percentile(dvh_array, percentiles[1], axis=0)

    # Plot band
    if color is None:
        color = DEFAULT_COLORS[0]

    ax.fill_between(common_doses, lower_percentile, upper_percentile,
                    alpha=0.3, color=color, label=f'{label or structure_name} (IQR)')

    if show_median:
        ax.plot(common_doses, median_dvh, color=color, linewidth=2,
                label=f'{label or structure_name} (median)')

    # Format
    ax.set_xlabel('Dose (Gy)', fontsize=12)
    if relative_volume:
        ax.set_ylabel('Volume (%)', fontsize=12)
        ax.set_ylim(0, 105)
    else:
        ax.set_ylabel('Volume (cc)', fontsize=12)

    ax.grid(True, alpha=0.3)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    return ax

plot_metric_boxplot

plot_metric_boxplot(results: DataFrame, metric: str, group_by: str = 'structure', figsize: Tuple[float, float] = (10, 6), show_points: bool = True, horizontal: bool = False) -> Tuple[plt.Figure, plt.Axes]

Create box plot for a metric across structures or subjects.

Parameters

results : pd.DataFrame Results from analysis functions metric : str Metric column to plot group_by : str Column to group by ('structure' or 'subject_id') figsize : Tuple[float, float] Figure size show_points : bool Whether to show individual data points horizontal : bool Whether to make horizontal box plot

Returns

fig, ax : Figure and Axes

Examples

from dosemetrics.utils import analysis, plot
results = analysis.analyze_by_dataset(dataset, metrics)
fig, ax = plot.plot_metric_boxplot(results[0], 'mean_dose')

Source code in src/dosemetrics/utils/plot.py
def plot_metric_boxplot(
    results: pd.DataFrame,
    metric: str,
    group_by: str = 'structure',
    figsize: Tuple[float, float] = (10, 6),
    show_points: bool = True,
    horizontal: bool = False
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Create box plot for a metric across structures or subjects.

    One box is drawn per unique value of ``results[group_by]``, in order of
    first appearance. Missing (NaN) metric values are dropped per group.

    Parameters
    ----------
    results : pd.DataFrame
        Results from analysis functions; must contain the ``group_by`` and
        ``metric`` columns.
    metric : str
        Metric column to plot
    group_by : str
        Column to group by ('structure' or 'subject_id')
    figsize : Tuple[float, float]
        Figure size
    show_points : bool
        Whether to overlay the individual data points on each box. Points are
        jittered along the group axis with ``np.random.normal``, so their
        exact placement differs between calls unless the NumPy global seed
        is fixed.
    horizontal : bool
        Whether to make horizontal box plot (groups on the y-axis, metric
        values on the x-axis).

    Returns
    -------
    fig, ax : Figure and Axes

    Examples
    --------
    >>> from dosemetrics.utils import analysis, plot
    >>> results = analysis.analyze_by_dataset(dataset, metrics)
    >>> fig, ax = plot.plot_metric_boxplot(results[0], 'mean_dose')
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Prepare data: one series of non-missing metric values per group
    groups = results[group_by].unique()
    data = [results[results[group_by] == g][metric].dropna() for g in groups]

    group_axis_label = group_by.replace('_', ' ').title()

    # Create box plot
    if horizontal:
        bp = ax.boxplot(data, labels=groups, vert=False, patch_artist=True)
        ax.set_xlabel(metric, fontsize=12)
        ax.set_ylabel(group_axis_label, fontsize=12)
    else:
        bp = ax.boxplot(data, labels=groups, patch_artist=True)
        ax.set_ylabel(metric, fontsize=12)
        ax.set_xlabel(group_axis_label, fontsize=12)
        # Rotate the group labels on this Axes directly instead of relying on
        # pyplot's notion of the "current" axes.
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(45)
            tick_label.set_ha('right')

    # Color boxes
    for patch in bp['boxes']:
        patch.set_facecolor(DEFAULT_COLORS[0])
        patch.set_alpha(0.6)

    # Add individual points. Boxes sit at positions 1..N along the group axis,
    # so the jitter must be applied on that axis: x for vertical plots, y for
    # horizontal ones.
    if show_points:
        for position, values in enumerate(data, start=1):
            jitter = np.random.normal(position, 0.04, size=len(values))
            if horizontal:
                ax.plot(values, jitter, 'o', alpha=0.3, color='black', markersize=4)
            else:
                ax.plot(jitter, values, 'o', alpha=0.3, color='black', markersize=4)

    # Grid lines follow the value axis so they help reading the metric.
    ax.grid(True, alpha=0.3, axis='x' if horizontal else 'y')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    fig.tight_layout()

    return fig, ax

plot_metric_comparison

plot_metric_comparison(results1: DataFrame, results2: DataFrame, metric: str, cohort_names: Tuple[str, str] = ('Cohort 1', 'Cohort 2'), structure_names: Optional[List[str]] = None, figsize: Tuple[float, float] = (12, 6)) -> Tuple[plt.Figure, plt.Axes]

Compare a metric between two cohorts.

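Creates side-by-side bars for comparison: one pair per structure present in both cohorts, showing the mean of the metric with its standard deviation as error bars.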

Parameters

- results1, results2 : pd.DataFrame — Results from two cohorts
- metric : str — Metric to compare
- cohort_names : Tuple[str, str] — Names for the cohorts
- structure_names : List[str], optional — Specific structures to include
- figsize : Tuple[float, float] — Figure size

Returns

fig, ax : Figure and Axes

Examples

fig, ax = plot.plot_metric_comparison(pre_results, post_results, 'mean_dose', cohort_names=('Pre-treatment', 'Post-treatment'))

Source code in src/dosemetrics/utils/plot.py
def plot_metric_comparison(
    results1: pd.DataFrame,
    results2: pd.DataFrame,
    metric: str,
    cohort_names: Tuple[str, str] = ('Cohort 1', 'Cohort 2'),
    structure_names: Optional[List[str]] = None,
    figsize: Tuple[float, float] = (12, 6)
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Compare a metric between two cohorts, structure by structure.

    For every structure present in both cohorts, two adjacent bars are drawn:
    the bar height is the cohort mean of ``metric`` and the error bar is its
    standard deviation. If the cohorts share no structure, a message is
    printed and the empty figure is returned.

    Parameters
    ----------
    results1, results2 : pd.DataFrame
        Results from two cohorts; each needs a 'structure' column and the
        ``metric`` column.
    metric : str
        Metric to compare
    cohort_names : Tuple[str, str]
        Legend labels for the first and second cohort
    structure_names : List[str], optional
        Restrict the comparison to these structures
    figsize : Tuple[float, float]
        Figure size

    Returns
    -------
    fig, ax : Figure and Axes

    Examples
    --------
    >>> fig, ax = plot.plot_metric_comparison(
    ...     pre_results, post_results, 'mean_dose',
    ...     cohort_names=('Pre-treatment', 'Post-treatment')
    ... )
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Optionally narrow both cohorts to the requested structures
    cohorts = [results1, results2]
    if structure_names:
        cohorts = [df[df['structure'].isin(structure_names)] for df in cohorts]
    first, second = cohorts

    # Only structures seen in both cohorts can be compared
    shared = sorted(
        set(first['structure'].unique()) & set(second['structure'].unique())
    )
    if not shared:
        print("No common structures found")
        return fig, ax

    def _metric_values(frame, name):
        # Metric values of a single structure within one cohort
        return frame[frame['structure'] == name][metric]

    # Per-structure statistics, computed for both cohorts before drawing
    mean_heights = [[_metric_values(df, s).mean() for s in shared] for df in cohorts]
    std_errors = [[_metric_values(df, s).std() for s in shared] for df in cohorts]

    # Bars of the two cohorts sit half a bar width left/right of each tick
    centers = np.arange(len(shared))
    bar_width = 0.35
    for k, shift in enumerate((-bar_width / 2, bar_width / 2)):
        ax.bar(centers + shift, mean_heights[k], bar_width, label=cohort_names[k],
               yerr=std_errors[k], capsize=5, alpha=0.8, color=DEFAULT_COLORS[k])

    # Labels, ticks and legend
    ax.set_ylabel(metric, fontsize=12)
    ax.set_xlabel('Structure', fontsize=12)
    ax.set_title(f'{metric} Comparison', fontsize=14, fontweight='bold')
    ax.set_xticks(centers)
    ax.set_xticklabels(shared, rotation=45, ha='right')
    ax.legend()

    # Light horizontal grid, open top/right frame
    ax.grid(True, alpha=0.3, axis='y')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    fig.tight_layout()

    return fig, ax

plot_dose_slice

plot_dose_slice(dose: Dose, slice_idx: Optional[int] = None, axis: int = 2, structures: Optional[StructureSet] = None, structure_names: Optional[List[str]] = None, vmin: Optional[float] = None, vmax: Optional[float] = None, cmap: str = 'viridis', show_colorbar: bool = True, figsize: Tuple[float, float] = (10, 8)) -> Tuple[plt.Figure, plt.Axes]

Plot a 2D slice of dose distribution with optional structure contours.

Parameters

- dose : Dose — Dose distribution
- slice_idx : int, optional — Slice index (default: middle slice)
- axis : int — Axis to slice along (0=sagittal, 1=coronal, 2=axial)
- structures : StructureSet, optional — Structures to overlay
- structure_names : List[str], optional — Specific structures to show
- vmin, vmax : float, optional — Dose value range for colormap
- cmap : str — Colormap name
- show_colorbar : bool — Whether to show colorbar
- figsize : Tuple[float, float] — Figure size

Returns

fig, ax : Figure and Axes

Examples

fig, ax = plot.plot_dose_slice(dose, structures=structures, structure_names=['PTV', 'Heart'])

Source code in src/dosemetrics/utils/plot.py
def plot_dose_slice(
    dose: Dose,
    slice_idx: Optional[int] = None,
    axis: int = 2,
    structures: Optional[StructureSet] = None,
    structure_names: Optional[List[str]] = None,
    vmin: Optional[float] = None,
    vmax: Optional[float] = None,
    cmap: str = 'viridis',
    show_colorbar: bool = True,
    figsize: Tuple[float, float] = (10, 8)
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Plot a 2D slice of dose distribution with optional structure contours.

    Parameters
    ----------
    dose : Dose
        Dose distribution; its ``dose_array`` is indexed with three indices.
    slice_idx : int, optional
        Slice index (default: middle slice along ``axis``)
    axis : int
        Axis to slice along (0=sagittal, 1=coronal, 2=axial). Negative values
        -3..-1 count from the last axis.
    structures : StructureSet, optional
        Structures to overlay.
        NOTE: contour drawing is not implemented yet; structures are selected
        but nothing is drawn for them.
    structure_names : List[str], optional
        Specific structures to show
    vmin, vmax : float, optional
        Dose value range for colormap
    cmap : str
        Colormap name
    show_colorbar : bool
        Whether to show colorbar
    figsize : Tuple[float, float]
        Figure size

    Returns
    -------
    fig, ax : Figure and Axes

    Raises
    ------
    ValueError
        If ``axis`` does not designate one of the three array axes.

    Examples
    --------
    >>> fig, ax = plot.plot_dose_slice(
    ...     dose, structures=structures,
    ...     structure_names=['PTV', 'Heart']
    ... )
    """
    # Validate before creating the figure so a bad axis does not leave an
    # orphaned figure behind, and never silently falls back to axial slicing.
    if axis not in (-3, -2, -1, 0, 1, 2):
        raise ValueError(f"axis must be 0, 1 or 2 (got {axis!r})")
    axis = axis % 3

    fig, ax = plt.subplots(figsize=figsize)

    # Get middle slice if not specified
    if slice_idx is None:
        slice_idx = dose.dose_array.shape[axis] // 2

    # Extract slice: full range on the two in-plane axes, one index on `axis`
    index = [slice(None)] * 3
    index[axis] = slice_idx
    dose_slice = dose.dose_array[tuple(index)]

    # Plot dose (transposed so the first remaining array axis runs horizontally)
    im = ax.imshow(dose_slice.T, origin='lower', cmap=cmap, vmin=vmin, vmax=vmax,
                   aspect='equal', interpolation='bilinear')

    # Add colorbar
    if show_colorbar:
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label('Dose (Gy)', fontsize=12)

    # Overlay structure contours
    # TODO: contour extraction is not implemented. The selection below is
    # scaffolding only; it needs proper coordinate transformation and contour
    # extraction for this slice, drawn with ax.plot().
    if structures:
        struct_list = [s for s in structures if s.name in structure_names] if structure_names else list(structures)

        for i, structure in enumerate(struct_list):
            # Color reserved for this structure's contour once implemented
            color = DEFAULT_COLORS[i % len(DEFAULT_COLORS)]

    # Label the two in-plane axes. Assumes array axes are ordered (X, Y, Z),
    # which matches the X/Y labelling used for axial slices.
    horizontal_name, vertical_name = [
        name for i, name in enumerate(('X', 'Y', 'Z')) if i != axis
    ]
    ax.set_xlabel(f'{horizontal_name} (pixels)', fontsize=12)
    ax.set_ylabel(f'{vertical_name} (pixels)', fontsize=12)
    ax.set_title(f'Dose Distribution - Slice {slice_idx}', fontsize=14, fontweight='bold')

    fig.tight_layout()

    return fig, ax

save_figure

save_figure(fig: Figure, filepath: Union[str, Path], dpi: int = 300, formats: List[str] = ['png'], **savefig_kwargs) -> None

Save figure in multiple formats with publication-quality settings.

Parameters

- fig : plt.Figure — Figure to save
- filepath : str or Path — Output path (without extension)
- dpi : int — Resolution for raster formats
- formats : List[str] — Formats to save (e.g., ['png', 'pdf', 'svg'])
- **savefig_kwargs — Additional arguments for fig.savefig()

Examples

fig, ax = plot.plot_dvh(dose, structure)
plot.save_figure(fig, 'figures/ptv_dvh', formats=['png', 'pdf'])

Source code in src/dosemetrics/utils/plot.py
def save_figure(
    fig: plt.Figure,
    filepath: Union[str, Path],
    dpi: int = 300,
    formats: Optional[List[str]] = None,
    **savefig_kwargs
) -> None:
    """
    Save figure in multiple formats with publication-quality settings.

    One file is written per entry of ``formats``; the parent directory is
    created if needed and each written path is printed.

    Parameters
    ----------
    fig : plt.Figure
        Figure to save
    filepath : str or Path
        Output path (without extension). Dots inside the base name are kept
        (``'plan_v1.2'`` -> ``'plan_v1.2.png'``). If the path already ends in
        an image extension supported by the figure's canvas (e.g. ``.png``),
        that extension is stripped first.
    dpi : int
        Resolution for raster formats (used unless ``dpi`` is given in
        ``savefig_kwargs``)
    formats : List[str], optional
        Formats to save (e.g., ['png', 'pdf', 'svg']). Defaults to ['png'].
        A single string is treated as one format.
    **savefig_kwargs
        Additional arguments for fig.savefig(); ``bbox_inches`` defaults to
        'tight'.

    Examples
    --------
    >>> fig, ax = plot.plot_dvh(dose, structure)
    >>> plot.save_figure(fig, 'figures/ptv_dvh', formats=['png', 'pdf'])
    """
    if formats is None:
        formats = ['png']
    elif isinstance(formats, str):
        # A bare string would otherwise be iterated character by character.
        formats = [formats]

    filepath = Path(filepath)

    # Drop a trailing extension only when it is a real image format, so that
    # 'out.png' keeps working while 'plan_v1.2' is not truncated to 'plan_v1'.
    known_extensions = set(fig.canvas.get_supported_filetypes())
    if filepath.suffix[1:].lower() in known_extensions:
        filepath = filepath.with_suffix('')

    filepath.parent.mkdir(parents=True, exist_ok=True)

    savefig_kwargs.setdefault('bbox_inches', 'tight')
    savefig_kwargs.setdefault('dpi', dpi)

    for fmt in formats:
        # Append rather than replace: with_suffix() would swallow everything
        # after the last dot of the base name.
        output_path = filepath.with_name(f'{filepath.name}.{fmt}')
        fig.savefig(output_path, **savefig_kwargs)
        print(f"Saved: {output_path}")