Skip to content

spatial_graph_algorithms.compare

Helpers for comparative simulation and reconstruction studies. run_comparison() returns a ComparisonResult with built-in summary, ranking, plotting, and I/O.

API Reference

spatial_graph_algorithms.compare.ComparisonResult dataclass

Results of a multi-method reconstruction comparison study.

Wraps the raw tidy DataFrame produced by `run_comparison()` and exposes convenience methods for summarising, ranking, and plotting without boilerplate pandas code.

The raw DataFrame is always accessible via the `df` attribute.

Parameters:

Name Type Description Default
df DataFrame

One row per graph_spec × seed × reconstruction_spec.

required

Examples:

>>> import pandas as pd
>>> from spatial_graph_algorithms.compare import parameter_grid, run_comparison
>>> graphs = parameter_grid(cases=[{"n": 50, "mode": "knn", "k": 4}])
>>> recons = parameter_grid(cases=[{"method": "mds"}])
>>> result = run_comparison(graph_specs=graphs, reconstruction_specs=recons, seeds=[1])
>>> isinstance(result.df, pd.DataFrame)
True
Source code in src/spatial_graph_algorithms/compare/__init__.py
@dataclass
class ComparisonResult:
    """Results of a multi-method reconstruction comparison study.

    Wraps the raw tidy DataFrame produced by :func:`run_comparison` and
    exposes convenience methods for summarising, ranking, and plotting
    without boilerplate pandas.

    The raw DataFrame is always accessible via :attr:`df`.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per ``graph_spec × seed × reconstruction_spec``.

    Examples
    --------
    >>> import pandas as pd
    >>> from spatial_graph_algorithms.compare import parameter_grid, run_comparison
    >>> graphs = parameter_grid(cases=[{"n": 50, "mode": "knn", "k": 4}])
    >>> recons = parameter_grid(cases=[{"method": "mds"}])
    >>> result = run_comparison(graph_specs=graphs, reconstruction_specs=recons, seeds=[1])
    >>> isinstance(result.df, pd.DataFrame)
    True
    """

    df: pd.DataFrame

    # ------------------------------------------------------------------
    # Analysis helpers
    # ------------------------------------------------------------------

    def summary(
        self,
        *,
        by: list[str] | None = None,
        metrics: list[str] | None = None,
    ) -> pd.DataFrame:
        """Return mean metrics grouped by method and graph condition.

        Only rows with ``status == "ok"`` are included.

        Parameters
        ----------
        by : list of str, optional
            Columns to group by.  Default is ``["graph_label", "method"]``.
        metrics : list of str, optional
            Metric columns to aggregate.  Default is ``["cpd", "knn"]``.
            Requested metrics that are not columns of the results are
            silently skipped.

        Returns
        -------
        pandas.DataFrame
            Mean of each metric for each group, rounded to 4 decimals.
            Groups with no successful rows are absent.  An empty DataFrame
            is returned when none of the requested metrics is present.

        Examples
        --------
        >>> result.summary()  # doctest: +SKIP
                                         cpd    knn
        graph_label  method
        mode=knn__k=4  landmark_mds  0.8821  0.7341
                       mds           0.7512  0.6103
        """
        by = list(by) if by is not None else _DEFAULT_SUMMARY_BY
        metrics = list(metrics) if metrics is not None else _DEFAULT_METRICS
        ok = self.df[self.df["status"] == "ok"]
        present = [m for m in metrics if m in ok.columns]
        if not present:
            return pd.DataFrame()
        return ok.groupby(by)[present].mean().round(4)

    def best(
        self,
        *,
        metric: str = "cpd",
        by: list[str] | None = None,
        higher_is_better: bool = True,
    ) -> pd.DataFrame:
        """Return the best-performing method per group.

        Only rows with ``status == "ok"`` are considered.

        Parameters
        ----------
        metric : str
            Metric column to rank by.  Default is ``"cpd"``.
        by : list of str, optional
            Grouping columns.  Default is ``["graph_label"]``.
        higher_is_better : bool
            If ``True`` (default), select the row with the highest metric
            value.  Set to ``False`` for error / loss metrics.

        Returns
        -------
        pandas.DataFrame
            One row per unique ``by`` group, showing the best method and its
            mean metric value (averaged over seeds).  Groups in which no
            method has a non-NaN mean are omitted.

        Raises
        ------
        ValueError
            If ``metric`` is not a column in the results DataFrame.

        Examples
        --------
        >>> result.best(metric="cpd")  # doctest: +SKIP
              graph_label        method     cpd
        0  mode=knn__k=4  landmark_mds  0.8821
        """
        by_cols = list(by) if by is not None else list(_DEFAULT_BEST_BY)
        ok = self.df[self.df["status"] == "ok"]
        if metric not in ok.columns:
            available = [c for c in ok.columns if ok[c].dtype.kind == "f"]
            raise ValueError(
                f"Metric {metric!r} not found in results.  "
                f"Available numeric columns: {available}"
            )
        # "method" may already be one of the grouping columns; listing it
        # twice would make reset_index() fail on the duplicated name.
        grp_cols = by_cols if "method" in by_cols else [*by_cols, "method"]
        agg = ok.groupby(grp_cols)[metric].mean().reset_index()
        # A NaN mean cannot be ranked: idxmax/idxmin on an all-NaN group
        # yields NaN (or raises), which then breaks the .loc lookup below.
        agg = agg.dropna(subset=[metric])
        if agg.empty:
            return agg.reset_index(drop=True)
        ranked = agg.groupby(by_cols)[metric]
        idx = ranked.idxmax() if higher_is_better else ranked.idxmin()
        return agg.loc[idx.values].reset_index(drop=True)

    # ------------------------------------------------------------------
    # Plotting
    # ------------------------------------------------------------------

    def plot(
        self,
        *,
        metric: str = "cpd",
        by: str = "method",
        hue: str | None = "graph_label",
        ax: plt.Axes | None = None,
    ) -> plt.Figure:
        """Bar chart of a quality metric grouped by method and condition.

        Means are computed over all successful rows (``status == "ok"``).
        Error bars show one standard deviation.

        Parameters
        ----------
        metric : str
            Metric column to plot.  Default is ``"cpd"``.
        by : str
            Column that defines the x-axis categories.  Default is
            ``"method"``.
        hue : str, optional
            Column that defines the colour grouping.  Default is
            ``"graph_label"``.  Pass ``None`` for a single-colour chart.
            A hue that is not a column of the results, or that equals
            ``by``, also produces a single-colour chart.
        ax : matplotlib.axes.Axes, optional
            Axes to draw on.  A new figure is created when omitted.

        Returns
        -------
        matplotlib.figure.Figure
            The figure that owns the axes drawn on.  When there are no
            successful rows the axes are labelled but contain no bars.

        Raises
        ------
        ValueError
            If ``metric`` is not a column in the results DataFrame.

        Examples
        --------
        >>> fig = result.plot(metric="cpd", by="method")  # doctest: +SKIP
        """
        ok = self.df[self.df["status"] == "ok"]
        if metric not in ok.columns:
            raise ValueError(f"Metric {metric!r} not found in results.")

        # Imported at call time, after validation, so an invalid metric is
        # reported without needing the plotting backend.
        import matplotlib.pyplot as plt

        # A hue that is absent from the data or identical to ``by`` cannot
        # form a second grouping level (a duplicated group key makes
        # reset_index() fail), so fall back to a single-colour chart.
        use_hue = hue is not None and hue != by and hue in ok.columns
        effective_hue = hue if use_hue else None
        group_cols = [effective_hue, by] if effective_hue is not None else [by]

        agg = ok.groupby(group_cols)[metric].agg(["mean", "std"]).reset_index()

        if ax is None:
            fig, ax = plt.subplots(figsize=(max(5, len(agg) * 0.9), 4))
        else:
            fig = ax.get_figure()

        by_vals = sorted(agg[by].unique())
        # Position of every x-axis category; shared by all hue levels.
        x_map = {v: j for j, v in enumerate(by_vals)}
        colors = plt.cm.tab10.colors  # type: ignore[attr-defined]

        if effective_hue is not None:
            hue_vals = sorted(agg[effective_hue].unique())
            n_hue = len(hue_vals)
            # With no successful rows there are no hue levels; avoid the
            # division by zero and simply draw nothing.
            width = 0.75 / n_hue if n_hue else 0.75
            for i, hue_val in enumerate(hue_vals):
                sub = agg[agg[effective_hue] == hue_val]
                # Centre the cluster of hue bars on each category tick.
                x_pos = [x_map[v] + (i - n_hue / 2 + 0.5) * width for v in sub[by]]
                ax.bar(
                    x_pos,
                    sub["mean"],
                    width=width,
                    label=str(hue_val),
                    # std is NaN for groups with a single row.
                    yerr=sub["std"].fillna(0),
                    capsize=3,
                    color=colors[i % len(colors)],
                    alpha=0.85,
                )
            if hue_vals:
                ax.legend(title=effective_hue, bbox_to_anchor=(1.01, 1), loc="upper left")
        else:
            means = [agg.loc[agg[by] == v, "mean"].mean() for v in by_vals]
            stds = [agg.loc[agg[by] == v, "std"].mean() for v in by_vals]
            ax.bar(
                range(len(by_vals)),
                means,
                yerr=[s if pd.notna(s) else 0 for s in stds],
                capsize=3,
                color=colors[0],
                alpha=0.85,
            )

        ax.set_xticks(range(len(by_vals)))
        ax.set_xticklabels(by_vals, rotation=15, ha="right")
        ax.set_xlabel(by)
        ax.set_ylabel(metric)
        title = f"{metric} by {by}"
        if effective_hue:
            title += f", coloured by {effective_hue}"
        ax.set_title(title)
        fig.tight_layout()
        return fig

    # ------------------------------------------------------------------
    # I/O
    # ------------------------------------------------------------------

    def save(self, path: str | Path) -> None:
        """Save results to a CSV or Parquet file.

        The format is inferred from the file extension (``.parquet`` → Parquet,
        anything else → CSV).  The DataFrame index is not written.

        Parameters
        ----------
        path : str or Path
            Destination path.  Parent directories are created automatically.

        Examples
        --------
        >>> result.save("results/comparison.csv")  # doctest: +SKIP
        """
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        if p.suffix == ".parquet":
            self.df.to_parquet(p, index=False)
        else:
            self.df.to_csv(p, index=False)

    @classmethod
    def load(cls, path: str | Path) -> ComparisonResult:
        """Load results previously saved with :meth:`save`.

        The format is inferred from the file extension in the same way as
        :meth:`save` (``.parquet`` → Parquet, anything else → CSV).

        Parameters
        ----------
        path : str or Path
            Path to a CSV or Parquet file created by :meth:`save`.

        Returns
        -------
        ComparisonResult

        Examples
        --------
        >>> result = ComparisonResult.load("results/comparison.csv")  # doctest: +SKIP
        """
        p = Path(path)
        if p.suffix == ".parquet":
            return cls(df=pd.read_parquet(p))
        return cls(df=pd.read_csv(p))

    # ------------------------------------------------------------------
    # Display
    # ------------------------------------------------------------------

    def __repr__(self) -> str:
        """Return a one-line summary: row counts and the default metrics present."""
        n_total = len(self.df)
        # Without a "status" column every row is counted as successful.
        n_ok = int((self.df["status"] == "ok").sum()) if "status" in self.df.columns else n_total
        n_err = n_total - n_ok
        metrics = [m for m in _DEFAULT_METRICS if m in self.df.columns]
        return (
            f"ComparisonResult(rows={n_total}, ok={n_ok}, errors={n_err}, "
            f"metrics={metrics})"
        )

    def _repr_html_(self) -> str:
        """Delegate rich (notebook) display to the wrapped DataFrame."""
        return self.df._repr_html_()  # type: ignore[return-value]

Functions

summary(*, by=None, metrics=None)

Return mean metrics grouped by method and graph condition.

Only rows with status == "ok" are included.

Parameters:

Name Type Description Default
by list of str

Columns to group by. Default is ["graph_label", "method"].

None
metrics list of str

Metric columns to aggregate. Default is ["cpd", "knn"].

None

Returns:

Type Description
DataFrame

Mean of each metric for each group. Groups with no successful rows are absent.

Examples:

>>> result.summary()
                                 cpd    knn
graph_label  method
mode=knn__k=4  landmark_mds  0.8821  0.7341
               mds           0.7512  0.6103
Source code in src/spatial_graph_algorithms/compare/__init__.py
def summary(
    self,
    *,
    by: list[str] | None = None,
    metrics: list[str] | None = None,
) -> pd.DataFrame:
    """Average the requested metrics over the successful rows of each group.

    Rows whose ``status`` is not ``"ok"`` are ignored.

    Parameters
    ----------
    by : list of str, optional
        Columns to group by.  Default is ``["graph_label", "method"]``.
    metrics : list of str, optional
        Metric columns to aggregate.  Default is ``["cpd", "knn"]``.
        Names that are not columns of the results are skipped.

    Returns
    -------
    pandas.DataFrame
        Per-group mean of every available metric, rounded to 4 decimals.
        Groups without successful rows do not appear.  Empty when none of
        the requested metrics is a column of the results.

    Examples
    --------
    >>> result.summary()  # doctest: +SKIP
                                     cpd    knn
    graph_label  method
    mode=knn__k=4  landmark_mds  0.8821  0.7341
                   mds           0.7512  0.6103
    """
    group_keys = _DEFAULT_SUMMARY_BY if by is None else list(by)
    wanted = _DEFAULT_METRICS if metrics is None else list(metrics)

    successful = self.df.loc[self.df["status"] == "ok"]
    available = [name for name in wanted if name in successful.columns]
    if available:
        per_group_mean = successful.groupby(group_keys)[available].mean()
        return per_group_mean.round(4)
    # Nothing to aggregate: none of the requested metrics exists.
    return pd.DataFrame()

best(*, metric='cpd', by=None, higher_is_better=True)

Return the best-performing method per group.

Parameters:

Name Type Description Default
metric str

Metric column to rank by. Default is "cpd".

'cpd'
by list of str

Grouping columns. Default is ["graph_label"].

None
higher_is_better bool

If True (default), select the row with the highest metric value. Set to False for error / loss metrics.

True

Returns:

Type Description
DataFrame

One row per unique by group, showing the best method and its mean metric value (averaged over seeds).

Raises:

Type Description
ValueError

If metric is not a column in the results DataFrame.

Examples:

>>> result.best(metric="cpd")
      graph_label        method     cpd
0  mode=knn__k=4  landmark_mds  0.8821
Source code in src/spatial_graph_algorithms/compare/__init__.py
def best(
    self,
    *,
    metric: str = "cpd",
    by: list[str] | None = None,
    higher_is_better: bool = True,
) -> pd.DataFrame:
    """Return the best-performing method per group.

    Only rows with ``status == "ok"`` are considered.

    Parameters
    ----------
    metric : str
        Metric column to rank by.  Default is ``"cpd"``.
    by : list of str, optional
        Grouping columns.  Default is ``["graph_label"]``.
    higher_is_better : bool
        If ``True`` (default), select the row with the highest metric
        value.  Set to ``False`` for error / loss metrics.

    Returns
    -------
    pandas.DataFrame
        One row per unique ``by`` group, showing the best method and its
        mean metric value (averaged over seeds).  Groups in which no method
        has a non-NaN mean are omitted.

    Raises
    ------
    ValueError
        If ``metric`` is not a column in the results DataFrame.

    Examples
    --------
    >>> result.best(metric="cpd")  # doctest: +SKIP
          graph_label        method     cpd
    0  mode=knn__k=4  landmark_mds  0.8821
    """
    by_cols = list(by) if by is not None else list(_DEFAULT_BEST_BY)
    ok = self.df[self.df["status"] == "ok"]
    if metric not in ok.columns:
        available = [c for c in ok.columns if ok[c].dtype.kind == "f"]
        raise ValueError(
            f"Metric {metric!r} not found in results.  "
            f"Available numeric columns: {available}"
        )
    # "method" may already be one of the grouping columns; listing it twice
    # would make reset_index() fail on the duplicated name.
    grp_cols = by_cols if "method" in by_cols else [*by_cols, "method"]
    agg = ok.groupby(grp_cols)[metric].mean().reset_index()
    # A NaN mean cannot be ranked: idxmax/idxmin on an all-NaN group yields
    # NaN (or raises), which then breaks the .loc lookup below.
    agg = agg.dropna(subset=[metric])
    if agg.empty:
        return agg.reset_index(drop=True)
    ranked = agg.groupby(by_cols)[metric]
    idx = ranked.idxmax() if higher_is_better else ranked.idxmin()
    return agg.loc[idx.values].reset_index(drop=True)

plot(*, metric='cpd', by='method', hue='graph_label', ax=None)

Bar chart of a quality metric grouped by method and condition.

Means are computed over all successful rows (status == "ok"). Error bars show one standard deviation.

Parameters:

Name Type Description Default
metric str

Metric column to plot. Default is "cpd".

'cpd'
by str

Column that defines the x-axis categories. Default is "method".

'method'
hue str

Column that defines the colour grouping. Default is "graph_label". Pass None for a single-colour chart.

'graph_label'
ax Axes

Axes to draw on. A new figure is created when omitted.

None

Returns:

Type Description
Figure

Raises:

Type Description
ValueError

If metric is not a column in the results DataFrame.

Examples:

>>> fig = result.plot(metric="cpd", by="method")
Source code in src/spatial_graph_algorithms/compare/__init__.py
def plot(
    self,
    *,
    metric: str = "cpd",
    by: str = "method",
    hue: str | None = "graph_label",
    ax: plt.Axes | None = None,
) -> plt.Figure:
    """Bar chart of a quality metric grouped by method and condition.

    Means are computed over all successful rows (``status == "ok"``).
    Error bars show one standard deviation.

    Parameters
    ----------
    metric : str
        Metric column to plot.  Default is ``"cpd"``.
    by : str
        Column that defines the x-axis categories.  Default is
        ``"method"``.
    hue : str, optional
        Column that defines the colour grouping.  Default is
        ``"graph_label"``.  Pass ``None`` for a single-colour chart.
        A hue that is not a column of the results, or that equals ``by``,
        also produces a single-colour chart.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on.  A new figure is created when omitted.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that owns the axes drawn on.  When there are no
        successful rows the axes are labelled but contain no bars.

    Raises
    ------
    ValueError
        If ``metric`` is not a column in the results DataFrame.

    Examples
    --------
    >>> fig = result.plot(metric="cpd", by="method")  # doctest: +SKIP
    """
    ok = self.df[self.df["status"] == "ok"]
    if metric not in ok.columns:
        raise ValueError(f"Metric {metric!r} not found in results.")

    # Imported at call time, after validation, so an invalid metric is
    # reported without needing the plotting backend.
    import matplotlib.pyplot as plt

    # A hue that is absent from the data or identical to ``by`` cannot form
    # a second grouping level (a duplicated group key makes reset_index()
    # fail), so fall back to a single-colour chart.
    use_hue = hue is not None and hue != by and hue in ok.columns
    effective_hue = hue if use_hue else None
    group_cols = [effective_hue, by] if effective_hue is not None else [by]

    agg = ok.groupby(group_cols)[metric].agg(["mean", "std"]).reset_index()

    if ax is None:
        fig, ax = plt.subplots(figsize=(max(5, len(agg) * 0.9), 4))
    else:
        fig = ax.get_figure()

    by_vals = sorted(agg[by].unique())
    # Position of every x-axis category; shared by all hue levels.
    x_map = {v: j for j, v in enumerate(by_vals)}
    colors = plt.cm.tab10.colors  # type: ignore[attr-defined]

    if effective_hue is not None:
        hue_vals = sorted(agg[effective_hue].unique())
        n_hue = len(hue_vals)
        # With no successful rows there are no hue levels; avoid the
        # division by zero and simply draw nothing.
        width = 0.75 / n_hue if n_hue else 0.75
        for i, hue_val in enumerate(hue_vals):
            sub = agg[agg[effective_hue] == hue_val]
            # Centre the cluster of hue bars on each category tick.
            x_pos = [x_map[v] + (i - n_hue / 2 + 0.5) * width for v in sub[by]]
            ax.bar(
                x_pos,
                sub["mean"],
                width=width,
                label=str(hue_val),
                # std is NaN for groups with a single row.
                yerr=sub["std"].fillna(0),
                capsize=3,
                color=colors[i % len(colors)],
                alpha=0.85,
            )
        if hue_vals:
            ax.legend(title=effective_hue, bbox_to_anchor=(1.01, 1), loc="upper left")
    else:
        means = [agg.loc[agg[by] == v, "mean"].mean() for v in by_vals]
        stds = [agg.loc[agg[by] == v, "std"].mean() for v in by_vals]
        ax.bar(
            range(len(by_vals)),
            means,
            yerr=[s if pd.notna(s) else 0 for s in stds],
            capsize=3,
            color=colors[0],
            alpha=0.85,
        )

    ax.set_xticks(range(len(by_vals)))
    ax.set_xticklabels(by_vals, rotation=15, ha="right")
    ax.set_xlabel(by)
    ax.set_ylabel(metric)
    title = f"{metric} by {by}"
    if effective_hue:
        title += f", coloured by {effective_hue}"
    ax.set_title(title)
    fig.tight_layout()
    return fig

save(path)

Save results to a CSV or Parquet file.

The format is inferred from the file extension (.parquet → Parquet, anything else → CSV).

Parameters:

Name Type Description Default
path str or Path

Destination path. Parent directories are created automatically.

required

Examples:

>>> result.save("results/comparison.csv")
Source code in src/spatial_graph_algorithms/compare/__init__.py
def save(self, path: str | Path) -> None:
    """Write the results table to disk as CSV or Parquet.

    The output format follows the file extension: ``.parquet`` selects
    Parquet, every other extension (or none) selects CSV.  The DataFrame
    index is not written.

    Parameters
    ----------
    path : str or Path
        Destination path.  Missing parent directories are created first.

    Examples
    --------
    >>> result.save("results/comparison.csv")  # doctest: +SKIP
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Pick the writer once; both accept the same (path, index=False) call.
    write = self.df.to_parquet if target.suffix == ".parquet" else self.df.to_csv
    write(target, index=False)

load(path) classmethod

Load results previously saved with `save()`.

Parameters:

Name Type Description Default
path str or Path

Path to a CSV or Parquet file created by `save()`.

required

Returns:

Type Description
ComparisonResult

Examples:

>>> result = ComparisonResult.load("results/comparison.csv")
Source code in src/spatial_graph_algorithms/compare/__init__.py
@classmethod
def load(cls, path: str | Path) -> ComparisonResult:
    """Rebuild a result object from a file written by :meth:`save`.

    The format follows the file extension: ``.parquet`` is read as Parquet,
    anything else is read as CSV.

    Parameters
    ----------
    path : str or Path
        Path to a CSV or Parquet file created by :meth:`save`.

    Returns
    -------
    ComparisonResult
        A new instance wrapping the DataFrame read from ``path``.

    Examples
    --------
    >>> result = ComparisonResult.load("results/comparison.csv")  # doctest: +SKIP
    """
    source = Path(path)
    # Choose the reader from the extension, then wrap whatever it returns.
    read = pd.read_parquet if source.suffix == ".parquet" else pd.read_csv
    return cls(df=read(source))

spatial_graph_algorithms.compare.parameter_grid(*, base=None, vary=None, cases=None, groups=None, where=None, label_keys=None, drop_none=False)

Build a list of parameter dictionaries for comparison studies.

Parameters:

Name Type Description Default
base mapping

Parameters shared by every cartesian product case.

None
vary mapping

Parameter values to expand using `itertools.product`.

None
cases iterable of mapping

Explicit hand-picked cases. Useful for non-cartesian comparisons.

None
groups iterable of mapping

Multiple grid definitions. Each group can contain base, vary, cases, where, label_keys, and drop_none. Groups avoid creating invalid cartesian products for unrelated parameters.

None
where callable

Predicate used to keep or discard expanded specs.

None
label_keys iterable of str

Keys used to auto-generate "_label". Defaults to all public keys.

None
drop_none bool

If True, remove keys with value None after filtering.

False

Returns:

Type Description
list of dict

Parameter specs. Each spec has a readable "_label" unless one was provided explicitly.

Examples:

>>> from spatial_graph_algorithms.compare import parameter_grid
>>> parameter_grid(base={"n": 100}, vary={"mode": ["knn"], "k": [4, 8]})
[{'_label': 'mode=knn__k=4', 'n': 100, 'mode': 'knn', 'k': 4}, ...]
Source code in src/spatial_graph_algorithms/compare/__init__.py
def parameter_grid(
    *,
    base: Mapping[str, Any] | None = None,
    vary: Mapping[str, Iterable[Any]] | None = None,
    cases: Iterable[Mapping[str, Any]] | None = None,
    groups: Iterable[GridGroup] | None = None,
    where: Callable[[Spec], bool] | None = None,
    label_keys: Iterable[str] | None = None,
    drop_none: bool = False,
) -> list[Spec]:
    """Assemble the parameter dictionaries that drive a comparison study.

    Specs coming from ``groups`` (expanded recursively, one call per group)
    are listed first, followed by the specs expanded from this call's own
    ``base`` / ``vary`` / ``cases``.  The combined list is then filtered
    with ``where``, optionally stripped of ``None`` values, and labelled.

    Parameters
    ----------
    base : mapping, optional
        Parameters shared by every cartesian product case.
    vary : mapping, optional
        Parameter values to expand using :func:`itertools.product`.
    cases : iterable of mapping, optional
        Explicit hand-picked cases.  Useful for non-cartesian comparisons.
    groups : iterable of mapping, optional
        Multiple grid definitions.  Each group can contain ``base``, ``vary``,
        ``cases``, ``where``, ``label_keys``, and ``drop_none``.  Groups avoid
        creating invalid cartesian products for unrelated parameters.  A
        group inherits this call's ``label_keys`` and ``drop_none`` when it
        does not set them itself.
    where : callable, optional
        Predicate used to keep or discard expanded specs.  It receives a
        copy of each spec and is also applied to specs produced by
        ``groups``.
    label_keys : iterable of str, optional
        Keys used to auto-generate ``"_label"``.  Defaults to all public keys.
    drop_none : bool
        If ``True``, remove keys with value ``None`` after filtering.

    Returns
    -------
    list of dict
        Parameter specs.  Each spec has a readable ``"_label"`` unless one was
        provided explicitly.

    Examples
    --------
    >>> from spatial_graph_algorithms.compare import parameter_grid
    >>> parameter_grid(base={"n": 100}, vary={"mode": ["knn"], "k": [4, 8]})
    [{'_label': 'mode=knn__k=4', 'n': 100, 'mode': 'knn', 'k': 4}, ...]
    """
    candidates: list[Spec] = []

    if groups is not None:
        # Each group is its own self-contained grid; expand it recursively.
        for definition in groups:
            candidates += parameter_grid(
                base=definition.get("base"),
                vary=definition.get("vary"),
                cases=definition.get("cases"),
                where=definition.get("where"),
                label_keys=definition.get("label_keys", label_keys),
                drop_none=bool(definition.get("drop_none", drop_none)),
            )

    candidates += _expand_single_grid(base=base, vary=vary, cases=cases)

    result: list[Spec] = []
    for candidate in candidates:
        # The predicate gets a copy, so it cannot alter the stored spec.
        accepted = where is None or where(dict(candidate))
        if not accepted:
            continue
        kept = {
            key: value
            for key, value in candidate.items()
            if not drop_none or value is not None
        }
        _ensure_label(kept, label_keys=label_keys)
        result.append(kept)
    return result

spatial_graph_algorithms.compare.dry_run_comparison(*, graph_specs, reconstruction_specs, seeds)

Preview comparison combinations without generating graphs.

Parameters:

Name Type Description Default
graph_specs iterable of mapping

Graph-generation specs, usually returned by `parameter_grid()`.

required
reconstruction_specs iterable of mapping

Reconstruction specs, usually returned by `parameter_grid()`.

required
seeds iterable of int

Top-level seeds to combine with each graph and reconstruction spec.

required

Returns:

Type Description
DataFrame

One row per planned graph_spec × seed × reconstruction_spec with labels and parameter columns. No simulation, reconstruction, or metric computation is performed.

Source code in src/spatial_graph_algorithms/compare/__init__.py
def dry_run_comparison(
    *,
    graph_specs: Iterable[Mapping[str, Any]],
    reconstruction_specs: Iterable[Mapping[str, Any]],
    seeds: Iterable[int],
) -> pd.DataFrame:
    """List every combination a comparison would run, without running it.

    Parameters
    ----------
    graph_specs : iterable of mapping
        Graph-generation specs, usually returned by :func:`parameter_grid`.
    reconstruction_specs : iterable of mapping
        Reconstruction specs, usually returned by :func:`parameter_grid`.
    seeds : iterable of int
        Top-level seeds to combine with each graph and reconstruction spec.

    Returns
    -------
    pandas.DataFrame
        One row per planned ``graph_spec × seed × reconstruction_spec`` with
        labels and parameter columns, each carrying ``status="planned"``.
        Rows are ordered by graph spec, then seed, then reconstruction spec.
        No simulation, reconstruction, or metric computation is performed.
    """
    # Materialise the inputs once: they may be one-shot iterables and are
    # traversed repeatedly below.
    labelled_graphs = [_with_label(dict(item)) for item in graph_specs]
    labelled_recons = [_with_label(dict(item)) for item in reconstruction_specs]
    seed_values = [int(value) for value in seeds]

    planned = []
    for graph_spec in labelled_graphs:
        for seed in seed_values:
            for recon_spec in labelled_recons:
                planned.append(
                    _base_row(
                        graph_spec=graph_spec,
                        recon_spec=recon_spec,
                        seed=seed,
                        status="planned",
                        error=None,
                        generation_seconds=None,
                    )
                )
    return pd.DataFrame(planned)

spatial_graph_algorithms.compare.run_comparison(*, graph_specs, reconstruction_specs, seeds, dim=None, k_neighbors=15, compute_distortion=False, verbose=True)

Run a simulation/reconstruction comparison and return a `ComparisonResult`.

Each graph spec is generated once per seed. All reconstruction specs are then applied to that graph, so generation cost is not repeated.

Parameters:

Name Type Description Default
graph_specs iterable of mapping

Specs passed to `spatial_graph_algorithms.simulate.generate()`. Private keys starting with "_" are treated as metadata.

required
reconstruction_specs iterable of mapping

Specs passed to `spatial_graph_algorithms.reconstruct.reconstruct()`. Each spec must include "method". Other public keys are passed as method-specific keyword arguments.

required
seeds iterable of int

Top-level seeds. Each graph spec is generated once per seed, then all reconstruction specs are run against that graph.

required
dim int

Reconstruction dimensionality. Defaults to the graph spec dim if present, otherwise 2.

None
k_neighbors int

Number of neighbours for reconstruction quality KNN evaluation.

15
compute_distortion bool

Whether to compute the O(n²) distortion metric.

False
verbose bool

If True (default), print one progress line per completed row.

True

Returns:

Type Description
ComparisonResult

One row per graph_spec × seed × reconstruction_spec.

Examples:

>>> from spatial_graph_algorithms.compare import parameter_grid, run_comparison
>>> graphs = parameter_grid(cases=[{"n": 50, "mode": "knn", "k": 4}])
>>> recons = parameter_grid(cases=[{"method": "mds"}])
>>> result = run_comparison(
...     graph_specs=graphs, reconstruction_specs=recons, seeds=[1], verbose=False
... )
>>> result.df["status"].iloc[0]
'ok'
Source code in src/spatial_graph_algorithms/compare/__init__.py
def run_comparison(
    *,
    graph_specs: Iterable[Mapping[str, Any]],
    reconstruction_specs: Iterable[Mapping[str, Any]],
    seeds: Iterable[int],
    dim: int | None = None,
    k_neighbors: int = 15,
    compute_distortion: bool = False,
    verbose: bool = True,
) -> ComparisonResult:
    """Run a simulation/reconstruction comparison and return a :class:`ComparisonResult`.

    Each graph spec is generated once per seed.  All reconstruction specs
    are then applied to that graph, so generation cost is not repeated.
    Failures never abort the study: they are recorded in the ``status`` and
    ``error`` columns of the affected rows (``"generation_error"`` or
    ``"reconstruction_error"``) and the loop moves on.

    Parameters
    ----------
    graph_specs : iterable of mapping
        Specs passed to :func:`spatial_graph_algorithms.simulate.generate`.
        Private keys starting with ``"_"`` are treated as metadata.  A
        ``"seed"`` key in a spec is ignored in favour of ``seeds``.
    reconstruction_specs : iterable of mapping
        Specs passed to :func:`spatial_graph_algorithms.reconstruct.reconstruct`.
        Each spec must include ``"method"``.  Other public keys are passed as
        method-specific keyword arguments.  A ``"seed"`` key in a spec is
        ignored in favour of ``seeds``.
    seeds : iterable of int
        Top-level seeds.  Each graph spec is generated once per seed, then all
        reconstruction specs are run against that graph.
    dim : int, optional
        Reconstruction dimensionality.  Defaults to the graph spec ``dim`` if
        present (and not ``None``), otherwise ``2``.
    k_neighbors : int
        Number of neighbours for reconstruction quality KNN evaluation.
    compute_distortion : bool
        Whether to compute the O(n²) distortion metric.
    verbose : bool
        If ``True`` (default), print one progress line per completed row.

    Returns
    -------
    ComparisonResult
        One row per ``graph_spec × seed × reconstruction_spec``.

    Examples
    --------
    >>> from spatial_graph_algorithms.compare import parameter_grid, run_comparison
    >>> graphs = parameter_grid(cases=[{"n": 50, "mode": "knn", "k": 4}])
    >>> recons = parameter_grid(cases=[{"method": "mds"}])
    >>> result = run_comparison(
    ...     graph_specs=graphs, reconstruction_specs=recons, seeds=[1], verbose=False
    ... )
    >>> result.df["status"].iloc[0]
    'ok'
    """
    graph_specs_list = [_with_label(dict(spec)) for spec in graph_specs]
    reconstruction_specs_list = [_with_label(dict(spec)) for spec in reconstruction_specs]
    seeds_list = [int(seed) for seed in seeds]
    rows: list[Spec] = []

    total = len(graph_specs_list) * len(seeds_list) * len(reconstruction_specs_list)

    def _record(row: Spec, graph_spec: Spec, recon_spec: Spec, seed: int) -> None:
        # Store a finished row and report progress; the number of stored
        # rows doubles as the "completed" counter.
        rows.append(row)
        if verbose:
            _print_row(len(rows), total, graph_spec, recon_spec, seed, row)

    for graph_spec in graph_specs_list:
        graph_params = _public_params(graph_spec)
        # The study-level seed is passed explicitly below.
        graph_params.pop("seed", None)
        for seed in seeds_list:
            graph_start = time.perf_counter()
            try:
                sg = generate(**graph_params, seed=seed)
                generation_seconds = time.perf_counter() - graph_start
                graph_metrics = evaluate(
                    sg,
                    k_neighbors=k_neighbors,
                    compute_distortion=compute_distortion,
                )
            except Exception as exc:  # noqa: BLE001 - experiments should continue.
                generation_seconds = time.perf_counter() - graph_start
                # No graph means no reconstruction can run: emit one error
                # row per reconstruction spec so the result stays rectangular.
                for recon_spec in reconstruction_specs_list:
                    _record(
                        _base_row(
                            graph_spec=graph_spec,
                            recon_spec=recon_spec,
                            seed=seed,
                            status="generation_error",
                            error=f"{type(exc).__name__}: {exc}",
                            generation_seconds=generation_seconds,
                        ),
                        graph_spec,
                        recon_spec,
                        seed,
                    )
                continue

            for recon_spec in reconstruction_specs_list:
                row = _base_row(
                    graph_spec=graph_spec,
                    recon_spec=recon_spec,
                    seed=seed,
                    status="ok",
                    error=None,
                    generation_seconds=generation_seconds,
                )
                # Metrics evaluated on the generated graph itself.
                # NOTE(review): presumably these share keys with the
                # reconstruction metrics and are overwritten on success.
                row.update(graph_metrics)

                recon_params = _public_params(recon_spec)
                method = recon_params.pop("method", None)
                recon_params.pop("seed", None)
                if method is None:
                    # Clear the quality metrics exactly as the exception
                    # branch below does, so a failed row never carries the
                    # generated graph's values as if they were results.
                    row.update(
                        {
                            "status": "reconstruction_error",
                            "error": "ValueError: reconstruction spec must include 'method'",
                            "reconstruction_seconds": None,
                            "cpd": None,
                            "knn": None,
                            "distortion": None,
                        }
                    )
                    _record(row, graph_spec, recon_spec, seed)
                    continue

                recon_start = time.perf_counter()
                try:
                    # Resolved inside the try block so an unusable ``dim``
                    # value is recorded as an error instead of aborting the
                    # study; ``dim=None`` in a spec counts as "not set".
                    if dim is not None:
                        recon_dim = dim
                    else:
                        graph_dim = graph_params.get("dim")
                        recon_dim = 2 if graph_dim is None else int(graph_dim)
                    sg_rec = reconstruct(
                        sg,
                        method=str(method),
                        dim=recon_dim,
                        seed=seed,
                        **recon_params,
                    )
                    row["reconstruction_seconds"] = time.perf_counter() - recon_start
                    row.update(
                        evaluate(
                            sg_rec,
                            k_neighbors=k_neighbors,
                            compute_distortion=compute_distortion,
                        )
                    )
                except Exception as exc:  # noqa: BLE001 - experiments should continue.
                    row.update(
                        {
                            "status": "reconstruction_error",
                            "error": f"{type(exc).__name__}: {exc}",
                            "reconstruction_seconds": time.perf_counter() - recon_start,
                            "cpd": None,
                            "knn": None,
                            "distortion": None,
                        }
                    )
                _record(row, graph_spec, recon_spec, seed)

    return ComparisonResult(df=pd.DataFrame(rows))