Skip to content

Graph Algorithms

Graph Algorithms

WIP: These classes are currently actively being developed and are subject to change in both API and functionality over time.

Graph Algorithms

Docs TBD

ProximityGraph

Build a NetworkX graph using the Proximity class.

Source code in src/workbench/algorithms/graph/light/proximity_graph.py
class ProximityGraph:
    """Build a NetworkX graph using the Proximity class."""

    def __init__(self):
        """Build a NetworkX graph using the Proximity class."""

        # The graph is stored as NetworkX graph
        self._nx_graph = None

        # GraphStore
        self.graph_store = GraphStore()

    def build_graph(self, proximity_instance: Proximity, node_attributes_df: pd.DataFrame) -> None:
        """
        Build a NetworkX graph using a Proximity class.

        Args:
            proximity_instance (Proximity): An instance of a Proximity class to compute neighbors.
            node_attributes_df (pd.DataFrame): DataFrame containing node attributes
        """
        # Retrieve all neighbors and their distances
        prox = proximity_instance
        id_column = prox.id_column
        log.info("Retrieving all neighbors...")
        all_neighbors_df = prox.all_neighbors()

        # Add nodes with attributes (features)
        log.info("Building proximity graph...")
        self._nx_graph = nx.Graph()
        for _, row in all_neighbors_df.iterrows():
            node_id = row[id_column]

            # Get all the node attributes
            if node_id in node_attributes_df[id_column].values:
                node_attributes = node_attributes_df[node_attributes_df[id_column] == node_id].iloc[0].to_dict()
                self._nx_graph.add_node(node_id, **node_attributes)
            else:
                log.error(f"Node ID '{node_id}' not found in the node attributes DataFrame. Terminating graph build.")
                return

        # Add edges with weights based on proximity
        min_edges = 2
        min_wieght = 0.8
        current_id = None
        log.info("Adding edges to the graph...")
        for _, row in all_neighbors_df.iterrows():
            source_id = row[id_column]
            if source_id != current_id:
                num_edges = 0
                current_id = source_id
            weight = prox.get_edge_weight(row)
            if num_edges <= min_edges or weight > min_wieght:
                weight = 0.1 if weight < 0.1 else weight
                self._nx_graph.add_edge(row[id_column], row["neighbor_id"], weight=weight)
                num_edges += 1

        # Print the number of nodes and edges
        nodes = self._nx_graph.number_of_nodes()
        edges = self._nx_graph.number_of_edges()
        log.info(f"Graph built with {nodes} nodes and {edges} edges.")

    @property
    def nx_graph(self) -> nx.Graph:
        """
        Get the NetworkX graph object.

        Returns:
            nx.Graph: The NetworkX graph object.
        """
        return self._nx_graph

    def load_graph(self, graph_path: str):
        """
        Load a graph from the GraphStore.

        Args:
            graph_path (str): The path to the graph in GraphStore.
        """
        self._nx_graph = self.graph_store.get(graph_path)

    def store_graph(self, graph_path: str):
        """
        Store the graph in the GraphStore.

        Args:
            graph_path (str): The path to store the graph in GraphStore.
        """
        self.graph_store.upsert(graph_path, self._nx_graph)

    def get_neighborhood(self, node_id: Union[str, int], radius: int = 1) -> nx.Graph:
        """
        Get a subgraph containing nodes within a given number of hops around a specific node.

        Args:
            node_id: The ID of the node to center the neighborhood around.
            radius: The number of hops to consider around the node (default: 1).

        Returns:
            nx.Graph: A subgraph containing the specified neighborhood.
        """
        # Use NetworkX's ego_graph to extract the neighborhood within the given radius
        if node_id in self._nx_graph:
            return nx.ego_graph(self._nx_graph, node_id, radius=radius)
        else:
            raise ValueError(f"Node ID '{node_id}' not found in the graph.")

nx_graph property

Get the NetworkX graph object.

Returns:

Type Description
Graph

nx.Graph: The NetworkX graph object.

__init__()

Build a NetworkX graph using the Proximity class.

Source code in src/workbench/algorithms/graph/light/proximity_graph.py
def __init__(self):
    """Build a NetworkX graph using the Proximity class."""

    # The graph is stored as NetworkX graph
    self._nx_graph = None

    # GraphStore
    self.graph_store = GraphStore()

build_graph(proximity_instance, node_attributes_df)

Build a NetworkX graph using a Proximity class.

Parameters:

Name Type Description Default
proximity_instance Proximity

An instance of a Proximity class to compute neighbors.

required
node_attributes_df DataFrame

DataFrame containing node attributes

required
Source code in src/workbench/algorithms/graph/light/proximity_graph.py
def build_graph(self, proximity_instance: Proximity, node_attributes_df: pd.DataFrame) -> None:
    """
    Build a NetworkX graph using a Proximity class.

    Args:
        proximity_instance (Proximity): An instance of a Proximity class to compute neighbors.
        node_attributes_df (pd.DataFrame): DataFrame containing node attributes
    """
    # Retrieve all neighbors and their distances
    prox = proximity_instance
    id_column = prox.id_column
    log.info("Retrieving all neighbors...")
    all_neighbors_df = prox.all_neighbors()

    # Add nodes with attributes (features)
    log.info("Building proximity graph...")
    self._nx_graph = nx.Graph()
    for _, row in all_neighbors_df.iterrows():
        node_id = row[id_column]

        # Get all the node attributes
        if node_id in node_attributes_df[id_column].values:
            node_attributes = node_attributes_df[node_attributes_df[id_column] == node_id].iloc[0].to_dict()
            self._nx_graph.add_node(node_id, **node_attributes)
        else:
            log.error(f"Node ID '{node_id}' not found in the node attributes DataFrame. Terminating graph build.")
            return

    # Add edges with weights based on proximity
    min_edges = 2
    min_wieght = 0.8
    current_id = None
    log.info("Adding edges to the graph...")
    for _, row in all_neighbors_df.iterrows():
        source_id = row[id_column]
        if source_id != current_id:
            num_edges = 0
            current_id = source_id
        weight = prox.get_edge_weight(row)
        if num_edges <= min_edges or weight > min_wieght:
            weight = 0.1 if weight < 0.1 else weight
            self._nx_graph.add_edge(row[id_column], row["neighbor_id"], weight=weight)
            num_edges += 1

    # Print the number of nodes and edges
    nodes = self._nx_graph.number_of_nodes()
    edges = self._nx_graph.number_of_edges()
    log.info(f"Graph built with {nodes} nodes and {edges} edges.")

get_neighborhood(node_id, radius=1)

Get a subgraph containing nodes within a given number of hops around a specific node.

Parameters:

Name Type Description Default
node_id Union[str, int]

The ID of the node to center the neighborhood around.

required
radius int

The number of hops to consider around the node (default: 1).

1

Returns:

Type Description
Graph

nx.Graph: A subgraph containing the specified neighborhood.

Source code in src/workbench/algorithms/graph/light/proximity_graph.py
def get_neighborhood(self, node_id: Union[str, int], radius: int = 1) -> nx.Graph:
    """
    Get a subgraph containing nodes within a given number of hops around a specific node.

    Args:
        node_id: The ID of the node to center the neighborhood around.
        radius: The number of hops to consider around the node (default: 1).

    Returns:
        nx.Graph: A subgraph containing the specified neighborhood.
    """
    # Use NetworkX's ego_graph to extract the neighborhood within the given radius
    if node_id in self._nx_graph:
        return nx.ego_graph(self._nx_graph, node_id, radius=radius)
    else:
        raise ValueError(f"Node ID '{node_id}' not found in the graph.")

load_graph(graph_path)

Load a graph from the GraphStore.

Parameters:

Name Type Description Default
graph_path str

The path to the graph in GraphStore.

required
Source code in src/workbench/algorithms/graph/light/proximity_graph.py
def load_graph(self, graph_path: str):
    """
    Load a graph from the GraphStore.

    Args:
        graph_path (str): The path to the graph in GraphStore.
    """
    self._nx_graph = self.graph_store.get(graph_path)

store_graph(graph_path)

Store the graph in the GraphStore.

Parameters:

Name Type Description Default
graph_path str

The path to store the graph in GraphStore.

required
Source code in src/workbench/algorithms/graph/light/proximity_graph.py
def store_graph(self, graph_path: str):
    """
    Store the graph in the GraphStore.

    Args:
        graph_path (str): The path to store the graph in GraphStore.
    """
    self.graph_store.upsert(graph_path, self._nx_graph)

Questions?

The SuperCowPowers team is happy to answer any questions you may have about AWS and Workbench. Please contact us at workbench@supercowpowers.com or on chat us up on Discord