Network

Network

A class for building and analyzing a directed graph network of biological pathways.

Parameters:

Name Type Description Default
input_data DataFrame

A DataFrame containing the input data for the pathways.

required
pathways DataFrame

A DataFrame containing information on the pathways.

required
mapping DataFrame or None

A DataFrame containing mapping information. If None, then a DataFrame will be constructed from the input_data argument. Default is None.

None
input_data_column str

The name of the column in input_data that contains the input data. Default is 'Protein'.

'Protein'
subset_pathways bool

Whether to subset the pathways DataFrame to include only those pathways that are relevant to the input data. Default is True.

True

Attributes:

Name Type Description
mapping DataFrame

A DataFrame containing the mapping information.

pathways DataFrame

A DataFrame containing information on the pathways.

input_data DataFrame

A DataFrame containing the input data for the pathways.

inputs list

A list of the unique inputs in the mapping DataFrame.

netx DiGraph

A directed graph network of the pathways.

Source code in binn/network.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
class Network:
    """
    A class for building and analyzing a directed graph network of biological pathways.

    Args:
        input_data (pandas.DataFrame): A DataFrame containing the input data for the pathways.
        pathways (pandas.DataFrame): A DataFrame containing information on the pathways.
        mapping (pandas.DataFrame or None, optional): A DataFrame containing mapping information.
            If it is not a DataFrame, an identity mapping ("input" == "translation") is
            constructed from the `input_data_column` column of `input_data`.
            Default is None.
        input_data_column (str, optional): The name of the column in `input_data` that contains
            the input data. Default is 'Protein'.
        subset_pathways (bool, optional): Whether to subset the pathways DataFrame to include
            only those pathways that are relevant to the input data. Default is True.
        source_column (str, optional): The name of the column in `pathways` that is renamed
            to "source" before the network is built. Default is 'source'.
        target_column (str, optional): The name of the column in `pathways` that is renamed
            to "target" before the network is built. Default is 'target'.

    Attributes:
        mapping (pandas.DataFrame): A DataFrame containing the mapping information.
        unaltered_mapping (pandas.DataFrame or None): The `mapping` argument exactly as it
            was passed to the constructor.
        pathways (pandas.DataFrame): A DataFrame containing information on the pathways.
        input_data (pandas.DataFrame): A DataFrame containing the input data for the pathways.
        input_data_column (str): The `input_data_column` argument.
        inputs: The unique values of the "input" column of the mapping. Replaced by a
            sorted list when `get_connectivity_matrices` is called.
        netx (networkx.DiGraph): A directed graph network of the pathways.

    """

    def __init__(
        self,
        input_data: pd.DataFrame,
        pathways: pd.DataFrame,
        mapping: Union[pd.DataFrame, None] = None,
        input_data_column: str = "Protein",
        subset_pathways: bool = True,
        source_column: str = "source",
        target_column: str = "target",
    ):
        self.input_data_column = input_data_column
        # Normalise the edge column names so the rest of the class can rely on
        # "source" / "target" regardless of what the caller's DataFrame uses.
        pathways = pathways.rename(
            columns={source_column: "source", target_column: "target"}
        )

        if isinstance(mapping, pd.DataFrame):
            self.mapping = mapping
            self.unaltered_mapping = mapping

        else:
            # No usable mapping supplied: map every input onto itself.
            self.mapping = pd.DataFrame(
                {
                    "input": input_data[input_data_column].values,
                    "translation": input_data[input_data_column].values,
                }
            )
            # NOTE(review): this stores the raw argument (typically None), not the
            # identity mapping built above. Kept as-is for backward compatibility;
            # confirm whether `self.mapping` was intended here.
            self.unaltered_mapping = mapping

        if subset_pathways:
            self.mapping = _subset_input(input_data, self.mapping, input_data_column)

            self.pathways = _subset_pathways_on_idx(pathways, self.mapping)

        else:
            self.pathways = pathways

        self.mapping = _get_mapping_to_all_layers(self.pathways, self.mapping)

        self.input_data = input_data

        self.inputs = self.mapping["input"].unique()

        self.netx = self.build_network()

    def build_network(self):
        """
        Constructs a networkx DiGraph from the edges in the 'pathways' attribute of the object, with a root node added to the graph to connect all root nodes together.

        If the object already has a `netx` attribute, that graph is returned unchanged
        instead of being rebuilt.

        Returns:
            A networkx DiGraph object representing the constructed network.
        """
        if hasattr(self, "netx"):
            return self.netx

        # Edges are deliberately reversed: they run from the "target" column to the
        # "source" column of the pathways table.
        net = nx.from_pandas_edgelist(
            self.pathways, source="target", target="source", create_using=nx.DiGraph()
        )
        # Materialise the list of parentless nodes before adding edges so the graph
        # is not mutated while its degree view is being iterated.
        roots = [n for n, d in net.in_degree() if d == 0]
        root_node = "root"
        net.add_edges_from((root_node, n) for n in roots)

        return net

    def get_layers(self, n_levels, direction="root_to_leaf") -> list:
        """
        Returns a list of dictionaries where each dictionary contains the pathways at a certain level of the completed network and their inputs.

        The leading entries come from `_get_layers_from_net`; the final entry maps every
        terminal node of the completed network (out-degree 0, with any "_copy..." suffix
        removed from its name) to the unique "input" values whose "connections" value in
        the mapping equals that pathway name.

        Args:
            n_levels: The number of levels below the root node to complete the network to.
            direction: The direction of the layers to return. Only "root_to_leaf" is
                currently implemented. Defaults to "root_to_leaf".

        Returns:
            A list of dictionaries, where each dictionary contains pathway names as keys and input lists as values.

        Raises:
            NotImplementedError: If `direction` is "leaf_to_root".
            ValueError: If `direction` is any other unsupported value.
        """
        # Validate up front: previously an unsupported direction fell through and
        # crashed with UnboundLocalError on `net`.
        if direction == "leaf_to_root":
            raise NotImplementedError(
                'direction="leaf_to_root" is not implemented; use "root_to_leaf".'
            )
        if direction != "root_to_leaf":
            raise ValueError(
                'direction must be either "root_to_leaf" or "leaf_to_root", '
                f"got {direction!r}."
            )

        net = _complete_network(self.netx, n_levels=n_levels)
        layers = _get_layers_from_net(net, n_levels)

        terminal_nodes = [n for n, d in net.out_degree() if d == 0]

        mapping_df = self.mapping
        copy_suffix = re.compile(r"_copy.*")
        terminal_inputs = {}
        for node in terminal_nodes:
            # Nodes in the completed network may carry a "_copy..." suffix; strip it
            # to recover the pathway name used in the mapping.
            pathway_name = copy_suffix.sub("", node)
            terminal_inputs[pathway_name] = (
                mapping_df[mapping_df["connections"] == pathway_name]["input"]
                .unique()
                .tolist()
            )
        layers.append(terminal_inputs)
        return layers

    def get_connectivity_matrices(self, n_levels, direction="root_to_leaf") -> list:
        """
        Returns a list of connectivity matrices for each layer of the completed network, ordered from leaf nodes to root node.

        As a side effect, `self.inputs` is replaced by the sorted row labels of the
        first (input-most) layer map.

        Args:
            n_levels: The number of levels below the root node to complete the network to.
            direction: The direction of the layers to return, forwarded to `get_layers`.
                Defaults to "root_to_leaf".

        Returns:
            A list of pandas DataFrames representing the connectivity matrices for each layer of the completed network.
        """
        connectivity_matrices = []
        layers = self.get_layers(n_levels, direction)
        for i, layer in enumerate(layers[::-1]):
            layer_map = _get_map_from_layer(layer)
            if i == 0:
                inputs = list(layer_map.index)
                self.inputs = sorted(inputs)
            # Keep only the rows that the previous layer produced as columns.
            filter_df = pd.DataFrame(index=inputs)
            connectivity = filter_df.merge(
                layer_map, right_index=True, left_index=True, how="inner"
            )
            connectivity = connectivity.reindex(sorted(connectivity.columns), axis=1)
            connectivity = connectivity.sort_index()
            # The columns of this layer become the expected rows of the next one.
            inputs = list(layer_map.columns)
            connectivity_matrices.append(connectivity)
        return connectivity_matrices

build_network()

Constructs a networkx DiGraph from the edges in the 'pathways' attribute of the object, with a root node added to the graph to connect all root nodes together.

Returns:

Type Description

A networkx DiGraph object representing the constructed network.

Source code in binn/network.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def build_network(self):
    """
    Build the pathway graph, or hand back the one already stored on the object.

    The graph is a networkx DiGraph whose edges run from the 'target' column to the
    'source' column of the 'pathways' attribute. Every node that ends up without an
    incoming edge is then attached beneath a single node named "root".

    Returns:
        A networkx DiGraph object representing the constructed network.
    """
    try:
        # A previously built graph is reused as-is.
        return self.netx
    except AttributeError:
        pass

    graph = nx.from_pandas_edgelist(
        self.pathways,
        source="target",
        target="source",
        create_using=nx.DiGraph(),
    )
    # Collect the parentless nodes first; the graph must not change while its
    # degree view is being walked.
    parentless = [node for node, in_deg in graph.in_degree() if in_deg == 0]
    graph.add_edges_from([("root", node) for node in parentless])
    return graph

get_connectivity_matrices(n_levels, direction='root_to_leaf')

Returns a list of connectivity matrices for each layer of the completed network, ordered from leaf nodes to root node.

Parameters:

Name Type Description Default
n_levels

The number of levels below the root node to complete the network to.

required
direction

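The direction of the layers to return. Documented values are "root_to_leaf" and "leaf_to_root", but only "root_to_leaf" is handled by the implementation shown below; any other value results in an error. Defaults to "root_to_leaf".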

'root_to_leaf'

Returns:

Type Description
list

A list of pandas DataFrames representing the connectivity matrices for each layer of the completed network.

Source code in binn/network.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def get_connectivity_matrices(self, n_levels, direction="root_to_leaf") -> list:
    """
    Build one connectivity matrix per layer, walking the layers from `get_layers` in reverse order.

    The row labels of the first matrix come from the first reversed layer map (and are
    also stored, sorted, on `self.inputs`); for every later matrix the rows are limited
    to the column labels of the preceding layer map.

    Args:
        n_levels: The number of levels below the root node to complete the network to.
        direction: The direction of the layers to return, forwarded to `get_layers`.
            Defaults to "root_to_leaf".

    Returns:
        A list of pandas DataFrames representing the connectivity matrices for each layer of the completed network.
    """
    matrices = []
    for position, layer in enumerate(reversed(self.get_layers(n_levels, direction))):
        layer_map = _get_map_from_layer(layer)
        if position == 0:
            row_labels = list(layer_map.index)
            self.inputs = sorted(row_labels)

        # Inner-join on the index so only the expected rows survive.
        skeleton = pd.DataFrame(index=row_labels)
        matrix = skeleton.merge(
            layer_map, how="inner", left_index=True, right_index=True
        )
        ordered_columns = sorted(matrix.columns)
        matrix = matrix.reindex(ordered_columns, axis=1).sort_index()
        matrices.append(matrix)

        # This layer's columns are the rows expected from the next layer.
        row_labels = list(layer_map.columns)
    return matrices

get_layers(n_levels, direction='root_to_leaf')

Returns a list of dictionaries where each dictionary contains the pathways at a certain level of the completed network and their inputs.

Parameters:

Name Type Description Default
n_levels

The number of levels below the root node to complete the network to.

required
direction

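The direction of the layers to return. Documented values are "root_to_leaf" and "leaf_to_root", but only "root_to_leaf" is handled by the implementation shown below; any other value results in an error. Defaults to "root_to_leaf".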

'root_to_leaf'

Returns:

Type Description
list

A list of dictionaries, where each dictionary contains pathway names as keys and input lists as values.

Source code in binn/network.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def get_layers(self, n_levels, direction="root_to_leaf") -> list:
    """
    Returns a list of dictionaries where each dictionary contains the pathways at a certain level of the completed network and their inputs.

    The leading entries come from `_get_layers_from_net`; the final entry maps every
    terminal node of the completed network (out-degree 0, with any "_copy..." suffix
    removed from its name) to the unique "input" values whose "connections" value in
    the mapping equals that pathway name.

    Args:
        n_levels: The number of levels below the root node to complete the network to.
        direction: The direction of the layers to return. Only "root_to_leaf" is
            currently implemented. Defaults to "root_to_leaf".

    Returns:
        A list of dictionaries, where each dictionary contains pathway names as keys and input lists as values.

    Raises:
        NotImplementedError: If `direction` is "leaf_to_root".
        ValueError: If `direction` is any other unsupported value.
    """
    # Validate up front: previously an unsupported direction fell through and
    # crashed with UnboundLocalError on `net`.
    if direction == "leaf_to_root":
        raise NotImplementedError(
            'direction="leaf_to_root" is not implemented; use "root_to_leaf".'
        )
    if direction != "root_to_leaf":
        raise ValueError(
            'direction must be either "root_to_leaf" or "leaf_to_root", '
            f"got {direction!r}."
        )

    net = _complete_network(self.netx, n_levels=n_levels)
    layers = _get_layers_from_net(net, n_levels)

    terminal_nodes = [n for n, d in net.out_degree() if d == 0]

    mapping_df = self.mapping
    copy_suffix = re.compile(r"_copy.*")
    terminal_inputs = {}
    for node in terminal_nodes:
        # Nodes in the completed network may carry a "_copy..." suffix; strip it
        # to recover the pathway name used in the mapping.
        pathway_name = copy_suffix.sub("", node)
        terminal_inputs[pathway_name] = (
            mapping_df[mapping_df["connections"] == pathway_name]["input"]
            .unique()
            .tolist()
        )
    layers.append(terminal_inputs)
    return layers