Module treespace_metrics.utils

Expand source code
from networkx import DiGraph, Graph
from networkx.algorithms.components import connected_components
from networkx.algorithms.bipartite import hopcroft_karp_matching


def is_reticulation(network: DiGraph, node: str) -> bool:
    """
    Check if a node is a reticulation node.

    Args:
        network (DiGraph): The directed graph where the node is located.
        node (str): The node to check.

    Returns:
        bool: True if the node is a reticulation node, False otherwise.
    """
    if network.in_degree(node) >= 2 and network.out_degree(node) == 1:
        return True
    else:
        return False


def is_omnian(network: DiGraph, node: str) -> bool:
    """
    Check if a node is an omnian node.

    Args:
        network (DiGraph): The directed graph where the node is located.
        node (str): The node to check.

    Returns:
        bool: True if the node is an omnian node, False otherwise.
    """
    for child in network.successors(node):
        if not is_reticulation(network, child):
            return False
    if network.out_degree(node) != 0:
        return True
    else:
        return False

def maximum_matching_all(network: Graph) -> dict:
    """
    Compute the maximum matching for all connected components of a graph.

    Args:
        network (Graph): The input graph.

    Returns:
        dict: A dictionary representing the maximum matching of the graph.
    """
    matches = {}
    for conn in connected_components(network):
        sub = network.subgraph(conn)
        matches.update(hopcroft_karp_matching(sub))
    return matches


def get_leaves(network: DiGraph) -> set:
    """
    Get all leaf nodes of a phylogenetic network.

    Args:
        network (DiGraph): The input directed graph.

    Returns:
        set: A set of all leaf nodes in the network.
    """
    leaves = set()
    for v in network.nodes():
        if network.out_degree(v) == 0:
            leaves.add(v)
    return leaves


def get_root(network: DiGraph) -> str:
    """
    Get the root of a phylogenetic network.

    Args:
        network (DiGraph): The input directed graph.

    Returns:
        str: The root node if there is exactly one root, None otherwise.

    Raises:
        NotImplementedError: If multiple roots are found.
    """
    roots = []
    for v in network.nodes():
        if network.in_degree(v) == 0:
            roots.append(v)
    if len(roots) == 0:
        return None
    if len(roots) == 1:
        return roots[0]
    raise NotImplementedError(f"Found multiple roots: {roots}")


def get_all_roots(graph: DiGraph) -> set:
    """
    Get all root nodes of a graph.

    Args:
        graph (DiGraph): The input directed graph.

    Returns:
        set: A set of all nodes with in-degree 0.
    """
    roots = set()
    for v in graph.nodes():
        if graph.in_degree(v) == 0:
            roots.add(v)
    return roots


def path_to_edges(paths: list[list]) -> list[tuple]:
    """
    Convert a list of paths to a list of edges.
    a List of paths [[1,2,3],[4,5,6]] to a List of edges [(1,2),(2,3),(4,5),(5,6)]

    Args:
        paths (list[list]): A list of paths, where each path is a list of nodes.

    Returns:
        list[tuple]: A list of edges represented as tuples.
    """
    edge_list = []
    for path in paths:
        for i in range(len(path) - 1):
            edge = (path[i], path[i + 1])
            edge_list.append(edge)
    return edge_list


def create_dag(g: Graph) -> DiGraph:
    """
    Convert an undirected phylogenetic tree to a directed acyclic graph (DAG).
    This only works for rooted networks generated from BioPhylo and from Newick Format

    Args:
        g (Graph): A newick - networkx undirected phylogenetic tree.

    Returns:
        DiGraph: A directed acyclic graph (DAG) representation of the input tree.
    """
    g_prime = DiGraph()
    for node in g.nodes(data=False):
        n = getattr(node, 'name')
        c = getattr(node, 'confidence')

        if n is not None:
            g_prime.add_node(str(n))
        else:
            g_prime.add_node(str(c))

    for s, t in g.edges():
        n_1 = getattr(s, 'name')
        c_1 = getattr(s, 'confidence')
        n_2 = getattr(t, 'name')
        c_2 = getattr(t, 'confidence')
        if n_1 is not None:
            if n_2 is not None:
                g_prime.add_edge(str(n_1), str(n_2))
            else:
                g_prime.add_edge(str(n_1), str(c_2))
        else:
            if n_2 is not None:
                g_prime.add_edge(str(c_1), str(n_2))
            else:
                g_prime.add_edge(str(c_1), str(c_2))
    return g_prime


def read_adjacency_list(adjacency_list_file: str) -> DiGraph:
    """
    Read an adjacency list from a text file and create a directed graph.

    Args:
        adjacency_list_file (str): Path to the adjacency list file.

    Returns:
        DiGraph: A directed graph based on the adjacency list.
    """
    g = DiGraph()
    with open(adjacency_list_file, 'r') as fd:
        for line in fd:
            source, target = line.strip().split(' ')
            g.add_edge(source, target)
    return g

Functions

def create_dag(g: networkx.classes.graph.Graph) ‑> networkx.classes.digraph.DiGraph

Convert an undirected phylogenetic tree to a directed acyclic graph (DAG). This only works for rooted networks generated from BioPhylo and from Newick Format

Args

g : Graph
A newick - networkx undirected phylogenetic tree.

Returns

DiGraph
A directed acyclic graph (DAG) representation of the input tree.
Expand source code
def create_dag(g: Graph) -> DiGraph:
    """
    Convert an undirected phylogenetic tree to a directed acyclic graph (DAG).
    This only works for rooted networks generated from BioPhylo and from Newick Format

    Args:
        g (Graph): A newick - networkx undirected phylogenetic tree.

    Returns:
        DiGraph: A directed acyclic graph (DAG) representation of the input tree.
    """
    g_prime = DiGraph()
    for node in g.nodes(data=False):
        n = getattr(node, 'name')
        c = getattr(node, 'confidence')

        if n is not None:
            g_prime.add_node(str(n))
        else:
            g_prime.add_node(str(c))

    for s, t in g.edges():
        n_1 = getattr(s, 'name')
        c_1 = getattr(s, 'confidence')
        n_2 = getattr(t, 'name')
        c_2 = getattr(t, 'confidence')
        if n_1 is not None:
            if n_2 is not None:
                g_prime.add_edge(str(n_1), str(n_2))
            else:
                g_prime.add_edge(str(n_1), str(c_2))
        else:
            if n_2 is not None:
                g_prime.add_edge(str(c_1), str(n_2))
            else:
                g_prime.add_edge(str(c_1), str(c_2))
    return g_prime
def get_all_roots(graph: networkx.classes.digraph.DiGraph) ‑> set

Get all root nodes of a graph.

Args

graph : DiGraph
The input directed graph.

Returns

set
A set of all nodes with in-degree 0.
Expand source code
def get_all_roots(graph: DiGraph) -> set:
    """
    Get all root nodes of a graph.

    Args:
        graph (DiGraph): The input directed graph.

    Returns:
        set: A set of all nodes with in-degree 0.
    """
    roots = set()
    for v in graph.nodes():
        if graph.in_degree(v) == 0:
            roots.add(v)
    return roots
def get_leaves(network: networkx.classes.digraph.DiGraph) ‑> set

Get all leaf nodes of a phylogenetic network.

Args

network : DiGraph
The input directed graph.

Returns

set
A set of all leaf nodes in the network.
Expand source code
def get_leaves(network: DiGraph) -> set:
    """
    Get all leaf nodes of a phylogenetic network.

    Args:
        network (DiGraph): The input directed graph.

    Returns:
        set: A set of all leaf nodes in the network.
    """
    leaves = set()
    for v in network.nodes():
        if network.out_degree(v) == 0:
            leaves.add(v)
    return leaves
def get_root(network: networkx.classes.digraph.DiGraph) ‑> str

Get the root of a phylogenetic network.

Args

network : DiGraph
The input directed graph.

Returns

str
The root node if there is exactly one root, None otherwise.

Raises

NotImplementedError
If multiple roots are found.
Expand source code
def get_root(network: DiGraph) -> str:
    """
    Get the root of a phylogenetic network.

    Args:
        network (DiGraph): The input directed graph.

    Returns:
        str: The root node if there is exactly one root, None otherwise.

    Raises:
        NotImplementedError: If multiple roots are found.
    """
    roots = []
    for v in network.nodes():
        if network.in_degree(v) == 0:
            roots.append(v)
    if len(roots) == 0:
        return None
    if len(roots) == 1:
        return roots[0]
    raise NotImplementedError(f"Found multiple roots: {roots}")
def is_omnian(network: networkx.classes.digraph.DiGraph, node: str) ‑> bool

Check if a node is an omnian node.

Args

network : DiGraph
The directed graph where the node is located.
node : str
The node to check.

Returns

bool
True if the node is an omnian node, False otherwise.
Expand source code
def is_omnian(network: DiGraph, node: str) -> bool:
    """
    Check if a node is an omnian node.

    Args:
        network (DiGraph): The directed graph where the node is located.
        node (str): The node to check.

    Returns:
        bool: True if the node is an omnian node, False otherwise.
    """
    for child in network.successors(node):
        if not is_reticulation(network, child):
            return False
    if network.out_degree(node) != 0:
        return True
    else:
        return False
def is_reticulation(network: networkx.classes.digraph.DiGraph, node: str) ‑> bool

Check if a node is a reticulation node.

Args

network : DiGraph
The directed graph where the node is located.
node : str
The node to check.

Returns

bool
True if the node is a reticulation node, False otherwise.
Expand source code
def is_reticulation(network: DiGraph, node: str) -> bool:
    """
    Check if a node is a reticulation node.

    Args:
        network (DiGraph): The directed graph where the node is located.
        node (str): The node to check.

    Returns:
        bool: True if the node is a reticulation node, False otherwise.
    """
    if network.in_degree(node) >= 2 and network.out_degree(node) == 1:
        return True
    else:
        return False
def maximum_matching_all(network: networkx.classes.graph.Graph) ‑> dict

Compute the maximum matching for all connected components of a graph.

Args

network : Graph
The input graph.

Returns

dict
A dictionary representing the maximum matching of the graph.
Expand source code
def maximum_matching_all(network: Graph) -> dict:
    """
    Compute the maximum matching for all connected components of a graph.

    Args:
        network (Graph): The input graph.

    Returns:
        dict: A dictionary representing the maximum matching of the graph.
    """
    matches = {}
    for conn in connected_components(network):
        sub = network.subgraph(conn)
        matches.update(hopcroft_karp_matching(sub))
    return matches
def path_to_edges(paths: list[list]) ‑> list[tuple]

Convert a list of paths to a list of edges. a List of paths [[1,2,3],[4,5,6]] to a List of edges [(1,2),(2,3),(4,5),(5,6)]

Args

paths : list[list]
A list of paths, where each path is a list of nodes.

Returns

list[tuple]
A list of edges represented as tuples.
Expand source code
def path_to_edges(paths: list[list]) -> list[tuple]:
    """
    Convert a list of paths to a list of edges.
    a List of paths [[1,2,3],[4,5,6]] to a List of edges [(1,2),(2,3),(4,5),(5,6)]

    Args:
        paths (list[list]): A list of paths, where each path is a list of nodes.

    Returns:
        list[tuple]: A list of edges represented as tuples.
    """
    edge_list = []
    for path in paths:
        for i in range(len(path) - 1):
            edge = (path[i], path[i + 1])
            edge_list.append(edge)
    return edge_list
def read_adjacency_list(adjacency_list_file: str) ‑> networkx.classes.digraph.DiGraph

Read an adjacency list from a text file and create a directed graph.

Args

adjacency_list_file : str
Path to the adjacency list file.

Returns

DiGraph
A directed graph based on the adjacency list.
Expand source code
def read_adjacency_list(adjacency_list_file: str) -> DiGraph:
    """
    Read an adjacency list from a text file and create a directed graph.

    Args:
        adjacency_list_file (str): Path to the adjacency list file.

    Returns:
        DiGraph: A directed graph based on the adjacency list.
    """
    g = DiGraph()
    with open(adjacency_list_file, 'r') as fd:
        for line in fd:
            source, target = line.strip().split(' ')
            g.add_edge(source, target)
    return g