Module treespace_metrics.francis
Expand source code
from networkx import DiGraph, Graph, all_simple_paths
from networkx import get_node_attributes
import platform
from treespace_metrics.drawing import draw_bipartite
from treespace_metrics.utils import get_root, maximum_matching_all, get_leaves
plt = platform.system()
def build_francis_bipartite(network: DiGraph) -> Graph:
"""
Read the paper "New Characterisations of Tree-Based Networks and Proximity Measures"
Builds a bipartite graph from the given phylogenetic network.
Args:
network (DiGraph): The input phylogenetic network.
Returns:
Graph: A bipartite graph used for the next step of the algorithm.
"""
francis = Graph()
for node in network.nodes():
francis.add_node(node, biparite=0)
francis.add_node('V-' + node, biparite=1)
data = get_node_attributes(francis, 'biparite')
for s, t in network.edges():
if data[s] == 0:
francis.add_edge(s, 'V-' + t)
return francis
def vertex_disjoint_paths(network: DiGraph, name=None, draw=False) -> [int, list]:
"""
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures"
Computes vertex disjoint paths from the given phylogenetic network.
Args:
network (DiGraph): The input phylogenetic network.
name (str, optional): The name of the graph for saving output images. Defaults to None.
draw (bool, optional): If True, draws the bipartite graph. Defaults to False.
Returns:
tuple:
- int: The number of unmatched omnian nodes in the network.
- list: The list of vertex disjoint paths in the bipartite graph.
"""
francis = build_francis_bipartite(network)
max_matchings = maximum_matching_all(francis)
# Step 1: Compute disjoint paths
u2 = set()
matches = []
nodes = set(network.nodes())
missing_v1 = set()
data = get_node_attributes(francis, 'biparite')
for s, t in max_matchings.items():
if data[s] == 0:
matches.append((s, t[2:]))
u2.add(t[2:])
missing_v1.add(s)
u2 = nodes - u2
missing_v1 = nodes - missing_v1
# Build all Vertex Disjoint paths
paths = []
for u in u2:
# You do delete matches, so you need to pass a copy
p = build_path(u, matches.copy())
paths.append(p)
# Step 2: Exclude leaves to know number of new leaves
leaves = get_leaves(network)
missing_v1 = len(missing_v1 - set(leaves))
if draw:
if name is None:
draw_bipartite(francis, max_matchings, "francis-bipartite")
else:
draw_bipartite(francis, max_matchings, name + "-francis-bipartite")
return missing_v1, paths
def get_next_node(u: str, matches):
"""
Helper function for rooted_spanning_tree and starting_match.
Finds the next node in the path from the given node.
Args:
u (str): A node in the network.
matches (list): Maximum matching used to generate the vertex disjoint path.
Returns:
str: The next node in the path, or None if no match is found.
"""
for match in matches:
if match[0] == u:
matches.remove(match)
return match[1]
return None
def build_path(u: str, matches):
"""
Helper function for rooted_spanning_tree.
Builds a path starting from the given node using maximum matching.
Args:
u (str): The starting node.
matches (list): The complete maximum matching.
Returns:
list: The maximum path starting at the given node.
"""
max_path = list()
new_vertex = u
max_path.append(u)
while True:
new_vertex = get_next_node(new_vertex, matches)
if new_vertex is None:
return max_path
else:
max_path.append(new_vertex)
def rooted_spanning_tree(network: DiGraph, paths: list) -> DiGraph:
"""
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures"
Builds a rooted spanning tree using vertex disjoint paths.
Args:
network (DiGraph): The input phylogenetic network.
paths (list): The vertex disjoint paths.
Returns:
DiGraph: A rooted spanning tree based on the vertex disjoint paths.
"""
spanning_tree = DiGraph()
root = get_root(network)
# Build the Spanning Tree from each path
for path in paths:
parents = list(network.predecessors(path[0]))
if root not in path:
spanning_tree.add_edge(parents[0], path[0])
# Add the disjoint path
for i in range(len(path) - 1):
spanning_tree.add_edge(path[i], path[i + 1])
return spanning_tree
def tree_based_network(network: DiGraph, spanning_tree: DiGraph) -> DiGraph:
"""
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures"
Builds a tree-based network by appending leaves to unmatched omnian nodes to generated spanning tree.
Args:
network (DiGraph): The input phylogenetic network.
spanning_tree (DiGraph): The rooted spanning tree.
Returns:
DiGraph: A tree-based network based on the rooted spanning tree.
"""
leaves = get_leaves(network)
paths = get_paths(spanning_tree)
leaf_count = 0
for path in paths:
if not path[len(path) - 1] in leaves:
node = 'new-leaf-' + str(leaf_count)
spanning_tree.add_node(node)
spanning_tree.add_edge(path[len(path) - 1], node)
leaf_count += 1
return spanning_tree
def get_paths(spanning_tree: DiGraph) -> list:
"""
Helper function for tree_based_network function.
Retrieves all paths from the rooted spanning tree that start at a root and ends in a leaf.
Args:
spanning_tree (DiGraph): The rooted spanning tree.
Returns:
list: The list of disjoint paths in the spanning tree.
"""
paths = []
roots = []
leaves = []
for node in spanning_tree.nodes():
if spanning_tree.in_degree(node) == 0:
roots.append(node)
elif spanning_tree.out_degree(node) == 0:
leaves.append(node)
for root in roots:
for leaf in leaves:
for path in all_simple_paths(spanning_tree, root, leaf):
paths.append(path)
return paths
Functions
def build_francis_bipartite(network: networkx.classes.digraph.DiGraph) ‑> networkx.classes.graph.Graph
-
Read the paper "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a bipartite graph from the given phylogenetic network.
Args
network
:DiGraph
- The input phylogenetic network.
Returns
Graph
- A bipartite graph used for the next step of the algorithm.
Expand source code
def build_francis_bipartite(network: DiGraph) -> Graph: """ Read the paper "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a bipartite graph from the given phylogenetic network. Args: network (DiGraph): The input phylogenetic network. Returns: Graph: A bipartite graph used for the next step of the algorithm. """ francis = Graph() for node in network.nodes(): francis.add_node(node, biparite=0) francis.add_node('V-' + node, biparite=1) data = get_node_attributes(francis, 'biparite') for s, t in network.edges(): if data[s] == 0: francis.add_edge(s, 'V-' + t) return francis
def build_path(u: str, matches)
-
Helper function for rooted_spanning_tree. Builds a path starting from the given node using maximum matching.
Args
u
:str
- The starting node.
matches
:list
- The complete maximum matching.
Returns
list
- The maximum path starting at the given node.
Expand source code
def build_path(u: str, matches): """ Helper function for rooted_spanning_tree. Builds a path starting from the given node using maximum matching. Args: u (str): The starting node. matches (list): The complete maximum matching. Returns: list: The maximum path starting at the given node. """ max_path = list() new_vertex = u max_path.append(u) while True: new_vertex = get_next_node(new_vertex, matches) if new_vertex is None: return max_path else: max_path.append(new_vertex)
def get_next_node(u: str, matches)
-
Helper function for rooted_spanning_tree and starting_match. Finds the next node in the path from the given node.
Args
u
:str
- A node in the network.
matches
:list
- Maximum matching used to generate the vertex disjoint path.
Returns
str
- The next node in the path, or None if no match is found.
Expand source code
def get_next_node(u: str, matches): """ Helper function for rooted_spanning_tree and starting_match. Finds the next node in the path from the given node. Args: u (str): A node in the network. matches (list): Maximum matching used to generate the vertex disjoint path. Returns: str: The next node in the path, or None if no match is found. """ for match in matches: if match[0] == u: matches.remove(match) return match[1] return None
def get_paths(spanning_tree: networkx.classes.digraph.DiGraph) ‑> list
-
Helper function for tree_based_network function. Retrieves all paths from the rooted spanning tree that start at a root and ends in a leaf.
Args
spanning_tree
:DiGraph
- The rooted spanning tree.
Returns
list
- The list of disjoint paths in the spanning tree.
Expand source code
def get_paths(spanning_tree: DiGraph) -> list: """ Helper function for tree_based_network function. Retrieves all paths from the rooted spanning tree that start at a root and ends in a leaf. Args: spanning_tree (DiGraph): The rooted spanning tree. Returns: list: The list of disjoint paths in the spanning tree. """ paths = [] roots = [] leaves = [] for node in spanning_tree.nodes(): if spanning_tree.in_degree(node) == 0: roots.append(node) elif spanning_tree.out_degree(node) == 0: leaves.append(node) for root in roots: for leaf in leaves: for path in all_simple_paths(spanning_tree, root, leaf): paths.append(path) return paths
def rooted_spanning_tree(network: networkx.classes.digraph.DiGraph, paths: list) ‑> networkx.classes.digraph.DiGraph
-
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a rooted spanning tree using vertex disjoint paths.
Args
network
:DiGraph
- The input phylogenetic network.
paths
:list
- The vertex disjoint paths.
Returns
DiGraph
- A rooted spanning tree based on the vertex disjoint paths.
Expand source code
def rooted_spanning_tree(network: DiGraph, paths: list) -> DiGraph: """ Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a rooted spanning tree using vertex disjoint paths. Args: network (DiGraph): The input phylogenetic network. paths (list): The vertex disjoint paths. Returns: DiGraph: A rooted spanning tree based on the vertex disjoint paths. """ spanning_tree = DiGraph() root = get_root(network) # Build the Spanning Tree from each path for path in paths: parents = list(network.predecessors(path[0])) if root not in path: spanning_tree.add_edge(parents[0], path[0]) # Add the disjoint path for i in range(len(path) - 1): spanning_tree.add_edge(path[i], path[i + 1]) return spanning_tree
def tree_based_network(network: networkx.classes.digraph.DiGraph, spanning_tree: networkx.classes.digraph.DiGraph) ‑> networkx.classes.digraph.DiGraph
-
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a tree-based network by appending leaves to unmatched omnian nodes to generated spanning tree.
Args
network
:DiGraph
- The input phylogenetic network.
spanning_tree
:DiGraph
- The rooted spanning tree.
Returns
DiGraph
- A tree-based network based on the rooted spanning tree.
Expand source code
def tree_based_network(network: DiGraph, spanning_tree: DiGraph) -> DiGraph: """ Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Builds a tree-based network by appending leaves to unmatched omnian nodes to generated spanning tree. Args: network (DiGraph): The input phylogenetic network. spanning_tree (DiGraph): The rooted spanning tree. Returns: DiGraph: A tree-based network based on the rooted spanning tree. """ leaves = get_leaves(network) paths = get_paths(spanning_tree) leaf_count = 0 for path in paths: if not path[len(path) - 1] in leaves: node = 'new-leaf-' + str(leaf_count) spanning_tree.add_node(node) spanning_tree.add_edge(path[len(path) - 1], node) leaf_count += 1 return spanning_tree
def vertex_disjoint_paths(network: networkx.classes.digraph.DiGraph, name=None, draw=False) ‑> [
, ] -
Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Computes vertex disjoint paths from the given phylogenetic network.
Args
network
:DiGraph
- The input phylogenetic network.
name
:str
, optional- The name of the graph for saving output images. Defaults to None.
draw
:bool
, optional- If True, draws the bipartite graph. Defaults to False.
Returns
tuple
-
- int: The number of unmatched omnian nodes in the network.
- list: The list of vertex disjoint paths in the bipartite graph.
Expand source code
def vertex_disjoint_paths(network: DiGraph, name=None, draw=False) -> [int, list]: """ Taken from "New Characterisations of Tree-Based Networks and Proximity Measures" Computes vertex disjoint paths from the given phylogenetic network. Args: network (DiGraph): The input phylogenetic network. name (str, optional): The name of the graph for saving output images. Defaults to None. draw (bool, optional): If True, draws the bipartite graph. Defaults to False. Returns: tuple: - int: The number of unmatched omnian nodes in the network. - list: The list of vertex disjoint paths in the bipartite graph. """ francis = build_francis_bipartite(network) max_matchings = maximum_matching_all(francis) # Step 1: Compute disjoint paths u2 = set() matches = [] nodes = set(network.nodes()) missing_v1 = set() data = get_node_attributes(francis, 'biparite') for s, t in max_matchings.items(): if data[s] == 0: matches.append((s, t[2:])) u2.add(t[2:]) missing_v1.add(s) u2 = nodes - u2 missing_v1 = nodes - missing_v1 # Build all Vertex Disjoint paths paths = [] for u in u2: # You do delete matches, so you need to pass a copy p = build_path(u, matches.copy()) paths.append(p) # Step 2: Exclude leaves to know number of new leaves leaves = get_leaves(network) missing_v1 = len(missing_v1 - set(leaves)) if draw: if name is None: draw_bipartite(francis, max_matchings, "francis-bipartite") else: draw_bipartite(francis, max_matchings, name + "-francis-bipartite") return missing_v1, paths