From e449d52f9f7398692c95dfcafed44222720c74a3 Mon Sep 17 00:00:00 2001
From: unknown <david.hoksza@gmail.com>
Date: Thu, 17 Oct 2019 15:30:48 +0200
Subject: [PATCH] reusing ods for connecting species

---
 talent/layout.py | 447 ++++++++++++++++++++++++-----------------------
 1 file changed, 231 insertions(+), 216 deletions(-)

diff --git a/talent/layout.py b/talent/layout.py
index 0fe3c92..dc47bd3 100644
--- a/talent/layout.py
+++ b/talent/layout.py
@@ -66,10 +66,8 @@ class MinMaxCoords:
                 self.max[i] = m[i]
 
 
-
-
 def get_mapping_dicts(m):
-    dicts = [{}, {}]
+    dicts = [{}, {}] # The first dictionary contains mapping from target to template, while the second mapping from template to target
     for op in m:
         if op[0] is not None:
             dicts[0][op[0]] = op[1]
@@ -97,7 +95,17 @@ def get_reaction_direction(g: nx.MultiGraph, available_ids: List[str] or Set[str
     dir /= len(known_ids)
     return gr.utils.normalize(dir)
 
+
 def layout_on_line(s_ids: List[str], center: np.ndarray, dir: np.ndarray, forbidden: List[gr.Rectangle]=[]) -> Dict[str, np.ndarray]:
+    """
+    Lay out species on a line difend by a point and a direction vector. The positions are alternating between left
+    and right of the center, skipping the positions which are inside the forbidden areas.
+    :param s_ids:
+    :param center:
+    :param dir:
+    :param forbidden:
+    :return:
+    """
 
     rv = {}
 
@@ -118,7 +126,8 @@ def layout_on_line(s_ids: List[str], center: np.ndarray, dir: np.ndarray, forbid
 
     i = 0
     # for i in range(len(s_ids)):
-    while len(rv) < len(s_ids):
+    poss = []
+    while len(poss) < len(s_ids):
         alt *= -1
         j = i - 1 if odd else i
         i += 1
@@ -132,7 +141,13 @@ def layout_on_line(s_ids: List[str], center: np.ndarray, dir: np.ndarray, forbid
         if intersection:
             continue
 
-        rv[s_ids[len(rv)]] = aux_pos
+        if alt < 1:
+            poss.insert(0, aux_pos)
+        else:
+            poss.append(aux_pos)
+
+    for i in range(len(poss)):
+        rv[s_ids[i]] = poss[i]
 
     return rv
 
@@ -736,19 +751,6 @@ def transfer_layout(tgt, tmp, mapping, do_beautify=False) -> nx.MultiGraph:
 
     tl_one_degree_species(g_res, tmp, tgt, md, laid_out_rs_tgt, laid_out_rs_tmp, tmp_stats, nodes_mapping)
 
-    # add edges which are missing (when adding one-degree species, edges are not added)
-    # for (n1, n2) in tgt_orig.edges():
-    #     if n1 in g_res and n2 in g_res:
-    #         roles = gu.get_roles(g_res, n1, n2)
-    #         for role in tgt_orig[n1][n2]:
-    #             if role not in roles:
-    #                 g_res.add_edge(n1, n2, role)
-
-    # update_reaction_species(g_res)
-
-
-
-
     for r_id in gu.get_all_reaction_ids(g_res):
         gu.get_node_data(g_res.nodes[r_id]).get_layout(be.LAYOUT_TYPE.PREDICTED).recompute_layout()
 
@@ -849,6 +851,11 @@ def layout_ods_de_novo_mapped(g_res: nx.MultiGraph, r_id, ods_ids, stats) -> Dic
     return rv
 
 
+def get_ods_mapping(tgt_ods_ids: List[str], tgt_r_id: str, tgt_g: nx.MultiGraph, tmp_ods_ids: List[str], tmp_r_id: str, tmp_g: nx.MultiGraph) -> List[List[int]]:
+    dm = get_node_distance_matrix(tgt_ods_ids, tgt_r_id, tgt_g, tmp_ods_ids, tmp_r_id, tmp_g)
+    return list(linear_sum_assignment(dm))
+
+
 def tl_one_degree_species(g_res: nx.MultiGraph, tmp:rg.ReactionGraph, tgt:rg.ReactionGraph, md,
                           laid_out_rs_tgt, laid_out_rs_tmp, tmp_stats, nodes_mapping:NodesMapping):
     tgt_rg = tgt.get_reaction_graph()
@@ -873,8 +880,6 @@ def tl_one_degree_species(g_res: nx.MultiGraph, tmp:rg.ReactionGraph, tgt:rg.Rea
         if len(ods_nbs_tgt) == 0:
             continue
 
-
-
         r_id_tmp = md[0][r_id_tgt]
         if r_id_tmp:
 
@@ -883,8 +888,8 @@ def tl_one_degree_species(g_res: nx.MultiGraph, tmp:rg.ReactionGraph, tgt:rg.Rea
             # if given reaction was mapped to template, let's use munkers algorithm to find optimal mapping
             # between ods in target and template, lay out those and add the not mapped ods
             ods_nbs_tmp = [s for s in gu.get_ods_nbs(tmp_orig, r_id_tmp) if not rs_used(laid_out_rs_tmp, r_id_tmp, s)]
-            dm = get_node_distance_matrix(list(ods_nbs_tgt.keys()), r_id_tgt, tgt_orig, ods_nbs_tmp, r_id_tmp, tmp_orig)
-            lsa = list(linear_sum_assignment(dm))
+
+            lsa = get_ods_mapping(list(ods_nbs_tgt.keys()), r_id_tgt, tgt_orig, ods_nbs_tmp, r_id_tmp, tmp_orig)
 
             #lay out mapped ods
             tgt_nb_to_remove = [] #to keep track of ods which will still need to be mapped after application of the mapping
@@ -1177,142 +1182,14 @@ def tl_unmapped_reactions(g_res: nx.MultiGraph, tgt, tmp, unmapped_reactions:Lis
         g_res_nodes = g_res.nodes()
         nodes = [g_res_nodes[n] for n in laid_boundary_r]
 
-
         com = get_center_of_mass(nodes)
 
-        # push the existing nodes away from the center using cartesian distortion to make space for the new reactions
-        # https://bost.ocks.org/mike/fisheye/
-        # distort(g_res, com, tmp_stats.med_reactions_distance)
-        # distort(tmp_orig, com, tmp_stats.med_reactions_distance, layout_key='layout')
 
         for r in cc_rg_unmapped:
             set_predicted_position(g_res_nodes[r], com, copied=False)
 
 
 
-        # nodes = sorted(list(laid_boundary_r.union(cc_rg_unmapped)))
-        #
-        # g_circ = get_graph_for_fdp(tgt_orig, list(cc_rg_unmapped))
-        # radius = len(cc_rg_unmapped) * tmp_stats.med_reactions_distance  / (2 * math.pi)
-        # poss = nx.drawing.circular_layout(g_circ, center=com, scale=radius)
-        # for r in cc_rg_unmapped:
-        #     set_predicted_position(g_res_nodes[r], poss[r])
-        # radius += tmp_stats.med_one_degree_species_distance
-        # zoom(g_res, com, radius, nodes=set(g_res.nodes()).difference(cc_rg_unmapped))
-        # zoom(tmp_orig, com, radius, layout_key='layout')
-
-
-
-
-        # for r in cc_rg_unmapped:
-        #     set_predicted_position(g_res_nodes[r], com)
-
-        # # zoom(g_res, com, tmp_stats.med_reactions_distance, nodes=set(g_res.nodes()).difference(cc_rg_unmapped))
-        # # zoom(tmp_orig, com, tmp_stats.med_reactions_distance, layout_key='layout')
-        #
-        # g_fdp = get_graph_for_fdp(tgt_orig, nodes)
-        # # get initial positions which will be the real positions for laid nodes and com for the new ones
-        # poss_init = {}
-        # for n in nodes:
-        #     poss_init[n] = com if n in cc_rg_unmapped else get_node_pos(g_res_nodes[n])
-        # poss = nx.drawing.spring_layout(g_fdp, k=100,
-        #                                 pos=poss_init, fixed=laid_boundary_r, weight='weight', center=com, random_state=42)
-        # for r in cc_rg_unmapped:
-        #     set_predicted_position(g_res_nodes[r], poss[r])
-        #
-        # min_r_dist = sys.maxsize
-        # for (r1, r2) in get_connected_reactions_pairs(tgt_orig, list(cc_rg_unmapped)):
-        #     min_r_dist = min(min_r_dist, l2_dist(poss[r1], poss[r2]))
-        #
-        # nodes = [g_res_nodes[n] for n in cc_rg_unmapped]
-        # com_new = get_center_of_mass(nodes)
-        #
-        # # TODO: find relevant space to blow up by identification of closes reactions to COM in terms of Pareto front distance
-        # blow_up_factor = tmp_stats.med_reactions_distance / min_r_dist
-        # pan(g_res, com_new-com, cc_rg_unmapped)
-        # zoom(g_res, com, blow_up_factor, nodes=set(g_res.nodes()).difference(cc_rg_unmapped))
-        # zoom(g_res, com, blow_up_factor/2, nodes=cc_rg_unmapped)
-        # zoom(tmp_orig, com, blow_up_factor, layout_key='layout')
-
-        # cc = cc_rg_unmapped
-        # while True:
-        #     eb = sorted(list(nx.edge_boundary(tgt_rg, cc)))
-        #     if len(eb) == 0:
-        #         break
-        #     # remember reactions which neighbor directly with laid out reactions
-        #     not_laid_boundary_r = sorted(set([e[0] for e in eb]))
-        #     laid_boundary_r = set([e[1] for e in eb])
-        #     for r in not_laid_boundary_r:
-        #         # lay out each of the not yet laid out reactions on the boundary
-        #         # usable_nodes = []
-        #         # for r_nb_s in sorted(nx.all_neighbors(tgt_orig, r)):
-        #         #     nb_r_nb_s = list(set(nx.all_neighbors(tgt_orig, r_nb_s)).difference(unmapped_reactions))
-        #         #     cnt_one_hop_nbs = True
-        #         #     for i1 in range(len(nb_r_nb_s)):
-        #         #         if not cnt_one_hop_nbs:
-        #         #             break
-        #         #         for i2 in range(i1+1, len(nb_r_nb_s)):
-        #         #             if len(set(nx.all_neighbors(tgt_orig, nb_r_nb_s[i1])).intersects(nx.all_neighbors(tgt_orig, nb_r_nb_s[i2]))) == 0:
-        #         #                 cnt_one_hop_nbs = False
-        #         #                 break
-        #         #
-        #         #     if len(nb_r_nb_s) == 1 or (len(nb_r_nb_s) > 0 and cnt_one_hop_nbs):
-        #         #         # usable_nodes += nb_r_nb_s
-        #         #         if r_nb_s not in g_res:
-        #         #             usable_nodes += nb_r_nb_s
-        #         #         else:
-        #         #             usable_nodes.append(r_nb_s)
-        #         #
-        #         # if len(usable_nodes) == 0:
-        #         # usable_nodes = laid_boundary_r
-        #
-        #         # nodes = [g_res.nodes()[n] for n in usable_nodes]
-        #         # nodes = [g_res.nodes()[n] for n in usable_nodes]
-        #         # masses = [1 / (2 * len(nx.shortest_path(tgt_rg, source=r, target=n))) for n in usable_nodes]
-        #         # com = get_center_of_mass(nodes, masses=masses, layout_key="predicted_layout")
-        #         # com = get_center_of_mass(nodes)
-        #
-        #         new_node = copy.deepcopy(tgt_rg_nodes[r])
-        #         g_res.add_node(r, **new_node)
-        #
-        #         cc.remove(r)
-        #         unmapped_reactions.remove(r)
-        #
-        #     # Position the new reactions
-        #     usable_nodes = laid_boundary_r
-        #     g_res_nodes = g_res.nodes()
-        #     nodes = [g_res_nodes[n] for n in usable_nodes]
-        #     com = get_center_of_mass(nodes)
-        #     # push the existing nodes away from the center using cartesian distortion to make space for the new reactions
-        #     # https://bost.ocks.org/mike/fisheye/
-        #     for r in not_laid_boundary_r:
-        #         set_predicted_position(g_res_nodes[r], com)
-        #     distort(g_res, com, tmp_stats.med_reactions_distance)
-
-
-
-        # # we position all nodes in the center of mass defined by the nodes in edge boundary (in the edge case
-        # # of |eb|=1 this will directly overlap eb
-        # # the mass of each node on the edge is determined by its shortest distance to the node being laid out
-        # # which ensures that close neighbors will pull the new node to them, especially if a node is tightly connected
-        # # in a cluster but is also inderectly connected to other edge nodes when having a large connected component
-        #
-        # # nodes = [tmp_rg_nodes[md[0][e[1]]] for e in nx.edge_boundary(tgt_rg, cc_rg_unmapped)]
-        # # nodes = [tgt_rg_nodes[e[1]] for e in nx.edge_boundary(tgt_rg, cc_rg_unmapped)]
-        # node_ids = [e[1] for e in nx.edge_boundary(tgt_rg, cc_rg_unmapped)]
-        # nodes = [tgt.nodes()[n] for n in node_ids]
-        # # com = get_center_of_mass(nodes)
-        #
-        # # TODO Deal with case when |cc_rg_unmapped| = 1
-        #
-        # for r in cc_rg_unmapped:
-        #     masses = [1 / (2 * len(nx.shortest_path(tgt_rg, source=r, target=n))) for n in node_ids]
-        #
-        #     com = get_center_of_mass(nodes, masses=masses, layout_key="predicted_layout")
-        #     new_node = copy.deepcopy(tgt_rg_nodes[r])
-        #     # TODO finding proper positions for the new nodes (now all are positioned in the center)
-        #     set_predicted_position(new_node, com)
-        #     tgt.add_node(r, **new_node)
 
 
 def tl_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, unmapped_reactions:List, nodes_mapping:NodesMapping):
@@ -1332,7 +1209,7 @@ def tl_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, unmapped_re
     laid_out_rs_tmp = list() #reaction_tmp, species_tmp
     laid_out_rs_tgt = list()
 
-    cs_mappging = {} # connecting species mapping
+    cs_mappging: Dict[(str, str), str] = {} # connecting species mapping. Stores mapping between target reaction id, and target species id and the result graph species id
 
     def go_through_reactions(dup):
         """
@@ -1490,6 +1367,7 @@ def tl_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, unmapped_re
                 # If both reactions are inserted, then their connecting species could not have been laid out
                 continue
 
+            # One of the reactions is unmapped and one is mapped
             r_tgt_unmapped = None
             r_tgt_mapped = None
             if r1_tgt in unmapped_reactions:
@@ -1500,6 +1378,7 @@ def tl_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, unmapped_re
                 r_tgt_unmapped = r2_tgt
 
             if r_tgt_mapped:
+                not_mapped_cn = []
                 for s in gu.get_connecting_nodes(tgt_orig, r_tgt_mapped, r_tgt_unmapped):
                     # TODO : rs_used(laid_out_rs_tgt, r_tgt_unmapped, s) ensures that when s was duplicated,
                     # it will be used onlly once for given reaction. However, we now simply connect it to the
@@ -1512,6 +1391,55 @@ def tl_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, unmapped_re
                             # we need to find species name to which the species was mapped in the result and connect to that
                             g_res.add_edge(r_tgt_unmapped, cs_mappging[(r_tgt_mapped, s)], role)
                             mark_rs_used(laid_out_rs_tgt, r_tgt_unmapped, s)
+                    elif not rs_used(laid_out_rs_tgt, r_tgt_mapped, s) and not rs_used(laid_out_rs_tgt, r_tgt_unmapped, s):
+                        not_mapped_cn.append(s)
+
+                # Try to use existing ODS of TMP reaction for the connecting species if there is a free one. This is usefull
+                # for example in situation when only a single reaction (without species) has been removed from target. Then
+                # a connecting species can become an ODS
+                # 1a. Find ODS of the mapped reaction
+                # 1b. Find ODS of the tempalte reaction of the mapped reaction
+                # 2. Carry out ODS mapping between target and template ODSs
+                # 3. Take not mapped ODS
+                # 4. Use the ODS which are closest to the not mapped reaction and use them as connecting species
+
+                if len(not_mapped_cn) == 0:
+                    continue
+
+                r_tmp_mapped = md[0][r_tgt_mapped]
+                ods_tmp = list(gu.get_ods_nbs(tmp_orig, r_tmp_mapped).keys())
+                if len(ods_tmp) == 0:
+                    continue
+
+                ods_tgt = [id for id in gu.get_ods_nbs(tgt_orig, r_tgt_mapped).keys() if not rs_used(laid_out_rs_tgt, r_tmp_mapped, id) ]#make sure that this ODS has not been used for other reaction as a connecting species
+                if len(ods_tgt) > 0:
+                    lsa = get_ods_mapping(ods_tgt, r_tgt_mapped, tgt_orig, ods_tmp, r_tmp_mapped, tmp_orig)
+                    ods_tmp_not_mapped_ixs = list(lsa[1]) # species from r_tmp_mapped which will be mapped
+                    ods_tmp_not_mapped_ixs = set(range(len(ods_tgt))).difference(ods_tmp_not_mapped_ixs)
+                    free_tmp_ods_ids = [ods_tmp[ix] for ix in ods_tmp_not_mapped_ixs]
+                else:
+                    free_tmp_ods_ids = ods_tmp
+
+                pos_r_tgt = gu.get_predicted_layout(g_res, r_tgt_mapped).get_center()
+                free_tmp_ods_ids_sorted = sorted(free_tmp_ods_ids, key=lambda s_id: gr.utils.dist(pos_r_tgt, gu.get_layout(tmp_orig, s_id, layout_type=be.LAYOUT_TYPE.ORIGINAL).get_center()))
+                for i in range(min(len(not_mapped_cn), len(free_tmp_ods_ids_sorted))):
+                    tgt_cn_id = not_mapped_cn[i]
+                    tmp_ods_id = free_tmp_ods_ids_sorted[i]
+
+                    new_node_id = '{}__{}'.format(tgt_cn_id, tmp_ods_id)
+                    assert new_node_id not in g_res
+
+                    new_node = copy.deepcopy(tgt_orig_nodes[tgt_cn_id])
+                    copy_predicted_layout(tmp_orig_nodes[tmp_ods_id], new_node)
+                    add_node(g_res, nodes_mapping, new_node_id, tgt_cn_id, **new_node)
+
+                    for r_tgt in [r_tgt_mapped, r_tgt_unmapped]:
+                        add_edges(g_res, new_node_id, r_tgt, tgt_orig, tgt_cn_id, r_tgt, tmp_orig, tmp_ods_id, r_tmp_mapped)
+                        cs_mappging[(r_tgt, tgt_cn_id)] = new_node_id
+                        mark_rs_used(laid_out_rs_tgt, r_tgt, tgt_cn_id)
+                    mark_rs_used(laid_out_rs_tmp, r_tmp_mapped, tmp_ods_id)
+                    # there is no "not mapped" template reaction since one of the target reaction was not mapped
+
 
     # First, we lay out the "true" connecting species, i.e. those which connect reactions in the template layout.
     go_through_reactions(False)
@@ -1720,6 +1648,141 @@ def get_graph_stats(g, layout_key=None) -> Stats:
 
     return stats
 
+
+def l_not_mapped_connecting_species(g_res, tgt_orig, to_layout, nodes_mapping, tmp_stats):
+
+    for r1_tgt, r2_tgt, s_ids_to_add, new_ids, cn_existing in to_layout:
+
+        if len(cn_existing) > 0:
+
+            # If there already exist any connecting nodes which have been laid out, we use them to position the remaining,
+            # not mapped connecting nodes, relative to them.
+            # The not mapped connecting nodes will be distributed on a line perpendicular (or horizontal/vertical?) to the connecting line going through the main connecting species
+
+            def get_average_pos(ids: List[str]) -> np.ndarray or None:
+                if len(ids) == 0:
+                    return None
+                poss = [gu.get_predicted_layout(g_res, id).get_center() for id in ids]
+                poss_filtered = [x for x in poss if x is not None]
+                return sum(poss_filtered) / len(ids) if len(poss_filtered) > 0 else None
+
+            def impute_mirrored(center: np.ndarray, p1: np.ndarray, p2: np.ndarray):
+                if p1 is None and p2 is None:
+                    return None, None
+                if p1 is None:
+                    p1 = center - (p2 - center)
+                else:
+                    p2 = center - (p1 - center)
+
+                return p1, p2
+
+            # 1. Find out whether to use horizontal or vertical vector
+            # r1_center = gu.get_predicted_layout(g_res, r1_tgt).get_center()
+            # r2_center = gu.get_predicted_layout(g_res, r2_tgt).get_center()
+            r1_center = get_average_pos(list(g_res[r1_tgt]))
+            r2_center = get_average_pos(list(g_res[r2_tgt]))
+            r_vect = r1_center - r2_center
+            # TODO: consider the role of the added species (we can place reactants and and products in such a way that the lines will cross unnecessarilly)
+            if gr.utils.is_more_horizontal(r_vect):
+                dir = np.array([0, 1])
+            else:
+                dir = np.array([1, 0])
+
+            # orient the direction towards products
+            rs1 = gu.get_reaction_species(g_res, r1_tgt)
+            r1_reactants_pos = get_average_pos(rs1.reactantIds)
+            r1_products_pos = get_average_pos(rs1.productIds)
+            r1_reactants_pos, r1_products_pos = impute_mirrored(r1_center, r1_reactants_pos, r1_products_pos)
+            if r1_reactants_pos is not None and r1_products_pos is not None:
+                r_center_shift = r1_center + dir
+                if gr.utils.dist(r_center_shift, r1_reactants_pos) < gr.utils.dist(r_center_shift, r1_products_pos):
+                    dir = -dir
+            else:
+                rs2 = gu.get_reaction_species(g_res, r1_tgt)
+                r2_reactants_pos = get_average_pos(rs2.reactantIds)
+                r2_products_pos = get_average_pos(rs2.productIds)
+                r2_reactants_pos, r2_products_pos = impute_mirrored(r2_center, r2_reactants_pos, r2_products_pos)
+                if r2_reactants_pos is not None and r2_products_pos is not None:
+                    r_center_shift = r2_center + dir
+                    if gr.utils.dist(r_center_shift, r2_reactants_pos) < gr.utils.dist(r_center_shift, r2_products_pos):
+                        dir = -dir
+                # TODO: since we might not be able to get the positions because of the fact that the positions of both reactants
+                # and species might not be known in the time of laying out connecting species, it might be necesarry to later
+                # fix the possible overlaps
+
+            # 2. Find the center
+            cn_existing_lts: List[be.SpeciesLayout] = [
+                gu.get_predicted_layout(g_res, nodes_mapping.getSourceMapping(cn_id)[0]) for cn_id in cn_existing]
+            center = sum([lt.get_center() for lt in cn_existing_lts]) / len(cn_existing)
+            if len(cn_existing) == 1:
+                # If there is only one existing species, the center lies in that species which would then surely overlap
+                # while laying on the perpendicular line so we can immediately shift it
+                center += settings.render.optimal_neighboring_species_dist * dir
+
+            # 3. Git bounding boxes of existing species (forbidden regions)
+            forbidden = [lt.get_bb().get() for lt in cn_existing_lts]
+
+            # 4. Get positions on the line around the center
+            # We should optimally start at the side which is closer to the tentative "side" of the not mapped species.
+            # This means that, for example, if there exists a connecting species s1 which is a reactant of r1 and r2 and
+            # r1 go horizontally from let to right and the not mapped species s1 is in the product role in r1 and r2 then
+            # s2 needs to be laid out right of s1. If these are the only connecting species, length of poss will be 1 and
+            # the position needs to be right of s1. Therefore, the dir vector needs to be in the correct direction.
+            s_id_roles_scores: Dict[str, int] = {}
+
+            def get_roles_score(roles: List[be.SPECIES_ROLE]) -> int:
+                score = 0
+                if be.SPECIES_ROLE.CONTAINS_PRODUCT_ROLE(roles):
+                    score += 1
+                if be.SPECIES_ROLE.CONTAINS_REACTANT_ROLE(roles):
+                    score -= 1
+                return score
+
+            for s_id in s_ids_to_add:
+                roles1: List[be.SPECIES_ROLE] = gu.get_roles(tgt_orig, r1_tgt, s_id)
+                roles2: List[be.SPECIES_ROLE] = gu.get_roles(tgt_orig, r2_tgt, s_id)
+                s_id_roles_scores[s_id] = get_roles_score(roles1) + get_roles_score(roles2)
+
+            sum_roles_score: int = sum(list(s_id_roles_scores.values()))
+            if sum_roles_score < 0:
+                # Majority of the new connecting species are in the reactant role and since the dir vector
+                # points toward product roles we need to switch the direction so that first will be laid out reactants
+                # (if there is space)
+                dir -= dir
+
+            # Sort the species from reactants to products since the positions will be assigned in this order
+            # and they will go from "left" to "right" with respect to the center and direction vector
+            aux_s_ids_to_add = [x[0] for x in sorted(s_id_roles_scores.items(), key=lambda x: x[1])]
+            poss = layout_on_line(s_ids=aux_s_ids_to_add, center=center, dir=dir, forbidden=forbidden)
+
+            # 5. Swap positions to minimize number of line overlaps
+            # TODO
+        else:
+            p_r1 = get_node_pos(g_res.nodes()[r1_tgt])
+            p_r2 = get_node_pos(g_res.nodes()[r2_tgt])
+
+            p_r1_r2 = p_r2 - p_r1
+            p_r1_r2_c = p_r1 + p_r1_r2 / 2
+            p_r1_r2_perp = np.array([p_r1[1] - p_r2[1], p_r2[0] - p_r1[0]])
+            p_r1_r2_perp_n = gr.utils.normalize(p_r1_r2_perp)
+
+            shift = tmp_stats.med_one_degree_species_distance
+
+            p_species = p_r1_r2_c - p_r1_r2_perp_n * shift * int(len(s_ids_to_add) / 2)
+            if len(s_ids_to_add) % 2 == 0:
+                p_species += p_r1_r2_perp_n * shift / 2
+
+            poss = {}
+            for s_id in s_ids_to_add:
+                poss[s_id] = np.array(p_species)
+                p_species += p_r1_r2_perp_n * shift / 2
+
+        # 6. Assign positions to nodes
+        for s_id in s_ids_to_add:
+            set_predicted_position(gu.get_node(g_res, new_ids[s_id]), poss[s_id], copied=False,
+                                   recompute_reaction_layout=False)
+
+
 def tl_not_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, laid_out_rs_tgt, laid_out_rs_tmp, tmp_stats, nodes_mapping:NodesMapping):
 
     """
@@ -1741,6 +1804,8 @@ def tl_not_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, laid_ou
     tgt_orig = tgt.get_original()
     tgt_orig_nodes = tgt_orig.nodes()
 
+    to_layout = []
+
     for (r1_tgt, r2_tgt) in sorted(tgt_rg.edges()):  # TODO do this in the order of decreasing amount of common
 
         # check for each common species how many times each of them  is present in the target (t1 and r2 can share multiple species)
@@ -1787,74 +1852,15 @@ def tl_not_mapped_connecting_species(g_res: nx.MultiGraph, tmp, tgt, md, laid_ou
             add_edges(g_res, mapped_id, r2_tgt, tgt_orig, n_id, nodes_mapping.getSourceMapping(r2_tgt)[0])
 
 
-        if len(s_ids_to_add) == 0:
-            #If there is no new node added we don't need to proceed with finding positions
-            continue
-
-        if len(cn_existing) > 0:
-            # If there already exist any connecting nodes which have been laid out, we use them to position the remaining,
-            # not mapped connecting nodes, relative to them.
-            # The not mapped connecting nodes will be distributed on a line perpendicular (or horizontal/vertical?) to the connecting line going through the main connecting species
-
-            # 1. Find out whether to use horizontal or vertical vector
-            r_vect = gu.get_predicted_layout(g_res, r1_tgt).get_center() - gu.get_predicted_layout(g_res, r2_tgt).get_center()
-            # TODO: consider the role of the added species (we can place reactants and and products in such a way that the lines will cross unnecessarilly)
-            if gr.utils.is_more_horizontal(r_vect):
-                dir = np.array([0, 1])
-            else:
-                dir = np.array([1, 0])
-
-            # 2. Find the center
-            cn_existing_lts: List[be.SpeciesLayout] = [gu.get_predicted_layout(g_res, nodes_mapping.getSourceMapping(cn_id)[0]) for cn_id in cn_existing]
-            center = sum([lt.get_center() for lt in cn_existing_lts])/len(cn_existing)
-
-            # 3. Git bounding boxes of existing species (forbidden regions)
-            forbidden = [lt.get_bb().get() for lt in cn_existing_lts]
+        if len(s_ids_to_add) > 0:
+            # Layout of the connecting nodes happens only after all reactions are checked because existing connecting species
+            # which are not yet connected (nodes exist but edges not) might be important for laying the connecting species
+            to_layout.append((r1_tgt, r2_tgt, s_ids_to_add, new_ids, cn_existing))
 
-            # 4. Get positions on the line around the center
-            poss = layout_on_line(s_ids=s_ids_to_add, center=center, dir=dir, forbidden=forbidden)
 
-            # 5. Swap positions to minimize number of line overlaps
-            # This needs to be done only after all species are laid out, because the positions might depend on positions of
-            # one degree species which have not been laid out yet
-        else:
-            p_r1 = get_node_pos(g_res.nodes()[r1_tgt])
-            p_r2 = get_node_pos(g_res.nodes()[r2_tgt])
+    l_not_mapped_connecting_species(g_res, tgt_orig, to_layout, nodes_mapping, tmp_stats)
 
-            p_r1_r2 = p_r2 - p_r1
-            p_r1_r2_c = p_r1 + p_r1_r2 / 2
-            p_r1_r2_perp = np.array([p_r1[1] - p_r2[1], p_r2[0] - p_r1[0]])
-            p_r1_r2_perp_n = gr.utils.normalize(p_r1_r2_perp)
 
-            shift = tmp_stats.med_one_degree_species_distance
-
-            p_species = p_r1_r2_c - p_r1_r2_perp_n * shift * int(len(s_ids_to_add) / 2)
-            if len(s_ids_to_add) % 2 == 0:
-                p_species += p_r1_r2_perp_n * shift / 2
-
-            poss = {}
-            for s_id in s_ids_to_add:
-                poss[s_id] = np.array(p_species)
-                p_species += p_r1_r2_perp_n * shift / 2
-
-        # 6. Assign positions to nodes
-        for s_id in s_ids_to_add:
-            set_predicted_position(gu.get_node(g_res, new_ids[s_id]), poss[s_id], copied=False, recompute_reaction_layout=False)
-
-        # https://stackoverflow.com/questions/20890270/how-to-calculate-coordinates-of-the-perpendicular-line
-        # p_r1 = get_node_pos(g_res.nodes()[r1_tgt])
-        # p_r2 = get_node_pos(g_res.nodes()[r2_tgt])
-        #
-        # layout_on_perp_line()
-        #
-        # p_r1_r2 = p_r2 - p_r1
-        # p_r1_r2_c = p_r1 + p_r1_r2 / 2
-        # p_r1_r2_perp = np.array([p_r1[1] - p_r2[1], p_r2[0] - p_r1[0]])
-        # p_r1_r2_perp_n = gr.utils.normalize(p_r1_r2_perp)
-        #
-        # p_species = p_r1_r2_c - p_r1_r2_perp_n * shift * int(len(s_ids_to_add) / 2)
-        # if len(s_ids_to_add) % 2 == 0:
-        #     p_species += p_r1_r2_perp_n * shift / 2
 
 
 
@@ -1925,7 +1931,7 @@ def mark_rs_used(rs_pairs, r, s):
     rs_pairs.append((r, s))
 
 
-def get_node_distance_matrix(nodes1, r1_id, g1, nodes2, r2_id, g2, tmp_n_tgt_n_map = None):
+def get_node_distance_matrix(nodes1: List[str], r1_id: str, g1: nx.MultiGraph, nodes2: List[str], r2_id: str, g2: nx.MultiGraph, tmp_n_tgt_n_map = None):
     """
 
     :param nodes1: target
@@ -1981,6 +1987,15 @@ def node_distance(n1, r1_id, g1, n2, r2_id, g2):
     return dist
 
 def add_node(g: nx.MultiGraph, nodes_mapping:NodesMapping, new_id, source_id, **node_info):
+    """
+
+    :param g:
+    :param nodes_mapping:
+    :param new_id:  ID in the result graph
+    :param source_id: ID in the original target graph
+    :param node_info:
+    :return:
+    """
     entity = gu.get_node_data(node_info)
     entity.set_graph(g)
     entity.set_id(new_id)
-- 
GitLab