#!/usr/bin/env python3
"""
add_bpmn_di.py — append a BPMN Diagram Interchange section to a BPMN file.

Reads a BPMN file's logical model (process + lanes + nodes + flows), runs a
small swim-lane left-to-right auto-layout, and writes a `<bpmndi:BPMNDiagram>`
block back into the file just before the closing `</bpmn:definitions>` tag.
The logical model text is left untouched; the DI section is added or, if one
already exists, replaced.

The emitted markup uses the `bpmndi:`, `dc:` and `di:` prefixes, so the
document's root element must declare those namespaces.

Usage:
    add_bpmn_di.py <file.bpmn> [<file.bpmn> ...]

The same `compute_layout` / `render_di` functions are reused by the
register-transcoder so newly emitted skeletons carry DI from the start.
"""
import re
import sys
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape

BPMN = "http://www.omg.org/spec/BPMN/20100524/MODEL"

# Standard bpmn.io sizes (width, height) per flow-node tag.
SIZES = {
    "startEvent": (36, 36),
    "endEvent": (36, 36),
    "intermediateThrowEvent": (36, 36),
    "intermediateCatchEvent": (36, 36),
    "exclusiveGateway": (50, 50),
    "parallelGateway": (50, 50),
    "inclusiveGateway": (50, 50),
    "eventBasedGateway": (50, 50),
    "userTask": (100, 80),
    "task": (100, 80),
    "serviceTask": (100, 80),
    "businessRuleTask": (100, 80),
    "scriptTask": (100, 80),
    "manualTask": (100, 80),
    "sendTask": (100, 80),
    "receiveTask": (100, 80),
    "subProcess": (100, 80),
    "callActivity": (100, 80),
}
NODE_TAGS = list(SIZES.keys())

# Layout constants
LANE_HEADER_W = 30  # left strip for lane label
COL_W = 170         # horizontal pitch between columns
LEFT_PAD = 60       # padding left of the first column
TOP_PAD = 40        # padding above the first lane
LANE_H = 180        # lane height

# An existing DI block (any namespace prefix) plus the whitespace around it.
# DOTALL is required: a real DI section always spans many lines.
_DI_BLOCK_RE = re.compile(
    r"\n?\s*<(?:[\w.-]+:)?BPMNDiagram\b.*?</(?:[\w.-]+:)?BPMNDiagram\s*>\s*\n?",
    re.DOTALL,
)
# The closing tag of the root element, with or without a namespace prefix.
_DEFINITIONS_CLOSE_RE = re.compile(r"</(?:[\w.-]+:)?definitions\s*>")


def _attr(value):
    """Escape *value* for use inside a double-quoted XML attribute."""
    return escape(str(value), {'"': "&quot;"})


def collect_model(proc):
    """Return (nodes, flows, lanes) from a <bpmn:process> element.

    nodes: dict mapping node id -> BPMN tag name, for every tag in NODE_TAGS
        (grouped by tag in NODE_TAGS order, document order within a tag).
    flows: list of (flow_id, source_id, target_id), one per sequenceFlow.
    lanes: list of (lane_id, lane_name, [node_id, ...]); the name falls back
        to the lane id when the lane has no name attribute.
    """
    b = f"{{{BPMN}}}"
    nodes = {}
    for tag in NODE_TAGS:
        for e in proc.iter(f"{b}{tag}"):
            nodes[e.get("id")] = tag
    flows = [
        (sf.get("id"), sf.get("sourceRef"), sf.get("targetRef"))
        for sf in proc.iter(f"{b}sequenceFlow")
    ]
    lanes = []
    for lane in proc.iter(f"{b}lane"):
        lid = lane.get("id")
        lname = lane.get("name") or lid
        refs = [
            r.text.strip()
            for r in lane.findall(f"{b}flowNodeRef")
            if r.text and r.text.strip()
        ]
        lanes.append((lid, lname, refs))
    return nodes, flows, lanes


def compute_layout(nodes, flows, lanes):
    """Assign each node a grid position.

    Returns a dict mapping node id -> (col, lane_idx, sub_idx, sub_count),
    where sub_idx / sub_count describe the node's slot among the nodes that
    share the same (col, lane_idx) cell.
    """
    succ = {n: [] for n in nodes}
    pred = {n: [] for n in nodes}
    for _, s, t in flows:
        # Flows touching ids that are not laid-out nodes are ignored.
        if s in succ and t in pred:
            succ[s].append(t)
            pred[t].append(s)

    # Kahn layering — start from indegree-0 nodes (or startEvents if none).
    indeg = {n: len(pred[n]) for n in nodes}
    col_of = {}
    frontier = [n for n in nodes if indeg[n] == 0]
    if not frontier:
        frontier = [n for n, t in nodes.items() if t == "startEvent"]
    if not frontier and nodes:
        frontier = [next(iter(nodes))]
    col = 0
    while frontier:
        nxt = []
        for n in frontier:
            if n in col_of:
                continue
            col_of[n] = col
            for m in succ[n]:
                indeg[m] -= 1
                if indeg[m] <= 0 and m not in col_of:
                    nxt.append(m)
        frontier = nxt
        col += 1

    # Cycle remnants: place them after their best-known predecessor's column.
    # Each pass either places at least one node or takes the fallback and
    # stops, so the loop terminates without an iteration cap (a cap could
    # leave nodes without a column and fail further down).
    pending = [n for n in nodes if n not in col_of]
    while pending:
        still_pending = []
        for n in pending:
            preds_known = [col_of[p] for p in pred[n] if p in col_of]
            if preds_known:
                col_of[n] = max(preds_known) + 1
            else:
                still_pending.append(n)
        if len(still_pending) == len(pending):
            # Nothing is reachable from a placed node: park the rest in one
            # column to the right of everything placed so far.
            base = max(col_of.values(), default=0) + 1
            for n in still_pending:
                col_of[n] = base
            break
        pending = still_pending

    # Lane assignment; nodes not referenced by any lane fall into lane 0.
    lane_of = {}
    for li, (_, _, refs) in enumerate(lanes):
        for r in refs:
            if r in nodes:
                lane_of[r] = li
    for n in nodes:
        lane_of.setdefault(n, 0)

    # Disambiguate nodes that share a (col, lane) bucket — assign a sub-index
    # so they fan out vertically within the lane instead of overlapping.
    buckets = {}
    for n in nodes:
        buckets.setdefault((col_of[n], lane_of[n]), []).append(n)
    placement = {}
    for (c, li), members in buckets.items():
        for i, n in enumerate(members):
            placement[n] = (c, li, i, len(members))
    # Rebuild in `nodes` order so callers iterating the result see the same
    # ordering as the input mapping.
    return {n: placement[n] for n in nodes}


def render_di(plane_id, nodes, flows, lanes, placement):
    """Emit a <bpmndi:BPMNDiagram> XML string for the given layout.

    plane_id: id of the element the plane refers to (its bpmnElement).
    nodes, flows, lanes: the model, shaped as returned by collect_model.
    placement: node id -> (col, lane_idx, sub_idx, sub_count), as returned
        by compute_layout.

    Returns "" when there are no nodes; otherwise the DI block as text,
    indented by two spaces and terminated by a newline.
    """
    if not nodes:
        return ""
    max_col = max(c for c, _, _, _ in placement.values())
    diagram_w = LANE_HEADER_W + LEFT_PAD + (max_col + 1) * COL_W + 60

    def node_geom(nid):
        """Return (x, y, width, height) of the node's bounding box."""
        col, lane_idx, sub_idx, sub_n = placement[nid]
        w, h = SIZES.get(nodes[nid], (100, 80))
        cx = LANE_HEADER_W + LEFT_PAD + col * COL_W + 50
        # stagger vertically within the lane if multiple nodes share the bucket
        lane_cy = TOP_PAD + lane_idx * LANE_H + LANE_H // 2
        if sub_n > 1:
            spacing = min(70, (LANE_H - 20) // sub_n)
            offset = (sub_idx - (sub_n - 1) / 2) * spacing
            cy = int(lane_cy + offset)
        else:
            cy = lane_cy
        return cx - w // 2, cy - h // 2, w, h

    def edge_anchor(nid, going_right):
        """Return the mid-right (outgoing) or mid-left (incoming) point."""
        x, y, w, h = node_geom(nid)
        return (x + w if going_right else x), y + h // 2

    bounds = '        <dc:Bounds x="%d" y="%d" width="%d" height="%d" />'
    out = [
        '  <bpmndi:BPMNDiagram id="BPMNDiagram_1">',
        '    <bpmndi:BPMNPlane id="BPMNPlane_1" bpmnElement="%s">'
        % _attr(plane_id),
    ]

    # Lanes (shapes are full-width strips).
    if lanes:
        lane_outer_x = LANE_HEADER_W
        lane_outer_w = diagram_w - LANE_HEADER_W - 20
        for li, (lid, _, _) in enumerate(lanes):
            ly = TOP_PAD + li * LANE_H
            out.append(
                '      <bpmndi:BPMNShape id="%s_di" bpmnElement="%s"'
                ' isHorizontal="true">' % (_attr(lid), _attr(lid))
            )
            out.append(bounds % (lane_outer_x, ly, lane_outer_w, LANE_H))
            out.append('      </bpmndi:BPMNShape>')

    # Node shapes.
    for nid in nodes:
        x, y, w, h = node_geom(nid)
        out.append(
            '      <bpmndi:BPMNShape id="%s_di" bpmnElement="%s">'
            % (_attr(nid), _attr(nid))
        )
        out.append(bounds % (x, y, w, h))
        out.append('      </bpmndi:BPMNShape>')

    # Edges — orthogonal dogleg between source-right and target-left.
    for fid, s, t in flows:
        if s not in nodes or t not in nodes:
            continue
        sx, sy = edge_anchor(s, going_right=True)
        tx, ty = edge_anchor(t, going_right=False)
        if tx <= sx:
            # back-edge (cycle) — route above the source. This is checked
            # before the same-row shortcut so a loop back within one row is
            # not drawn as a straight line through the nodes in between.
            mid_y = min(sy, ty) - 60
            wps = [(sx, sy), (sx + 20, sy), (sx + 20, mid_y),
                   (tx - 20, mid_y), (tx - 20, ty), (tx, ty)]
        elif abs(sy - ty) < 4:
            wps = [(sx, sy), (tx, ty)]
        else:
            mid_x = (sx + tx) // 2
            wps = [(sx, sy), (mid_x, sy), (mid_x, ty), (tx, ty)]
        out.append(
            '      <bpmndi:BPMNEdge id="%s_di" bpmnElement="%s">'
            % (_attr(fid), _attr(fid))
        )
        for x, y in wps:
            out.append('        <di:waypoint x="%d" y="%d" />' % (x, y))
        out.append('      </bpmndi:BPMNEdge>')

    out.append('    </bpmndi:BPMNPlane>')
    out.append('  </bpmndi:BPMNDiagram>')
    return "\n".join(out) + "\n"


def annotate_text(text):
    """Take BPMN XML text and return XML text with a fresh DI section.

    Returns (new_text, (n_nodes, n_flows, n_lanes)).

    Raises ValueError when the document has no process directly under the
    root, the process has no layout-able nodes, or no closing definitions
    tag can be found to insert the DI section before. Malformed XML raises
    xml.etree.ElementTree.ParseError.
    """
    root = ET.fromstring(text)
    proc = root.find(f"{{{BPMN}}}process")
    if proc is None:
        raise ValueError("no <bpmn:process> element")
    nodes, flows, lanes = collect_model(proc)
    if not nodes:
        raise ValueError("no nodes in process")
    placement = compute_layout(nodes, flows, lanes)
    di = render_di(proc.get("id"), nodes, flows, lanes, placement)

    # Drop any existing DI section, then splice the new one in textually so
    # the rest of the document is not re-serialised.
    text = _DI_BLOCK_RE.sub("\n", text)
    closers = list(_DEFINITIONS_CLOSE_RE.finditer(text))
    if not closers:
        raise ValueError("no closing </definitions> tag")
    # Use the last match: an earlier one could sit inside a comment or text.
    cut = closers[-1].start()
    return text[:cut] + di + text[cut:], \
        (len(nodes), len(flows), len(lanes))


def annotate_bpmn(path):
    """Rewrite the BPMN file at *path* in place with a fresh DI section.

    Returns the (n_nodes, n_flows, n_lanes) stats from annotate_text. The
    file is only opened for writing after the new text has been computed, so
    a failure in annotate_text leaves it untouched.
    """
    with open(path, encoding="utf-8") as fh:
        text = fh.read()
    new_text, stats = annotate_text(text)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(new_text)
    return stats


def main(argv):
    """Command-line entry point: annotate every file named in argv[1:]."""
    if len(argv) < 2:
        usage = __doc__ or "usage: add_bpmn_di.py <file.bpmn> [<file.bpmn> ...]"
        sys.exit(usage.strip())
    for path in argv[1:]:
        n_nodes, n_flows, n_lanes = annotate_bpmn(path)
        print(f" {path}: {n_nodes} nodes / {n_flows} flows / {n_lanes} lanes — DI written")


if __name__ == "__main__":
    main(sys.argv)