rewrite 2.0.0: real process — extract the algorithm into DMN

The 1.x package was a single ai.extract call wrapped in three BPMN service tasks. No decision logic, no dmn cornerstone, no weights — the risk/routing/validation algorithm lived invisibly in host code. There was nothing for a runtime to actually execute.

2.0.0 makes it a real process:

- dmn cornerstone added with three decision tables:
  * assess-personal-data-risk — PII regex signals -> risk level
  * gdpr-processing-route — risk x centralisation -> CENTRAL/LOCAL, anonymisation, redaction level
  * human-validation-gate — confidence thresholds + PII re-scan -> REJECTED/PENDING_REVIEW/APPROVED_AUTO
- BPMN expanded 3 -> 6 nodes (3 serviceTask + 3 businessRuleTask), with horizontal DI.
- Task ids, mappings, docs, manifest (dmn:true), uapf.yaml, lifecycle and eval-set updated; added a PII-bearing fixture.

Only the semantic extraction remains a model step. Risk classification, GDPR routing and validation gating are now explicit ranked DMN rules — inspectable, versioned, portable.

Breaking change: structure + outputs.
2026-05-17 20:00:36 +00:00
parent 3f1d62c748
commit dd69a04355
15 changed files with 496 additions and 120 deletions
--- a/bpmn/semantic-document-analysis.bpmn
+++ b/bpmn/semantic-document-analysis.bpmn
@@ -14,45 +14,89 @@

    <bpmn:startEvent id="Start" name="Document text received"/>

-    <bpmn:serviceTask id="Task_RedactPii"
-                      name="Redact personally identifiable information"
+    <bpmn:serviceTask id="Task_DetectRedactPii"
+                      name="Detect and redact PII"
                      uapf:capability="ai.redact@1">
      <bpmn:documentation>
-        Calls ai.redact@1 to mask names, identifiers, addresses, financial
-        and health data before downstream extraction. Required by
-        resources/guardrails.yaml (GDPR Art. 5 minimisation).
+        Calls ai.redact@1 over the source text. Beyond masking, the host
+        runs the four Latvian PII regex detectors (personas kods, IBAN,
+        e-mail, phone) and returns the deterministic signal set the risk
+        decision consumes: personasKodaPresent, financialDataPresent,
+        contactDataPresent, piiCategoryCount, detectedEntityTypes, plus
+        redactedContent. No model inference — pure pattern detection.
      </bpmn:documentation>
    </bpmn:serviceTask>

+    <bpmn:businessRuleTask id="Decision_AssessRisk"
+                           name="Assess personal-data risk"
+                           uapf:decision="assess-personal-data-risk">
+      <bpmn:documentation>
+        DMN dmn/assess-personal-data-risk.dmn. Maps the PII signal set to
+        personalDataRisk (NONE | LOW | MEDIUM | HIGH) by explicit ranked
+        rules. Personas kods or IBAN forces HIGH; two or more categories
+        or contact data gives MEDIUM. Deterministic and auditable.
+      </bpmn:documentation>
+    </bpmn:businessRuleTask>
+
+    <bpmn:businessRuleTask id="Decision_GdprRoute"
+                           name="Decide GDPR processing route"
+                           uapf:decision="gdpr-processing-route">
+      <bpmn:documentation>
+        DMN dmn/gdpr-processing-route.dmn. From personalDataRisk and
+        allowCentralization decides processingRoute (CENTRAL | LOCAL),
+        anonymizationRequired and redactionLevel. This is the routing
+        rule extracted from the host's generate_semantic_metadata: a
+        sensitive document where centralisation is not permitted stays
+        LOCAL with full redaction.
+      </bpmn:documentation>
+    </bpmn:businessRuleTask>
+
    <bpmn:serviceTask id="Task_ExtractSemantics"
                      name="Extract semantic metadata"
                      uapf:capability="ai.extract@1"
                      uapf:schemaRef="resources/schemas/vdvc-semantic-summary.schema.json">
      <bpmn:documentation>
-        Calls ai.extract@1 with the redacted text and the VDVC v1.1 output
-        schema (resources/schemas/vdvc-semantic-summary.schema.json). The
-        host's AI agent must produce output that validates against that
-        schema. Output records aiModelVersion + aiConfidenceScore per
-        EU AI Act Art. 13.
+        Calls ai.extract@1 on redactedContent with the VDVC v1.1 output
+        schema. This is the single bounded model step: it produces the
+        semanticSummary (topic, summary, keywords, urgency, risk) and
+        must validate against resources/schemas/vdvc-semantic-summary.schema.json.
+        The host also returns flat aiConfidenceScore and the result of
+        the post-extraction PII re-scan as outputPiiErrorCount.
      </bpmn:documentation>
    </bpmn:serviceTask>

-    <bpmn:serviceTask id="Task_EmitResultEvent"
+    <bpmn:businessRuleTask id="Decision_ValidationGate"
+                           name="Determine human-validation status"
+                           uapf:decision="human-validation-gate">
+      <bpmn:documentation>
+        DMN dmn/human-validation-gate.dmn. From outputPiiErrorCount,
+        aiConfidenceScore and personalDataRisk decides
+        humanValidationStatus (REJECTED | PENDING_REVIEW | APPROVED_AUTO)
+        and requiresHumanReview. Any leaked PII or confidence below 0.3
+        rejects; below 0.7, or HIGH risk, forces review; 0.7 and above
+        with clean output auto-approves. The thresholds are the weights.
+      </bpmn:documentation>
+    </bpmn:businessRuleTask>
+
+    <bpmn:serviceTask id="Task_EmitResult"
                      name="Emit semantic-analysis-completed event"
                      uapf:capability="event.emit@1"
                      uapf:eventType="document.semantic-analysis.completed.v1">
      <bpmn:documentation>
-        Calls event.emit@1 to publish a CloudEvent containing the extracted
-        semantic summary. Downstream processes consume this event.
+        Calls event.emit@1 to publish a CloudEvent carrying the semantic
+        summary, the routing decision and the validation status.
      </bpmn:documentation>
    </bpmn:serviceTask>

    <bpmn:endEvent id="End" name="Semantic analysis complete"/>

-    <bpmn:sequenceFlow id="f1" sourceRef="Start"                 targetRef="Task_RedactPii"/>
-    <bpmn:sequenceFlow id="f2" sourceRef="Task_RedactPii"        targetRef="Task_ExtractSemantics"/>
-    <bpmn:sequenceFlow id="f3" sourceRef="Task_ExtractSemantics" targetRef="Task_EmitResultEvent"/>
-    <bpmn:sequenceFlow id="f4" sourceRef="Task_EmitResultEvent"  targetRef="End"/>
+    <bpmn:sequenceFlow id="f1" sourceRef="Start"                 targetRef="Task_DetectRedactPii"/>
+    <bpmn:sequenceFlow id="f2" sourceRef="Task_DetectRedactPii"  targetRef="Decision_AssessRisk"/>
+    <bpmn:sequenceFlow id="f3" sourceRef="Decision_AssessRisk"   targetRef="Decision_GdprRoute"/>
+    <bpmn:sequenceFlow id="f4" sourceRef="Decision_GdprRoute"    targetRef="Task_ExtractSemantics"/>
+    <bpmn:sequenceFlow id="f5" sourceRef="Task_ExtractSemantics" targetRef="Decision_ValidationGate"/>
+    <bpmn:sequenceFlow id="f6" sourceRef="Decision_ValidationGate" targetRef="Task_EmitResult"/>
+    <bpmn:sequenceFlow id="f7" sourceRef="Task_EmitResult"       targetRef="End"/>

  </bpmn:process>

@@ -61,33 +105,54 @@
      <bpmndi:BPMNShape id="Start_di" bpmnElement="Start">
        <dc:Bounds x="152" y="102" width="36" height="36"/>
      </bpmndi:BPMNShape>
-      <bpmndi:BPMNShape id="Task_RedactPii_di" bpmnElement="Task_RedactPii">
-        <dc:Bounds x="240" y="80" width="100" height="80"/>
+      <bpmndi:BPMNShape id="Task_DetectRedactPii_di" bpmnElement="Task_DetectRedactPii">
+        <dc:Bounds x="240" y="90" width="110" height="80"/>
+      </bpmndi:BPMNShape>
+      <bpmndi:BPMNShape id="Decision_AssessRisk_di" bpmnElement="Decision_AssessRisk">
+        <dc:Bounds x="410" y="90" width="110" height="80"/>
+      </bpmndi:BPMNShape>
+      <bpmndi:BPMNShape id="Decision_GdprRoute_di" bpmnElement="Decision_GdprRoute">
+        <dc:Bounds x="580" y="90" width="120" height="80"/>
      </bpmndi:BPMNShape>
      <bpmndi:BPMNShape id="Task_ExtractSemantics_di" bpmnElement="Task_ExtractSemantics">
-        <dc:Bounds x="420" y="80" width="100" height="80"/>
+        <dc:Bounds x="760" y="90" width="110" height="80"/>
      </bpmndi:BPMNShape>
-      <bpmndi:BPMNShape id="Task_EmitResultEvent_di" bpmnElement="Task_EmitResultEvent">
-        <dc:Bounds x="600" y="80" width="100" height="80"/>
+      <bpmndi:BPMNShape id="Decision_ValidationGate_di" bpmnElement="Decision_ValidationGate">
+        <dc:Bounds x="930" y="90" width="120" height="80"/>
+      </bpmndi:BPMNShape>
+      <bpmndi:BPMNShape id="Task_EmitResult_di" bpmnElement="Task_EmitResult">
+        <dc:Bounds x="1110" y="90" width="110" height="80"/>
      </bpmndi:BPMNShape>
      <bpmndi:BPMNShape id="End_di" bpmnElement="End">
-        <dc:Bounds x="780" y="102" width="36" height="36"/>
+        <dc:Bounds x="1290" y="102" width="36" height="36"/>
      </bpmndi:BPMNShape>
      <bpmndi:BPMNEdge id="f1_di" bpmnElement="f1">
        <di:waypoint x="188" y="120"/>
        <di:waypoint x="240" y="120"/>
      </bpmndi:BPMNEdge>
      <bpmndi:BPMNEdge id="f2_di" bpmnElement="f2">
-        <di:waypoint x="340" y="120"/>
-        <di:waypoint x="420" y="120"/>
+        <di:waypoint x="350" y="120"/>
+        <di:waypoint x="410" y="120"/>
      </bpmndi:BPMNEdge>
      <bpmndi:BPMNEdge id="f3_di" bpmnElement="f3">
        <di:waypoint x="520" y="120"/>
-        <di:waypoint x="600" y="120"/>
+        <di:waypoint x="580" y="120"/>
      </bpmndi:BPMNEdge>
      <bpmndi:BPMNEdge id="f4_di" bpmnElement="f4">
        <di:waypoint x="700" y="120"/>
-        <di:waypoint x="780" y="120"/>
+        <di:waypoint x="760" y="120"/>
+      </bpmndi:BPMNEdge>
+      <bpmndi:BPMNEdge id="f5_di" bpmnElement="f5">
+        <di:waypoint x="870" y="120"/>
+        <di:waypoint x="930" y="120"/>
+      </bpmndi:BPMNEdge>
+      <bpmndi:BPMNEdge id="f6_di" bpmnElement="f6">
+        <di:waypoint x="1050" y="120"/>
+        <di:waypoint x="1110" y="120"/>
+      </bpmndi:BPMNEdge>
+      <bpmndi:BPMNEdge id="f7_di" bpmnElement="f7">
+        <di:waypoint x="1220" y="120"/>
+        <di:waypoint x="1290" y="120"/>
      </bpmndi:BPMNEdge>
    </bpmndi:BPMNPlane>
  </bpmndi:BPMNDiagram>