Merge pull request 'feat(3.2.0): align with UAPF v2.5.0 — embed algorithm card tests, drop sidecar' (#3) from v3.2.0-embedded-tests into main

Reviewed-on: #3
feat(3.2.0): align with UAPF v2.5.0 — embed algorithm card tests, drop sidecar
2026-05-21 08:27:54 +00:00 · 2026-05-21 08:02:26 +00:00 · 2026-05-20 14:51:21 +00:00
6 changed files with 237 additions and 158 deletions
--- a/README.md
+++ b/README.md
@@ -98,3 +98,15 @@ Validates against UAPF v2.4.0 schemas at
 ```bash
 python tools/uapf-cli/uapf.py validate /path/to/dokumenta-semantiska-analize
 ```
+
+## v3.2.0 (UAPF v2.5.0 alignment)
+
+Tests are now **embedded in each algorithm card** under a top-level `tests:` array (minimum 2 entries per card). The old sidecar location `tests/algorithms/<card-id>.test.yaml` is **removed** per UAPF v2.5.0 — that location no longer applies to algorithm cards.
+
+Embedded tests for this package:
+- `algo.semantic_document_analysis.pii_redactor` — 3 cases (Latvian personas kods inline, plain text with no PII, financial figures + IBAN)
+- `algo.semantic_document_analysis.vdvc_semantic_extractor` — 2 cases (regulatory complaint, non-regulatory thank-you), both with `ai_confidence_score` tolerance bands appropriate for a stochastic LLM extractor
+- `algo.semantic_document_analysis.completion_event_emitter` — 2 cases (success completion, failure completion)
+
+The Algorithm Card viewer (UAPF v2.5.0 chapter 13.16, ProcessGit Preview tab) consumes these embedded tests as its primary interaction surface — sample browser for `external` cards, regex/FEEL/source-display for `inline` cards.
+
--- a/algorithms/completion_event_emitter.card.yaml
+++ b/algorithms/completion_event_emitter.card.yaml
@@ -1,17 +1,10 @@
 kind: uapf.algorithm.card
-
 id: algo.semantic_document_analysis.completion_event_emitter
-version: "1.0.0"
-name: "Process completion event emitter"
-intent: >
-  Publishes a CloudEvents 1.0-conformant event marking the completion
-  of one semantic analysis cycle, with the DMN-decided fields
-  (personal data risk, processing route, redaction level, human
-  validation status) attached. Personal data is NEVER included in
-  the emitted payload — only the deterministic classification fields.
-
+version: 1.0.0
+name: Process completion event emitter
+intent: |
+  Publishes a CloudEvents 1.0-conformant event marking the completion of one semantic analysis cycle, with the DMN-decided fields (personal data risk, processing route, redaction level, human validation status) attached. Personal data is NEVER included in the emitted payload — only the deterministic classification fields.
 algorithm_kind: emitter
-
 io:
  inputs:
  - id: event_type
@@ -23,42 +16,55 @@ io:
  outputs:
  - id: published
    type: boolean
-
 implementation:
  type: external
  medium: mcp_tool
-  uri: "uapf-ip://capability/event.emit@1"
-  hash: "sha256:0000000000000000000000000000000000000000000000000000000000000000"
+  uri: uapf-ip://capability/event.emit@1
+  hash: sha256:0000000000000000000000000000000000000000000000000000000000000000
  runtime:
-    capability: "event.emit@1"
-    cloud_events_spec: "1.0"
-
+    capability: event.emit@1
+    cloud_events_spec: '1.0'
 determinism: deterministic
 side_effects: writes_state
-
 confidence:
  type: none
-
 complexity:
  typical_latency_ms: 25
  max_latency_ms: 1000
-
-failure_mode: "throw — process must complete reliably or fail loudly."
-
+failure_mode: throw — process must complete reliably or fail loudly.
 reference:
-  standard: "CloudEvents 1.0"
-  url: "https://github.com/cloudevents/spec/blob/v1.0/spec.md"
-
+  standard: CloudEvents 1.0
+  url: https://github.com/cloudevents/spec/blob/v1.0/spec.md
 owners:
-  - type: team
+- type: team
  id: uapf-stewards
  contact: stewards@uapf.dev
-
 lifecycle:
  status: draft
-  since: "2026-05-20"
-
+  since: '2026-05-20'
 audit:
  log_inputs: full
  log_outputs: full
-  retention: "1y"
+  retention: 1y
+tests:
+- name: Successful analysis completion
+  description: Standard happy-path completion event with full payload.
+  inputs:
+    event_type: dev.dokumenta.semantic_analysis.completed
+    payload:
+      document_id: doc-2026-05-21-001
+      outcome: ok
+      confidence: 0.87
+  expected_outputs:
+    published: true
+- name: Analysis failure completion
+  description: Failure-path completion event still emits successfully (the emitter
+    does not gate on payload contents).
+  inputs:
+    event_type: dev.dokumenta.semantic_analysis.failed
+    payload:
+      document_id: doc-2026-05-21-002
+      outcome: extraction_failed
+      reason: low_confidence
+  expected_outputs:
+    published: true
--- a/algorithms/pii_redactor.card.yaml
+++ b/algorithms/pii_redactor.card.yaml
@@ -1,17 +1,10 @@
 kind: uapf.algorithm.card
-
 id: algo.semantic_document_analysis.pii_redactor
-version: "1.0.0"
-name: "PII detector and redactor"
-intent: >
-  Detects personally identifiable information in free-text documents
-  (Latvian personas kods, IBAN, phone numbers, e-mail addresses,
-  names) and returns the source text with PII masked plus structured
-  regex-hit signals used by the downstream DMN decision
-  assess-personal-data-risk.
-
+version: 1.0.0
+name: PII detector and redactor
+intent: |
+  Detects personally identifiable information in free-text documents (Latvian personas kods, IBAN, phone numbers, e-mail addresses, names) and returns the source text with PII masked plus structured regex-hit signals used by the downstream DMN decision assess-personal-data-risk.
 algorithm_kind: redactor
-
 io:
  inputs:
  - id: content
@@ -19,14 +12,14 @@ io:
    cardinality: single
    constraints:
      maxLength: 200000
-      documentation: "Raw document text submitted for semantic analysis."
+    documentation: Raw document text submitted for semantic analysis.
  outputs:
  - id: redacted_content
    type: string
-      documentation: "Source text with PII masked by category tokens."
+    documentation: Source text with PII masked by category tokens.
  - id: detected_entity_types
    type: array
-      documentation: "PII category names only — never values."
+    documentation: PII category names only — never values.
  - id: personas_koda_present
    type: boolean
  - id: financial_data_present
@@ -35,53 +28,90 @@ io:
    type: boolean
  - id: pii_category_count
    type: integer
-      constraints: { minimum: 0 }
-
+    constraints:
+      minimum: 0
 implementation:
  type: external
  medium: mcp_tool
-  uri: "uapf-ip://capability/ai.redact@1"
-  hash: "sha256:0000000000000000000000000000000000000000000000000000000000000000"
+  uri: uapf-ip://capability/ai.redact@1
+  hash: sha256:0000000000000000000000000000000000000000000000000000000000000000
  runtime:
-    capability: "ai.redact@1"
-    note: "Host-fulfilled UAPF-IP capability. Hash is a placeholder until the runtime publishes the implementation hash of its ai.redact handler."
-
+    capability: ai.redact@1
+    note: Host-fulfilled UAPF-IP capability. Hash is a placeholder until the runtime
+      publishes the implementation hash of its ai.redact handler.
 determinism: deterministic
 side_effects: pure
-
 complexity:
  typical_latency_ms: 250
  max_latency_ms: 10000
-
-failure_mode: "throw — refuse processing if redactor unavailable; PII risk dominates."
-
+failure_mode: throw — refuse processing if redactor unavailable; PII risk dominates.
 limitations:
-  - "Latviešu valodas personu vārdi atpazīstami ~92% gadījumu"
-  - "Pieņem, ka teksts jau ir digitāls — OCR nav iekļauta"
-
+- Latviešu valodas personu vārdi atpazīstami ~92% gadījumu
+- Pieņem, ka teksts jau ir digitāls — OCR nav iekļauta
 reference:
-  legal: "GDPR 2016/679 5. pants (datu minimizēšana); Fizisko personu datu apstrādes likums."
-  standard: "NIST SP 800-188 — De-Identification of Personal Information."
-
+  legal: GDPR 2016/679 5. pants (datu minimizēšana); Fizisko personu datu apstrādes
+    likums.
+  standard: NIST SP 800-188 — De-Identification of Personal Information.
 owners:
-  - type: role
+- type: role
  id: data_protection_officer
  contact: stewards@uapf.dev
-
 lifecycle:
  status: draft
-  since: "2026-05-20"
-
+  since: '2026-05-20'
 audit:
  log_inputs: redacted
  log_outputs: full
-  retention: "7y"
-
+  retention: 7y
 privacy:
  processesPII: true
  technique: pseudonymization
  reidentificationRisk: low
-
 risk:
  aiActRiskClass: limited
  humanOversight: advisory
+tests:
+- name: Latvian personas kods inline in text
+  description: Standard 11-digit Latvian personal identity code (NNNNNN-NNNNN)
+    should be detected and redacted.
+  inputs:
+    content: 'Lūgums izskatīt iesniegumu. Iesniedzējs: Jānis Bērziņš, personas kods:
+      010101-12345. Adrese: Brīvības iela 1, Rīga.'
+  expected_outputs:
+    redacted_content: 'Lūgums izskatīt iesniegumu. Iesniedzējs: [NAME], personas kods:
+      [REDACTED]. Adrese: [ADDRESS].'
+    detected_entity_types:
+    - PERSONAS_KODS
+    - PERSON
+    - ADDRESS
+    personas_koda_present: true
+    financial_data_present: false
+    contact_data_present: true
+    pii_category_count: 3
+- name: Plain administrative text with no PII
+  description: Generic administrative paragraph; nothing to redact. Verifies the redactor
+    doesn't false-positive on plain text.
+  inputs:
+    content: Iesniegums tiek izskatīts atbilstoši normatīvajiem aktiem. Lēmums tiks
+      paziņots noteiktajā kārtībā.
+  expected_outputs:
+    redacted_content: Iesniegums tiek izskatīts atbilstoši normatīvajiem aktiem. Lēmums
+      tiks paziņots noteiktajā kārtībā.
+    detected_entity_types: []
+    personas_koda_present: false
+    financial_data_present: false
+    contact_data_present: false
+    pii_category_count: 0
+- name: Financial figures and account numbers
+  description: EUR amounts and IBAN — both detected as financial PII; no personas_kods.
+  inputs:
+    content: Maksājums EUR 1250.00 pārskaitīts uz kontu LV80BANK0000435195001.
+  expected_outputs:
+    redacted_content: Maksājums EUR [AMOUNT] pārskaitīts uz kontu [IBAN].
+    detected_entity_types:
+    - AMOUNT
+    - IBAN
+    personas_koda_present: false
+    financial_data_present: true
+    contact_data_present: false
+    pii_category_count: 2
--- a/algorithms/vdvc_semantic_extractor.card.yaml
+++ b/algorithms/vdvc_semantic_extractor.card.yaml
@@ -1,18 +1,10 @@
 kind: uapf.algorithm.card
-
 id: algo.semantic_document_analysis.vdvc_semantic_extractor
-version: "1.0.0"
-name: "VDVC semantic metadata extractor"
-intent: >
-  Extracts a VDVC v1.1-conformant structured semantic summary from
-  the redacted document text — primary topic, keywords,
-  classification, summary, sensitivity signals. Output validates
-  against resources/schemas/vdvc-semantic-summary.schema.json. This
-  is the sole model-inference step in the process; everything else
-  in the package is deterministic.
-
+version: 1.0.0
+name: VDVC semantic metadata extractor
+intent: |
+  Extracts a VDVC v1.1-conformant structured semantic summary from the redacted document text — primary topic, keywords, classification, summary, sensitivity signals. Output validates against resources/schemas/vdvc-semantic-summary.schema.json. This is the sole model-inference step in the process; everything else in the package is deterministic.
 algorithm_kind: extractor
-
 io:
  inputs:
  - id: redacted_content
@@ -20,69 +12,108 @@ io:
    cardinality: single
    constraints:
      maxLength: 200000
-      documentation: "Output of the upstream PII redactor."
+    documentation: Output of the upstream PII redactor.
  - id: schema_ref
    type: string
-      documentation: "Path to the JSON Schema the output must validate against."
+    documentation: Path to the JSON Schema the output must validate against.
  outputs:
  - id: semantic_summary
    type: object
-      schema: "../resources/schemas/vdvc-semantic-summary.schema.json"
+    schema: ../resources/schemas/vdvc-semantic-summary.schema.json
  - id: sensitivity_control
    type: object
  - id: ai_confidence_score
    type: probability
  - id: output_pii_error_count
    type: integer
-      constraints: { minimum: 0 }
-
+    constraints:
+      minimum: 0
 implementation:
  type: external
  medium: llm_prompt
-  uri: "uapf-ip://capability/ai.extract@1"
-  hash: "sha256:0000000000000000000000000000000000000000000000000000000000000000"
+  uri: uapf-ip://capability/ai.extract@1
+  hash: sha256:0000000000000000000000000000000000000000000000000000000000000000
  runtime:
-    capability: "ai.extract@1"
-    note: "Host-fulfilled UAPF-IP capability. Specific model identity and prompt hash are runtime concerns of the host; the Card declares the contract, not the implementation choice."
-
+    capability: ai.extract@1
+    note: Host-fulfilled UAPF-IP capability. Specific model identity and prompt hash
+      are runtime concerns of the host; the Card declares the contract, not the implementation
+      choice.
 determinism: stochastic
 side_effects: external_call
-
 confidence:
  type: probability
-  threshold: 0.70
-  below_threshold: "route-to:human.legal_reviewer (enforced by DMN human-validation-gate)"
-
+  threshold: 0.7
+  below_threshold: route-to:human.legal_reviewer (enforced by DMN human-validation-gate)
 complexity:
  typical_latency_ms: 8000
  max_latency_ms: 60000
-
-failure_mode: "default:null + flag — DMN human-validation-gate routes low-confidence outputs to PENDING_REVIEW."
-
+failure_mode: default:null + flag — DMN human-validation-gate routes low-confidence
+  outputs to PENDING_REVIEW.
 limitations:
-  - "Garie dokumenti (>50 000 znaki) tiek apgriezti — pirmie 50K + pēdējie 5K"
-  - "Nav juridisks vērtējums — tikai semantiska klasifikācija"
-  - "Latviešu valodas juridiskā retorika var samazināt recall"
-
+- Garie dokumenti (>50 000 znaki) tiek apgriezti — pirmie 50K + pēdējie 5K
+- Nav juridisks vērtējums — tikai semantiska klasifikācija
+- Latviešu valodas juridiskā retorika var samazināt recall
 reference:
-  legal: "EU AI Act 2024/1689, Pielikums III (augstā riska MI sistēmas), 13. pants (caurspīdība)."
-  url: "https://eur-lex.europa.eu/eli/reg/2024/1689/oj"
-
+  legal: EU AI Act 2024/1689, Pielikums III (augstā riska MI sistēmas), 13. pants
+    (caurspīdība).
+  url: https://eur-lex.europa.eu/eli/reg/2024/1689/oj
 owners:
-  - type: team
+- type: team
  id: uapf-stewards
  contact: stewards@uapf.dev
-
 lifecycle:
  status: draft
-  since: "2026-05-20"
-
+  since: '2026-05-20'
 audit:
  log_inputs: redacted
  log_outputs: full
-  retention: "7y"
-
+  retention: 7y
 risk:
  aiActRiskClass: high
  humanOversight: mandatory
  transparencyTier: tier-3-full
+tests:
+- name: Regulatory iesniegums about administrative decision
+  description: Typical Latvian administrative complaint with redacted PII. The extractor
+    should identify topic + risk + applicable regulation.
+  inputs:
+    redacted_content: Iesniedzējs [NAME] iesniedza sūdzību par būvvaldes lēmumu Nr.
+      12345 atteikt būvatļauju adresē [ADDRESS]. Tiek lūgts pārskatīt lēmumu.
+    schema_ref: schemas/iesniegums/v1
+  expected_outputs:
+    semantic_summary:
+      topic: construction-permit-appeal
+      subject_area: administrative-law
+      applicable_regulations:
+      - BL
+      - APL
+      language: lv
+    sensitivity_control:
+      contains_decision_reference: true
+      external_communication_recommended: false
+    ai_confidence_score: 0.87
+    output_pii_error_count: 0
+  tolerance:
+    ai_confidence_score: 0.1
+    output_pii_error_count: 0
+- name: Non-regulatory thank-you note
+  description: Out-of-domain input. Extractor should yield a low-confidence summary
+    and a sensitivity flag indicating that no decision is referenced.
+  inputs:
+    redacted_content: Paldies par jūsu pakalpojumu! Bija ļoti patīkami sadarboties
+      ar [NAME] no jūsu komandas.
+    schema_ref: schemas/iesniegums/v1
+  expected_outputs:
+    semantic_summary:
+      topic: non-actionable-correspondence
+      subject_area: feedback
+      applicable_regulations: []
+      language: lv
+    sensitivity_control:
+      contains_decision_reference: false
+      external_communication_recommended: false
+    ai_confidence_score: 0.62
+    output_pii_error_count: 0
+  tolerance:
+    ai_confidence_score: 0.15
+    output_pii_error_count: 0
--- a/manifest.json
+++ b/manifest.json
@@ -4,7 +4,7 @@
  "name": "Semantic Document Analysis",
  "description": "Level-4 UAPF process for semantic analysis of free-text documents.\n\nThree BPMN service tasks invoke the UAPF-IP capabilities ai.redact@1,\nai.extract@1 and event.emit@1. Three DMN decision tables encode the\ndeterministic algorithm the host previously hid inside application\ncode: assess-personal-data-risk maps PII regex signals to a risk\nlevel; gdpr-processing-route selects CENTRAL vs LOCAL processing,\nanonymisation and redaction level; human-validation-gate applies the\nconfidence thresholds that decide REJECTED / PENDING_REVIEW /\nAPPROVED_AUTO.\n\nOnly the semantic extraction is a model step. Risk classification,\nGDPR routing and the validation gate are explicit ranked rules in\nversioned DMN \u2014 inspectable, auditable, portable. Extraction output\nvalidates against the VDVC v1.1 semantic-summary JSON Schema.\n\nv3.1.0: aligned with UAPF v2.4.0 \u2014 Algorithm Card references move\nfrom resource targets to the BPMN service tasks themselves (via\nuapf24:algorithmCardRef attribute). Each card's io block is also\ndenormalised into a <bpmn:ioSpecification> on the task so inputs\nand outputs render as visible data objects on the diagram. The\ncards themselves and the DMN decisions are unchanged from v3.0.0.\n",
  "level": 4,
-  "version": "3.1.0",
+  "version": "3.2.0",
  "requires_capabilities": [
    "ai.redact@1+",
    "ai.extract@1+",
--- a/uapf.yaml
+++ b/uapf.yaml
@@ -26,7 +26,7 @@ description: |
  cards themselves and the DMN decisions are unchanged from v3.0.0.

 level: 4
-version: "3.1.0"
+version: "3.2.0"

 # ── UAPF-IP integration (capability needs + profile + guardrails) ──
 requires_capabilities: