diff --git a/csharp/Link.Foundation.Links.Notation.Tests/MultiRefTests.cs b/csharp/Link.Foundation.Links.Notation.Tests/MultiRefTests.cs new file mode 100644 index 00000000..e1ba21d6 --- /dev/null +++ b/csharp/Link.Foundation.Links.Notation.Tests/MultiRefTests.cs @@ -0,0 +1,128 @@ +using Xunit; + +namespace Link.Foundation.Links.Notation.Tests +{ + /// <summary> + /// Multi-Reference Feature Tests (Issue #184) + /// Tests for multi-word references without quotes: + /// - (some example: some example is a link) + /// - ID as multi-word string: "some example" + /// </summary> + public static class MultiRefTests + { + [Fact] + public static void ParsesTwoWordMultiReferenceId() + { + var parser = new Parser(); + var result = parser.Parse("(some example: value)"); + Assert.Single(result); + // Multi-word ID should be joined with space + Assert.Equal("some example", result[0].Id); + Assert.Single(result[0].Values); + } + + [Fact] + public static void ParsesThreeWordMultiReferenceId() + { + var parser = new Parser(); + var result = parser.Parse("(new york city: value)"); + Assert.Single(result); + Assert.Equal("new york city", result[0].Id); + } + + [Fact] + public static void ParsesSingleWordIdBackwardCompatible() + { + var parser = new Parser(); + var result = parser.Parse("(papa: value)"); + Assert.Single(result); + Assert.Equal("papa", result[0].Id); + } + + [Fact] + public static void ParsesQuotedMultiWordIdBackwardCompatible() + { + var parser = new Parser(); + var result = parser.Parse("('some example': value)"); + Assert.Single(result); + // Quoted ID should be preserved as-is + Assert.Equal("some example", result[0].Id); + } + + [Fact] + public static void FormatMultiReferenceId() + { + var parser = new Parser(); + var result = parser.Parse("(some example: value)"); + var formatted = result.Format(); + // Multi-reference IDs are formatted with quotes (normalized form) + Assert.Equal("('some example': value)", formatted); + } + + [Fact] + public static void RoundTripMultiReference() + 
{ + var parser = new Parser(); + var input = "(new york city: great)"; + var result = parser.Parse(input); + var formatted = result.Format(); + // Round-trip normalizes multi-word ID to quoted form + Assert.Equal("('new york city': great)", formatted); + } + + [Fact] + public static void ParsesIndentedSyntaxMultiReference() + { + var parser = new Parser(); + var input = "some example:\n value1\n value2"; + var result = parser.Parse(input); + Assert.Single(result); + Assert.Equal("some example", result[0].Id); + Assert.Equal(2, result[0].Values?.Count); + } + + [Fact] + public static void BackwardCompatibilitySingleLine() + { + var parser = new Parser(); + var result = parser.Parse("papa: loves mama"); + Assert.Single(result); + Assert.Equal("papa", result[0].Id); + Assert.Equal(2, result[0].Values?.Count); + } + + [Fact] + public static void BackwardCompatibilityParenthesized() + { + var parser = new Parser(); + var result = parser.Parse("(papa: loves mama)"); + Assert.Single(result); + Assert.Equal("papa", result[0].Id); + Assert.Equal(2, result[0].Values?.Count); + } + + [Fact] + public static void BackwardCompatibilityNested() + { + var parser = new Parser(); + var result = parser.Parse("(outer: (inner: value))"); + Assert.Single(result); + Assert.Equal("outer", result[0].Id); + Assert.Single(result[0].Values); + Assert.Equal("inner", result[0].Values?[0].Id); + } + + [Fact] + public static void MultiRefWithMultipleValues() + { + var parser = new Parser(); + var result = parser.Parse("(some example: one two three)"); + Assert.Single(result); + Assert.Equal("some example", result[0].Id); + Assert.Equal(3, result[0].Values?.Count); + Assert.Equal("one", result[0].Values?[0].Id); + Assert.Equal("two", result[0].Values?[1].Id); + Assert.Equal("three", result[0].Values?[2].Id); + } + } +} diff --git a/csharp/Link.Foundation.Links.Notation/Parser.peg b/csharp/Link.Foundation.Links.Notation/Parser.peg index a715a998..204eacb7 100644 --- 
a/csharp/Link.Foundation.Links.Notation/Parser.peg +++ b/csharp/Link.Foundation.Links.Notation/Parser.peg @@ -87,11 +87,16 @@ multiLineValueAndWhitespace <Link<string>> = value:referenceOrLink _ { value } multiLineValues <IList<Link<string>>> = _ list:multiLineValueAndWhitespace* { list } singleLineValueAndWhitespace <Link<string>> = __ value:referenceOrLink { value } singleLineValues <IList<Link<string>>> = list:singleLineValueAndWhitespace+ { list } -singleLineLink <Link<string>> = __ id:(reference) __ ":" v:singleLineValues { new Link<string>(id, v) } -multiLineLink <Link<string>> = "(" _ id:(reference) _ ":" v:multiLineValues _ ")" { new Link<string>(id, v) } +singleLineLink <Link<string>> = __ id:multiRefId __ ":" v:singleLineValues { new Link<string>(id, v) } +multiLineLink <Link<string>> = "(" _ id:multiRefId _ ":" v:multiLineValues _ ")" { new Link<string>(id, v) } singleLineValueLink <Link<string>> = v:singleLineValues { new Link<string>(v) } multiLineValueLink <Link<string>> = "(" v:multiLineValues _ ")" { new Link<string>(v) } -indentedIdLink <Link<string>> = id:(reference) __ ":" eol { new Link<string>(id) } +indentedIdLink <Link<string>> = id:multiRefId __ ":" eol { new Link<string>(id) } + +// Multi-reference ID: space-separated words before colon (joined with space) +// For backward compatibility, single word remains as-is +multiRefId <string> = refs:multiRefIdParts { string.Join(" ", refs) } +multiRefIdParts <IList<string>> = first:reference rest:(__ !(":" / eol / ")") r:reference { r })* { new List<string> { first }.Concat(rest).ToList() } // Reference can be quoted (with any number of quotes) or simple unquoted // Order: high quotes (3+) first, then double quotes (2), then single quotes (1), then simple diff --git a/experiments/multi_reference_design.md b/experiments/multi_reference_design.md new file mode 100644 index 00000000..538e60d2 --- /dev/null +++ b/experiments/multi_reference_design.md @@ -0,0 +1,61 @@ +# Multi-Reference Feature Design (Issue #184) + +## Overview + +This document outlines the design for supporting multi-references in Links Notation. 
+ +## Current Behavior + +``` +Input: (papa: loves mama) +Parsed: Link(id="papa", values=[Ref("loves"), Ref("mama")]) +``` + +For multi-word references, quoting is required: +``` +Input: ('some example': value) +Parsed: Link(id="some example", values=[Ref("value")]) +``` + +## Proposed Behavior + +### Multi-Reference Definition + +When a colon appears after multiple space-separated words, those words form a multi-reference: + +``` +Input: (some example: some example is a link) +Parsed: Link(id=["some", "example"], values=[MultiRef(["some", "example"]), Ref("is"), Ref("a"), Ref("link")]) +``` + +### Key Changes + +1. **ID field becomes an array**: + - Single-word: `id = ["papa"]` + - Multi-word: `id = ["some", "example"]` + +2. **Values remain an array** but can contain multi-references: + - `values = [MultiRef(["some", "example"]), Ref("is"), ...]` + +3. **Context-aware parsing**: + - First pass: Identify all multi-reference definitions (IDs before colons) + - Second pass: When parsing values, check if consecutive tokens form a known multi-reference + +## Implementation Strategy + +### Phase 1: Data Structure Changes +- Change `id` from `string | null` to `string[] | null` +- Add helper methods for multi-reference comparison + +### Phase 2: Parser Changes +- Collect multi-reference definitions during parsing +- When parsing values, check for multi-reference matches + +### Phase 3: Formatter Changes +- Format multi-word IDs without quotes (when possible) +- Preserve backward compatibility with quoted strings + +## Backward Compatibility + +- Quoted strings (`'some example'`) still work as single-token references +- Single-word IDs work the same way: `papa` -> `id = ["papa"]` diff --git a/experiments/test_multi_reference.js b/experiments/test_multi_reference.js new file mode 100644 index 00000000..31f0e40d --- /dev/null +++ b/experiments/test_multi_reference.js @@ -0,0 +1,119 @@ +/** + * Multi-Reference Feature Experiment (Issue #184) + * + * This script tests the 
concept of multi-references where + * multiple space-separated words before a colon form a single reference. + */ + +import { Parser, Link, formatLinks } from '../js/src/index.js'; + +const parser = new Parser(); + +console.log('=== Multi-Reference Feature Tests (Issue #184) ===\n'); + +// Test 1: Single-word ID (backward compatibility) +const test1 = 'papa: loves mama'; +console.log('Test 1 - Single-word ID (backward compatible):'); +console.log('Input:', test1); +try { + const result1 = parser.parse(test1); + console.log('Parsed:', JSON.stringify(result1, null, 2)); + console.log('Formatted:', formatLinks(result1, true)); + console.log('✅ Pass: Single-word ID still works'); +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +// Test 2: Quoted multi-word ID (backward compatibility) +const test2 = "('some example': value)"; +console.log('Test 2 - Quoted multi-word ID (backward compatible):'); +console.log('Input:', test2); +try { + const result2 = parser.parse(test2); + console.log('Parsed:', JSON.stringify(result2, null, 2)); + console.log('Formatted:', formatLinks(result2, true)); + console.log('✅ Pass: Quoted multi-word ID still works'); +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +// Test 3: Unquoted multi-word ID (NEW FEATURE) +const test3 = '(some example: some example is a link)'; +console.log('Test 3 - Unquoted multi-word ID (NEW):'); +console.log('Input:', test3); +try { + const result3 = parser.parse(test3); + console.log('Parsed:', JSON.stringify(result3, null, 2)); + console.log('Formatted:', formatLinks(result3, true)); + // Check if ID is an array with 2 elements + if (Array.isArray(result3[0].id) && result3[0].id.length === 2) { + console.log('✅ Pass: Multi-reference ID parsed as array:', result3[0].id); + } else { + console.log('⚠️ ID is not an array:', result3[0].id); + } +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +// Test 4: Context-aware multi-reference recognition 
in values +const test4 = '(some example: some example is a link)'; +console.log('Test 4 - Context-aware multi-reference in values:'); +console.log('Input:', test4); +try { + const result4 = parser.parse(test4); + console.log('Values count:', result4[0].values.length); + console.log('First value:', result4[0].values[0]); + // Check if "some example" in values is recognized as a single multi-ref + if (Array.isArray(result4[0].values[0].id) && + result4[0].values[0].id.length === 2 && + result4[0].values[0].id[0] === 'some' && + result4[0].values[0].id[1] === 'example') { + console.log('✅ Pass: "some example" recognized as multi-reference in values'); + } else { + console.log('⚠️ Multi-reference not recognized:', result4[0].values[0].id); + } +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +// Test 5: Self-reference (multi-ref defined, then used standalone on the next line) +const test5 = `(some example: some example is a link) +some example`; +console.log('Test 5 - Self-reference (multi-ref used standalone):'); +console.log('Input:', test5); +try { + const result5 = parser.parse(test5); + console.log('Parsed links count:', result5.length); + console.log('Second link:', JSON.stringify(result5[1], null, 2)); +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +// Test 6: Three-word multi-reference (ID and leading values share the same three words) +const test6 = '(new york city: new york city is great)'; +console.log('Test 6 - Three-word multi-reference:'); +console.log('Input:', test6); +try { + const result6 = parser.parse(test6); + console.log('Parsed:', JSON.stringify(result6, null, 2)); + console.log('ID:', result6[0].id); + console.log('Values count:', result6[0].values.length); + if (Array.isArray(result6[0].id) && result6[0].id.length === 3) { + console.log('✅ Pass: 3-word multi-reference parsed correctly'); + } +} catch (e) { + console.log('❌ Fail:', e.message); +} +console.log(); + +console.log('=== Summary ===\n'); +console.log('Multi-reference feature implemented:'); +console.log('1. 
Grammar updated to allow multiple references before colon'); +console.log('2. ID field can now be string (single) or string[] (multi)'); +console.log('3. Context-aware recognition: defined multi-refs recognized in values'); +console.log('4. Backward compatible: single-word and quoted IDs still work'); diff --git a/experiments/test_multi_reference.py b/experiments/test_multi_reference.py new file mode 100644 index 00000000..2cfbb1e1 --- /dev/null +++ b/experiments/test_multi_reference.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +""" +Multi-Reference Feature Experiment (Issue #184) + +This script tests the concept of multi-references where +multiple space-separated words before a colon form a single reference. +""" + +import sys +sys.path.insert(0, 'python') + +from links_notation import Parser, format_links + +parser = Parser() + +print("=== Multi-Reference Feature Tests (Issue #184) - Python ===\n") + +# Test 1: Single-word ID (backward compatibility) +test1 = "papa: loves mama" +print("Test 1 - Single-word ID (backward compatible):") +print("Input:", test1) +try: + result1 = parser.parse(test1) + print("Parsed ID:", result1[0].id) + print("Values:", [v.id for v in result1[0].values]) + print("Formatted:", format_links(result1, True)) + if isinstance(result1[0].id, str): + print("✅ Pass: Single-word ID still works as string") + else: + print("⚠️ Warning: ID type changed") +except Exception as e: + print("❌ Fail:", e) +print() + +# Test 2: Quoted multi-word ID (backward compatibility) +test2 = "('some example': value)" +print("Test 2 - Quoted multi-word ID (backward compatible):") +print("Input:", test2) +try: + result2 = parser.parse(test2) + print("Parsed ID:", result2[0].id) + print("Formatted:", format_links(result2, True)) + if isinstance(result2[0].id, str) and result2[0].id == "some example": + print("✅ Pass: Quoted multi-word ID still works as string") + else: + print("⚠️ Warning: ID type changed") +except Exception as e: + print("❌ Fail:", e) +print() + +# 
Test 3: Unquoted multi-word ID (NEW FEATURE) +test3 = "(some example: some example is a link)" +print("Test 3 - Unquoted multi-word ID (NEW):") +print("Input:", test3) +try: + result3 = parser.parse(test3) + print("Parsed ID:", result3[0].id) + print("Values:", [v.id for v in result3[0].values]) + print("Formatted:", format_links(result3, True)) + if isinstance(result3[0].id, list) and result3[0].id == ["some", "example"]: + print("✅ Pass: Multi-reference ID parsed as list:", result3[0].id) + else: + print("⚠️ ID is not a list:", result3[0].id) +except Exception as e: + print("❌ Fail:", e) +print() + +# Test 4: Context-aware multi-reference recognition in values +test4 = "(some example: some example is a link)" +print("Test 4 - Context-aware multi-reference in values:") +print("Input:", test4) +try: + result4 = parser.parse(test4) + print("Values count:", len(result4[0].values)) + print("First value ID:", result4[0].values[0].id) + # Check if "some example" in values is recognized as a single multi-ref + if (isinstance(result4[0].values[0].id, list) and + result4[0].values[0].id == ["some", "example"]): + print("✅ Pass: 'some example' recognized as multi-reference in values") + else: + print("⚠️ Multi-reference not recognized:", result4[0].values[0].id) +except Exception as e: + print("❌ Fail:", e) +print() + +# Test 5: Three-word multi-reference +test5 = "(new york city: new york city is great)" +print("Test 5 - Three-word multi-reference:") +print("Input:", test5) +try: + result5 = parser.parse(test5) + print("Parsed ID:", result5[0].id) + print("Values count:", len(result5[0].values)) + if isinstance(result5[0].id, list) and len(result5[0].id) == 3: + print("✅ Pass: 3-word multi-reference parsed correctly") + else: + print("⚠️ Unexpected result") +except Exception as e: + print("❌ Fail:", e) +print() + +# Test 6: Indented syntax with multi-reference +test6 = """some example: + value1 + value2""" +print("Test 6 - Indented syntax with multi-reference:") 
+print("Input:", repr(test6)) +try: + result6 = parser.parse(test6) + print("Parsed ID:", result6[0].id) + print("Values count:", len(result6[0].values)) + if isinstance(result6[0].id, list) and result6[0].id == ["some", "example"]: + print("✅ Pass: Indented multi-reference works") + else: + print("⚠️ Unexpected result") +except Exception as e: + print("❌ Fail:", e) +print() + +print("=== Summary ===\n") +print("Multi-reference feature implemented in Python:") +print("1. Parser updated to support multi-word IDs before colon") +print("2. ID field can now be string (single) or list[str] (multi)") +print("3. Context-aware recognition: defined multi-refs recognized in values") +print("4. Backward compatible: single-word and quoted IDs still work") diff --git a/go/multi_ref_test.go b/go/multi_ref_test.go new file mode 100644 index 00000000..587f47c5 --- /dev/null +++ b/go/multi_ref_test.go @@ -0,0 +1,243 @@ +package lino + +import ( + "testing" +) + +// Multi-Reference Feature Tests (Issue #184) +// +// Tests for multi-word references without quotes: +// - (some example: some example is a link) +// - ID as multi-word string: "some example" + +func TestParsesTwoWordMultiReferenceID(t *testing.T) { + result, err := Parse("(some example: value)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + // Multi-word ID should be joined with space + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "some example" { + t.Errorf("Expected ID 'some example', got '%s'", *result[0].ID) + } + if len(result[0].Values) != 1 { + t.Errorf("Expected 1 value, got %d", len(result[0].Values)) + } +} + +func TestParsesThreeWordMultiReferenceID(t *testing.T) { + result, err := Parse("(new york city: value)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + 
t.Fatal("Expected ID to be set") + } + if *result[0].ID != "new york city" { + t.Errorf("Expected ID 'new york city', got '%s'", *result[0].ID) + } +} + +func TestSingleWordIDBackwardCompatible(t *testing.T) { + result, err := Parse("(papa: value)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "papa" { + t.Errorf("Expected ID 'papa', got '%s'", *result[0].ID) + } +} + +func TestQuotedMultiWordIDBackwardCompatible(t *testing.T) { + result, err := Parse("('some example': value)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + // Quoted ID should be preserved as-is + if *result[0].ID != "some example" { + t.Errorf("Expected ID 'some example', got '%s'", *result[0].ID) + } +} + +func TestFormatMultiReferenceID(t *testing.T) { + result, err := Parse("(some example: value)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + formatted := Format(result) + // Multi-reference IDs are formatted with quotes (normalized form) + expected := "('some example': value)" + if formatted != expected { + t.Errorf("Expected '%s', got '%s'", expected, formatted) + } +} + +func TestRoundTripMultiReference(t *testing.T) { + input := "(new york city: great)" + result, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + formatted := Format(result) + // Round-trip normalizes multi-word ID to quoted form + expected := "('new york city': great)" + if formatted != expected { + t.Errorf("Expected '%s', got '%s'", expected, formatted) + } +} + +func TestIndentedSyntaxMultiReference(t *testing.T) { + input := "some example:\n value1\n value2" + result, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + 
if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "some example" { + t.Errorf("Expected ID 'some example', got '%s'", *result[0].ID) + } + if len(result[0].Values) != 2 { + t.Errorf("Expected 2 values, got %d", len(result[0].Values)) + } +} + +func TestValuesIncludeMultiReferenceContext(t *testing.T) { + // When the same multi-word pattern appears in values, + // they are parsed as separate words (no context-aware grouping) + input := "(some example: some example is a link)" + result, err := Parse(input) + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "some example" { + t.Errorf("Expected ID 'some example', got '%s'", *result[0].ID) + } + // Values should be separate: "some", "example", "is", "a", "link" + if len(result[0].Values) != 5 { + t.Errorf("Expected 5 values, got %d", len(result[0].Values)) + } +} + +func TestBackwardCompatibilitySingleLine(t *testing.T) { + result, err := Parse("papa: loves mama") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "papa" { + t.Errorf("Expected ID 'papa', got '%s'", *result[0].ID) + } + if len(result[0].Values) != 2 { + t.Errorf("Expected 2 values, got %d", len(result[0].Values)) + } +} + +func TestBackwardCompatibilityParenthesized(t *testing.T) { + result, err := Parse("(papa: loves mama)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "papa" { + t.Errorf("Expected ID 'papa', got '%s'", 
*result[0].ID) + } + if len(result[0].Values) != 2 { + t.Errorf("Expected 2 values, got %d", len(result[0].Values)) + } +} + +func TestBackwardCompatibilityNested(t *testing.T) { + result, err := Parse("(outer: (inner: value))") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "outer" { + t.Errorf("Expected ID 'outer', got '%s'", *result[0].ID) + } + if len(result[0].Values) != 1 { + t.Errorf("Expected 1 value, got %d", len(result[0].Values)) + } + innerLink := result[0].Values[0] + if innerLink.ID == nil { + t.Fatal("Expected inner ID to be set") + } + if *innerLink.ID != "inner" { + t.Errorf("Expected inner ID 'inner', got '%s'", *innerLink.ID) + } +} + +func TestMultiRefWithMultipleValues(t *testing.T) { + result, err := Parse("(some example: one two three)") + if err != nil { + t.Fatalf("Parse failed: %v", err) + } + if len(result) != 1 { + t.Fatalf("Expected 1 link, got %d", len(result)) + } + if result[0].ID == nil { + t.Fatal("Expected ID to be set") + } + if *result[0].ID != "some example" { + t.Errorf("Expected ID 'some example', got '%s'", *result[0].ID) + } + if len(result[0].Values) != 3 { + t.Errorf("Expected 3 values, got %d", len(result[0].Values)) + } + expectedValues := []string{"one", "two", "three"} + for i, expectedValue := range expectedValues { + if result[0].Values[i].ID == nil { + t.Errorf("Expected value %d to have ID", i) + continue + } + if *result[0].Values[i].ID != expectedValue { + t.Errorf("Expected value %d to be '%s', got '%s'", i, expectedValue, *result[0].Values[i].ID) + } + } +} diff --git a/java/src/test/java/io/github/linkfoundation/linksnotation/MultiRefTest.java b/java/src/test/java/io/github/linkfoundation/linksnotation/MultiRefTest.java new file mode 100644 index 00000000..cf106dcb --- /dev/null +++ 
b/java/src/test/java/io/github/linkfoundation/linksnotation/MultiRefTest.java @@ -0,0 +1,136 @@ +package io.github.linkfoundation.linksnotation; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; +import org.junit.jupiter.api.Test; + +/** + * Multi-Reference Feature Tests (Issue #184) + * + * Tests for multi-word references without quotes: + * + * <ul> + * <li>(some example: some example is a link)</li> + * <li>ID as multi-word string: "some example"</li> + * </ul> + */ +public class MultiRefTest { + + @Test + public void testParsesTwoWordMultiReferenceId() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(some example: value)"); + assertEquals(1, result.size()); + // Multi-word ID should be joined with space + assertEquals("some example", result.get(0).getId()); + assertEquals(1, result.get(0).getValues().size()); + } + + @Test + public void testParsesThreeWordMultiReferenceId() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(new york city: value)"); + assertEquals(1, result.size()); + assertEquals("new york city", result.get(0).getId()); + } + + @Test + public void testSingleWordIdBackwardCompatible() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(papa: value)"); + assertEquals(1, result.size()); + assertEquals("papa", result.get(0).getId()); + } + + @Test + public void testQuotedMultiWordIdBackwardCompatible() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("('some example': value)"); + assertEquals(1, result.size()); + // Quoted ID should be preserved as-is + assertEquals("some example", result.get(0).getId()); + } + + @Test + public void testFormatMultiReferenceId() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(some example: value)"); + String formatted = Link.formatLinks(result); + // Multi-reference IDs are formatted with quotes (normalized form) + assertEquals("('some 
example': value)", formatted); + } + + @Test + public void testRoundTripMultiReference() throws ParseException { + Parser parser = new Parser(); + String input = "(new york city: great)"; + List<Link> result = parser.parse(input); + String formatted = Link.formatLinks(result); + // Round-trip normalizes multi-word ID to quoted form + assertEquals("('new york city': great)", formatted); + } + + @Test + public void testIndentedSyntaxMultiReference() throws ParseException { + Parser parser = new Parser(); + String input = "some example:\n value1\n value2"; + List<Link> result = parser.parse(input); + assertEquals(1, result.size()); + assertEquals("some example", result.get(0).getId()); + assertEquals(2, result.get(0).getValues().size()); + } + + @Test + public void testValuesIncludeMultiReferenceContext() throws ParseException { + // When the same multi-word pattern appears in values, + // they are parsed as separate words (no context-aware grouping) + Parser parser = new Parser(); + String input = "(some example: some example is a link)"; + List<Link> result = parser.parse(input); + assertEquals(1, result.size()); + assertEquals("some example", result.get(0).getId()); + // Values should be separate: "some", "example", "is", "a", "link" + assertEquals(5, result.get(0).getValues().size()); + } + + @Test + public void testBackwardCompatibilitySingleLine() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("papa: loves mama"); + assertEquals(1, result.size()); + assertEquals("papa", result.get(0).getId()); + assertEquals(2, result.get(0).getValues().size()); + } + + @Test + public void testBackwardCompatibilityParenthesized() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(papa: loves mama)"); + assertEquals(1, result.size()); + assertEquals("papa", result.get(0).getId()); + assertEquals(2, result.get(0).getValues().size()); + } + + @Test + public void testBackwardCompatibilityNested() throws ParseException { + 
Parser parser = new Parser(); + List<Link> result = parser.parse("(outer: (inner: value))"); + assertEquals(1, result.size()); + assertEquals("outer", result.get(0).getId()); + assertEquals(1, result.get(0).getValues().size()); + assertEquals("inner", result.get(0).getValues().get(0).getId()); + } + + @Test + public void testMultiRefWithMultipleValues() throws ParseException { + Parser parser = new Parser(); + List<Link> result = parser.parse("(some example: one two three)"); + assertEquals(1, result.size()); + assertEquals("some example", result.get(0).getId()); + assertEquals(3, result.get(0).getValues().size()); + assertEquals("one", result.get(0).getValues().get(0).getId()); + assertEquals("two", result.get(0).getValues().get(1).getId()); + assertEquals("three", result.get(0).getValues().get(2).getId()); + } +} diff --git a/js/src/Link.js b/js/src/Link.js index 7f4421be..5f9dbfe7 100644 --- a/js/src/Link.js +++ b/js/src/Link.js @@ -1,12 +1,19 @@ export class Link { /** * Create a new Link - * @param {string|null} id - Optional identifier for the link + * @param {string|string[]|null} id - Optional identifier for the link (string for single ref, array for multi-ref) * @param {Link[]|null} values - Optional array of nested links * @throws {TypeError} If values is not an array or null */ constructor(id = null, values = null) { - this.id = id; + // Store ids as an array internally (primary storage) + if (id === null || id === undefined) { + this._ids = null; + } else if (Array.isArray(id)) { + this._ids = id; + } else { + this._ids = [id]; + } // Validate that values is an array if provided if (values !== null && values !== undefined) { @@ -19,6 +26,53 @@ export class Link { } } + /** + * Get the ids array (primary storage for reference identifiers) + * @returns {string[]|null} Array of reference strings, or null if no id + */ + get ids() { + return this._ids; + } + + /** + * Set the ids array + * @param {string[]|null} value - Array of reference strings, or null + */ + set 
ids(value) { + this._ids = value; + } + + /** + * Get the id as a single string (backward compatibility) + * @throws {Error} If ids has more than one element (use ids property instead) + * @returns {string|null} Single reference string, or null if no id + */ + get id() { + if (this._ids === null) { + return null; + } + if (this._ids.length > 1) { + throw new Error( + `This link has a multi-reference id with ${this._ids.length} parts. Use the 'ids' property instead of 'id'.` + ); + } + return this._ids[0]; + } + + /** + * Set the id (backward compatibility) + * @param {string|string[]|null} value - Single reference string, array, or null + */ + set id(value) { + if (value === null || value === undefined) { + this._ids = null; + } else if (Array.isArray(value)) { + this._ids = value; + } else { + this._ids = [value]; + } + } + /** * Convert link to string representation * @returns {string} String representation of the link @@ -51,7 +105,7 @@ export class Link { // Check if value has simplify method (defensive programming) return v && typeof v.simplify === 'function' ? 
v.simplify() : v; }); - return new Link(this.id, newValues); + return new Link(this._ids, newValues); } } @@ -77,52 +131,65 @@ export class Link { } /** - * Escape a reference string by adding quotes if necessary - * @param {string} reference - The reference to escape + * Escape a reference string or multi-reference array by adding quotes if necessary + * @param {string|string[]} reference - The reference to escape (string or array of strings for multi-ref) * @returns {string} Escaped reference */ static escapeReference(reference) { - if (!reference || reference.trim() === '') { + // Handle multi-reference (array of strings) + if (Array.isArray(reference)) { + // Multi-reference: join with space, each part should be a simple reference + // For output, we can either keep as space-separated or quote if needed + return reference.map((r) => Link.escapeReference(r)).join(' '); + } + + if ( + !reference || + (typeof reference === 'string' && reference.trim() === '') + ) { return ''; } - const hasSingleQuote = reference.includes("'"); - const hasDoubleQuote = reference.includes('"'); + // Ensure reference is a string + const refStr = String(reference); + + const hasSingleQuote = refStr.includes("'"); + const hasDoubleQuote = refStr.includes('"'); const needsQuoting = - reference.includes(':') || - reference.includes('(') || - reference.includes(')') || - reference.includes(' ') || - reference.includes('\t') || - reference.includes('\n') || - reference.includes('\r') || + refStr.includes(':') || + refStr.includes('(') || + refStr.includes(')') || + refStr.includes(' ') || + refStr.includes('\t') || + refStr.includes('\n') || + refStr.includes('\r') || hasDoubleQuote || hasSingleQuote; // Handle edge case: reference contains both single and double quotes if (hasSingleQuote && hasDoubleQuote) { // Escape single quotes and wrap in single quotes - return `'${reference.replace(/'/g, "\\'")}'`; + return `'${refStr.replace(/'/g, "\\'")}'`; } // Prefer single quotes if double 
quotes are present if (hasDoubleQuote) { - return `'${reference}'`; + return `'${refStr}'`; } // Use double quotes if single quotes are present if (hasSingleQuote) { - return `"${reference}"`; + return `"${refStr}"`; } // Use single quotes for special characters if (needsQuoting) { - return `'${reference}'`; + return `'${refStr}'`; } // No quoting needed - return reference; + return refStr; } /** @@ -131,7 +198,7 @@ export class Link { */ toLinkOrIdString() { if (!this.values || this.values.length === 0) { - return this.id === null ? '' : Link.escapeReference(this.id); + return this._ids === null ? '' : Link.escapeReference(this._ids); } return this.toString(); } @@ -143,7 +210,16 @@ export class Link { */ equals(other) { if (!(other instanceof Link)) return false; - if (this.id !== other.id) return false; + + // Compare ids arrays + if (this._ids === null && other._ids !== null) return false; + if (this._ids !== null && other._ids === null) return false; + if (this._ids !== null && other._ids !== null) { + if (this._ids.length !== other._ids.length) return false; + for (let i = 0; i < this._ids.length; i++) { + if (this._ids[i] !== other._ids[i]) return false; + } + } // Handle null/undefined values arrays const thisValues = this.values || []; @@ -187,18 +263,18 @@ export class Link { // Original implementation for backward compatibility // Empty link - if (this.id === null && (!this.values || this.values.length === 0)) { + if (this._ids === null && (!this.values || this.values.length === 0)) { return lessParentheses ? 
'' : '()'; } // Link with only ID, no values if (!this.values || this.values.length === 0) { - const escapedId = Link.escapeReference(this.id); + const escapedId = Link.escapeReference(this._ids); // When used as a value in a compound link (created from combining links), wrap in parentheses if (isCompoundValue) { return `(${escapedId})`; } - return lessParentheses && !this.needsParentheses(this.id) + return lessParentheses && !this.needsParentheses(this._ids) ? escapedId : `(${escapedId})`; } @@ -207,7 +283,7 @@ export class Link { const valuesStr = this.values.map((v) => this.formatValue(v)).join(' '); // Link with values only (null id) - if (this.id === null) { + if (this._ids === null) { // For lessParentheses mode with simple values, don't wrap the whole thing if (lessParentheses) { // Check if all values are simple (no nested values) @@ -217,7 +293,7 @@ export class Link { if (allSimple) { // Format each value without extra wrapping const simpleValuesStr = this.values - .map((v) => Link.escapeReference(v.id)) + .map((v) => Link.escapeReference(v._ids)) .join(' '); return simpleValuesStr; } @@ -230,9 +306,9 @@ export class Link { } // Link with ID and values - const idStr = Link.escapeReference(this.id); + const idStr = Link.escapeReference(this._ids); const withColon = `${idStr}: ${valuesStr}`; - return lessParentheses && !this.needsParentheses(this.id) + return lessParentheses && !this.needsParentheses(this._ids) ? 
withColon : `(${withColon})`; } @@ -244,7 +320,7 @@ export class Link { */ formatValue(value) { if (!value || !value.format) { - return Link.escapeReference((value && value.id) || ''); + return Link.escapeReference((value && value._ids) || ''); } // Check if we're in a compound link that was created from path combinations @@ -258,7 +334,7 @@ export class Link { // Simple link with just an ID - don't wrap in parentheses when used as a value if (!value.values || value.values.length === 0) { - return Link.escapeReference(value.id); + return Link.escapeReference(value._ids); } // Complex value with its own structure - format it normally with parentheses @@ -266,11 +342,15 @@ export class Link { } /** - * Check if a string needs to be wrapped in parentheses - * @param {string} str - The string to check + * Check if a string or array needs to be wrapped in parentheses + * @param {string|string[]} str - The string or array to check * @returns {boolean} True if parentheses are needed */ needsParentheses(str) { + // Multi-reference arrays always need parentheses when formatted inline + if (Array.isArray(str)) { + return str.length > 1; + } return ( str && (str.includes(' ') || @@ -288,17 +368,17 @@ export class Link { */ _formatWithOptions(options, isCompoundValue = false) { // Empty link - if (this.id === null && (!this.values || this.values.length === 0)) { + if (this._ids === null && (!this.values || this.values.length === 0)) { return options.lessParentheses ? '' : '()'; } // Link with only ID, no values if (!this.values || this.values.length === 0) { - const escapedId = Link.escapeReference(this.id); + const escapedId = Link.escapeReference(this._ids); if (isCompoundValue) { return `(${escapedId})`; } - return options.lessParentheses && !this.needsParentheses(this.id) + return options.lessParentheses && !this.needsParentheses(this._ids) ? 
escapedId : `(${escapedId})`; } @@ -311,8 +391,8 @@ export class Link { // Try inline format first const valuesStr = this.values.map((v) => this.formatValue(v)).join(' '); let testLine; - if (this.id !== null) { - const idStr = Link.escapeReference(this.id); + if (this._ids !== null) { + const idStr = Link.escapeReference(this._ids); testLine = options.lessParentheses ? `${idStr}: ${valuesStr}` : `(${idStr}: ${valuesStr})`; @@ -334,13 +414,13 @@ export class Link { const valuesStr = this.values.map((v) => this.formatValue(v)).join(' '); // Link with values only (null id) - if (this.id === null) { + if (this._ids === null) { if (options.lessParentheses) { const allSimple = this.values.every( (v) => !v.values || v.values.length === 0 ); if (allSimple) { - return this.values.map((v) => Link.escapeReference(v.id)).join(' '); + return this.values.map((v) => Link.escapeReference(v._ids)).join(' '); } return valuesStr; } @@ -348,9 +428,9 @@ export class Link { } // Link with ID and values - const idStr = Link.escapeReference(this.id); + const idStr = Link.escapeReference(this._ids); const withColon = `${idStr}: ${valuesStr}`; - return options.lessParentheses && !this.needsParentheses(this.id) + return options.lessParentheses && !this.needsParentheses(this._ids) ? 
withColon : `(${withColon})`; } @@ -361,7 +441,7 @@ export class Link { * @returns {string} Indented formatted string */ _formatIndented(options) { - if (this.id === null) { + if (this._ids === null) { // Values only - format each on separate line const lines = this.values.map( (v) => options.indentString + this.formatValue(v) @@ -370,7 +450,7 @@ export class Link { } // Link with ID - format as id:\n value1\n value2 - const idStr = Link.escapeReference(this.id); + const idStr = Link.escapeReference(this._ids); const lines = [`${idStr}:`]; for (const v of this.values) { lines.push(options.indentString + this.formatValue(v)); @@ -392,11 +472,19 @@ function _groupConsecutiveLinks(links) { const grouped = []; let i = 0; + // Helper to compare ids arrays + const idsEqual = (ids1, ids2) => { + if (ids1 === null && ids2 === null) return true; + if (ids1 === null || ids2 === null) return false; + if (ids1.length !== ids2.length) return false; + return ids1.every((id, idx) => id === ids2[idx]); + }; + while (i < links.length) { const current = links[i]; // Look ahead for consecutive links with same ID - if (current.id !== null && current.values && current.values.length > 0) { + if (current._ids !== null && current.values && current.values.length > 0) { // Collect all values with same ID const sameIdValues = [...current.values]; let j = i + 1; @@ -404,7 +492,7 @@ function _groupConsecutiveLinks(links) { while (j < links.length) { const nextLink = links[j]; if ( - nextLink.id === current.id && + idsEqual(nextLink._ids, current._ids) && nextLink.values && nextLink.values.length > 0 ) { @@ -417,7 +505,7 @@ function _groupConsecutiveLinks(links) { // If we found consecutive links, create grouped link if (j > i + 1) { - const groupedLink = new Link(current.id, sameIdValues); + const groupedLink = new Link(current._ids, sameIdValues); grouped.push(groupedLink); i = j; continue; diff --git a/js/src/grammar.pegjs b/js/src/grammar.pegjs index 40691c6a..621c26bb 100644 --- 
a/js/src/grammar.pegjs +++ b/js/src/grammar.pegjs @@ -122,15 +122,28 @@ singleLineValueAndWhitespace = __ value:referenceOrLink { return value; } singleLineValues = list:singleLineValueAndWhitespace+ { return list; } -singleLineLink = __ id:reference __ ":" v:singleLineValues { return { id: id, values: v }; } +// Multi-reference support: multiple space-separated references form a single multi-reference ID +// Example: "some example: some example is a link" -> id: ["some", "example"], values: [...] +singleLineLink = __ id:multiRefId __ ":" v:singleLineValues { return { id: id, values: v, isMultiRef: Array.isArray(id) && id.length > 1 }; } -multiLineLink = "(" _ id:reference _ ":" v:multiLineValues _ ")" { return { id: id, values: v }; } +multiLineLink = "(" _ id:multiRefId _ ":" v:multiLineValues _ ")" { return { id: id, values: v, isMultiRef: Array.isArray(id) && id.length > 1 }; } + +// Multi-reference ID: one or more references before the colon +// Returns array of strings for multi-word, or single string for backward compatibility +multiRefId = refs:multiRefIdParts { + if (refs.length === 1) { + return refs[0]; // Single reference: return as string for backward compatibility + } + return refs; // Multiple references: return as array +} + +multiRefIdParts = first:reference rest:(__ !(":" / eol / ")") r:reference { return r; })* { return [first].concat(rest); } singleLineValueLink = v:singleLineValues { return { values: v }; } multiLineValueLink = "(" v:multiLineValues _ ")" { return { values: v }; } -indentedIdLink = id:reference __ ":" eol { return { id: id, values: [] }; } +indentedIdLink = id:multiRefId __ ":" eol { return { id: id, values: [], isMultiRef: Array.isArray(id) && id.length > 1 }; } // Reference can be quoted (with any number of quotes N >= 1) or simple unquoted // Universal approach: use procedural parsing for all quote types and counts diff --git a/js/src/parser-generated.js b/js/src/parser-generated.js index b9030a9c..c0d47611 100644 --- 
a/js/src/parser-generated.js +++ b/js/src/parser-generated.js @@ -212,13 +212,21 @@ function peg$parse(input, options) { function peg$f17(list) { return list; } function peg$f18(value) { return value; } function peg$f19(list) { return list; } - function peg$f20(id, v) { return { id: id, values: v }; } - function peg$f21(id, v) { return { id: id, values: v }; } - function peg$f22(v) { return { values: v }; } - function peg$f23(v) { return { values: v }; } - function peg$f24(id) { return { id: id, values: [] }; } - function peg$f25(chars) { return chars.join(''); } - function peg$f26() { + function peg$f20(id, v) { return { id: id, values: v, isMultiRef: Array.isArray(id) && id.length > 1 }; } + function peg$f21(id, v) { return { id: id, values: v, isMultiRef: Array.isArray(id) && id.length > 1 }; } + function peg$f22(refs) { + if (refs.length === 1) { + return refs[0]; // Single reference: return as string for backward compatibility + } + return refs; // Multiple references: return as array + } + function peg$f23(first, r) { return r; } + function peg$f24(first, rest) { return [first].concat(rest); } + function peg$f25(v) { return { values: v }; } + function peg$f26(v) { return { values: v }; } + function peg$f27(id) { return { id: id, values: [], isMultiRef: Array.isArray(id) && id.length > 1 }; } + function peg$f28(chars) { return chars.join(''); } + function peg$f29() { const pos = offset(); const result = parseQuotedStringAt(input, pos, '"'); if (result) { @@ -228,11 +236,11 @@ function peg$parse(input, options) { } return false; } - function peg$f27(chars) { return parsedValue; } - function peg$f28(c, cs) { return [c].concat(cs).join(''); } - function peg$f29() { return parsedLength > 1 && (parsedLength--, true); } - function peg$f30(c) { return c; } - function peg$f31() { + function peg$f30(chars) { return parsedValue; } + function peg$f31(c, cs) { return [c].concat(cs).join(''); } + function peg$f32() { return parsedLength > 1 && (parsedLength--, true); } + 
function peg$f33(c) { return c; } + function peg$f34() { const pos = offset(); const result = parseQuotedStringAt(input, pos, "'"); if (result) { @@ -242,11 +250,11 @@ function peg$parse(input, options) { } return false; } - function peg$f32(chars) { return parsedValue; } - function peg$f33(c, cs) { return [c].concat(cs).join(''); } - function peg$f34() { return parsedLength > 1 && (parsedLength--, true); } - function peg$f35(c) { return c; } - function peg$f36() { + function peg$f35(chars) { return parsedValue; } + function peg$f36(c, cs) { return [c].concat(cs).join(''); } + function peg$f37() { return parsedLength > 1 && (parsedLength--, true); } + function peg$f38(c) { return c; } + function peg$f39() { const pos = offset(); const result = parseQuotedStringAt(input, pos, '`'); if (result) { @@ -256,14 +264,14 @@ function peg$parse(input, options) { } return false; } - function peg$f37(chars) { return parsedValue; } - function peg$f38(c, cs) { return [c].concat(cs).join(''); } - function peg$f39() { return parsedLength > 1 && (parsedLength--, true); } - function peg$f40(c) { return c; } - function peg$f41(spaces) { setBaseIndentation(spaces); } - function peg$f42(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } - function peg$f43(spaces) { pushIndentation(spaces); } - function peg$f44(spaces) { return checkIndentation(spaces); } + function peg$f40(chars) { return parsedValue; } + function peg$f41(c, cs) { return [c].concat(cs).join(''); } + function peg$f42() { return parsedLength > 1 && (parsedLength--, true); } + function peg$f43(c) { return c; } + function peg$f44(spaces) { setBaseIndentation(spaces); } + function peg$f45(spaces) { return normalizeIndentation(spaces) > getCurrentIndentation(); } + function peg$f46(spaces) { pushIndentation(spaces); } + function peg$f47(spaces) { return checkIndentation(spaces); } let peg$currPos = options.peg$currPos | 0; let peg$savedPos = peg$currPos; const peg$posDetailsCache = [{ line: 1, column: 
1 }]; @@ -861,7 +869,7 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = peg$parse__(); - s2 = peg$parsereference(); + s2 = peg$parsemultiRefId(); if (s2 !== peg$FAILED) { s3 = peg$parse__(); if (input.charCodeAt(peg$currPos) === 58) { @@ -905,7 +913,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { s2 = peg$parse_(); - s3 = peg$parsereference(); + s3 = peg$parsemultiRefId(); if (s3 !== peg$FAILED) { s4 = peg$parse_(); if (input.charCodeAt(peg$currPos) === 58) { @@ -948,6 +956,126 @@ function peg$parse(input, options) { return s0; } + function peg$parsemultiRefId() { + let s0, s1; + + s0 = peg$currPos; + s1 = peg$parsemultiRefIdParts(); + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f22(s1); + } + s0 = s1; + + return s0; + } + + function peg$parsemultiRefIdParts() { + let s0, s1, s2, s3, s4, s5, s6; + + s0 = peg$currPos; + s1 = peg$parsereference(); + if (s1 !== peg$FAILED) { + s2 = []; + s3 = peg$currPos; + s4 = peg$parse__(); + s5 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 58) { + s6 = peg$c0; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e2); } + } + if (s6 === peg$FAILED) { + s6 = peg$parseeol(); + if (s6 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 41) { + s6 = peg$c2; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e4); } + } + } + } + peg$silentFails--; + if (s6 === peg$FAILED) { + s5 = undefined; + } else { + peg$currPos = s5; + s5 = peg$FAILED; + } + if (s5 !== peg$FAILED) { + s6 = peg$parsereference(); + if (s6 !== peg$FAILED) { + peg$savedPos = s3; + s3 = peg$f23(s1, s6); + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + while (s3 !== peg$FAILED) { + s2.push(s3); + s3 = peg$currPos; + s4 = peg$parse__(); + s5 = peg$currPos; + peg$silentFails++; + if (input.charCodeAt(peg$currPos) === 58) { + s6 = peg$c0; + peg$currPos++; 
+ } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e2); } + } + if (s6 === peg$FAILED) { + s6 = peg$parseeol(); + if (s6 === peg$FAILED) { + if (input.charCodeAt(peg$currPos) === 41) { + s6 = peg$c2; + peg$currPos++; + } else { + s6 = peg$FAILED; + if (peg$silentFails === 0) { peg$fail(peg$e4); } + } + } + } + peg$silentFails--; + if (s6 === peg$FAILED) { + s5 = undefined; + } else { + peg$currPos = s5; + s5 = peg$FAILED; + } + if (s5 !== peg$FAILED) { + s6 = peg$parsereference(); + if (s6 !== peg$FAILED) { + peg$savedPos = s3; + s3 = peg$f23(s1, s6); + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + } else { + peg$currPos = s3; + s3 = peg$FAILED; + } + } + peg$savedPos = s0; + s0 = peg$f24(s1, s2); + } else { + peg$currPos = s0; + s0 = peg$FAILED; + } + + return s0; + } + function peg$parsesingleLineValueLink() { let s0, s1; @@ -955,7 +1083,7 @@ function peg$parse(input, options) { s1 = peg$parsesingleLineValues(); if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f22(s1); + s1 = peg$f25(s1); } s0 = s1; @@ -985,7 +1113,7 @@ function peg$parse(input, options) { } if (s4 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f23(s2); + s0 = peg$f26(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1002,7 +1130,7 @@ function peg$parse(input, options) { let s0, s1, s2, s3, s4; s0 = peg$currPos; - s1 = peg$parsereference(); + s1 = peg$parsemultiRefId(); if (s1 !== peg$FAILED) { s2 = peg$parse__(); if (input.charCodeAt(peg$currPos) === 58) { @@ -1016,7 +1144,7 @@ function peg$parse(input, options) { s4 = peg$parseeol(); if (s4 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f24(s1); + s0 = peg$f27(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1060,7 +1188,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f25(s1); + s1 = peg$f28(s1); } s0 = s1; @@ -1103,7 +1231,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = peg$currPos; - s2 = peg$f26(); + s2 = 
peg$f29(); if (s2) { s2 = undefined; } else { @@ -1113,7 +1241,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeDouble(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f27(s3); + s0 = peg$f30(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1149,7 +1277,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeDoubleMore(); } peg$savedPos = s0; - s0 = peg$f28(s1, s2); + s0 = peg$f31(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1163,7 +1291,7 @@ function peg$parse(input, options) { s0 = peg$currPos; peg$savedPos = peg$currPos; - s1 = peg$f29(); + s1 = peg$f32(); if (s1) { s1 = undefined; } else { @@ -1179,7 +1307,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f30(s2); + s0 = peg$f33(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1214,7 +1342,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = peg$currPos; - s2 = peg$f31(); + s2 = peg$f34(); if (s2) { s2 = undefined; } else { @@ -1224,7 +1352,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeSingle(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f32(s3); + s0 = peg$f35(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1260,7 +1388,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeSingleMore(); } peg$savedPos = s0; - s0 = peg$f33(s1, s2); + s0 = peg$f36(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1274,7 +1402,7 @@ function peg$parse(input, options) { s0 = peg$currPos; peg$savedPos = peg$currPos; - s1 = peg$f34(); + s1 = peg$f37(); if (s1) { s1 = undefined; } else { @@ -1290,7 +1418,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f35(s2); + s0 = peg$f38(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1325,7 +1453,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = peg$currPos; - s2 = peg$f36(); + s2 = peg$f39(); if (s2) { s2 = undefined; } else { @@ 
-1335,7 +1463,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeBacktick(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f37(s3); + s0 = peg$f40(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1371,7 +1499,7 @@ function peg$parse(input, options) { s3 = peg$parseconsumeBacktickMore(); } peg$savedPos = s0; - s0 = peg$f38(s1, s2); + s0 = peg$f41(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1385,7 +1513,7 @@ function peg$parse(input, options) { s0 = peg$currPos; peg$savedPos = peg$currPos; - s1 = peg$f39(); + s1 = peg$f42(); if (s1) { s1 = undefined; } else { @@ -1401,7 +1529,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f40(s2); + s0 = peg$f43(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1437,7 +1565,7 @@ function peg$parse(input, options) { } } peg$savedPos = s0; - s1 = peg$f41(s1); + s1 = peg$f44(s1); s0 = s1; return s0; @@ -1466,7 +1594,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f42(s1); + s2 = peg$f45(s1); if (s2) { s2 = undefined; } else { @@ -1474,7 +1602,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f43(s1); + s0 = peg$f46(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1506,7 +1634,7 @@ function peg$parse(input, options) { } } peg$savedPos = peg$currPos; - s2 = peg$f44(s1); + s2 = peg$f47(s1); if (s2) { s2 = undefined; } else { diff --git a/js/tests/MultiRefTests.test.js b/js/tests/MultiRefTests.test.js new file mode 100644 index 00000000..32a1d8c0 --- /dev/null +++ b/js/tests/MultiRefTests.test.js @@ -0,0 +1,180 @@ +import { describe, test, expect } from 'bun:test'; +import { Parser, Link, formatLinks } from '../src/index.js'; + +/** + * Multi-Reference Feature Tests (Issue #184) + * + * Tests for multi-word references without quotes: + * - (some example: some example is a link) + * - IDs as array: ["some", "example"] + * - id property throws for multi-refs, use 
ids instead + */ + +describe('Multi-Reference Parsing', () => { + const parser = new Parser(); + + describe('Basic multi-reference ID parsing', () => { + test('parses two-word multi-reference ID', () => { + const result = parser.parse('(some example: value)'); + expect(result.length).toBe(1); + // Use ids property for multi-references + expect(Array.isArray(result[0].ids)).toBe(true); + expect(result[0].ids).toEqual(['some', 'example']); + expect(result[0].values.length).toBe(1); + expect(result[0].values[0].id).toBe('value'); + }); + + test('parses three-word multi-reference ID', () => { + const result = parser.parse('(new york city: value)'); + expect(result.length).toBe(1); + expect(result[0].ids).toEqual(['new', 'york', 'city']); + }); + + test('parses four-word multi-reference ID', () => { + const result = parser.parse('(a b c d: value)'); + expect(result.length).toBe(1); + expect(result[0].ids).toEqual(['a', 'b', 'c', 'd']); + }); + + test('single-word ID still accessible via id property (backward compatibility)', () => { + const result = parser.parse('(papa: value)'); + expect(result.length).toBe(1); + // Single-word: id returns string, ids returns array with single element + expect(typeof result[0].id).toBe('string'); + expect(result[0].id).toBe('papa'); + expect(result[0].ids).toEqual(['papa']); + }); + + test('quoted multi-word ID remains string (backward compatibility)', () => { + const result = parser.parse("('some example': value)"); + expect(result.length).toBe(1); + // Quoted multi-word is a single reference, so id works + expect(typeof result[0].id).toBe('string'); + expect(result[0].id).toBe('some example'); + expect(result[0].ids).toEqual(['some example']); + }); + + test('id property throws for multi-reference IDs', () => { + const result = parser.parse('(some example: value)'); + expect(() => result[0].id).toThrow( + /Use the 'ids' property instead of 'id'/ + ); + }); + }); + + describe('Multi-reference values are NOT context-aware', () => { + 
// Per issue #184 feedback: context-aware parsing is out of scope + test('values are parsed as separate references', () => { + const result = parser.parse('(some example: some example is a link)'); + expect(result[0].ids).toEqual(['some', 'example']); + // Values should be 5 separate references (no context-aware grouping) + expect(result[0].values.length).toBe(5); + expect(result[0].values[0].id).toBe('some'); + expect(result[0].values[1].id).toBe('example'); + expect(result[0].values[2].id).toBe('is'); + expect(result[0].values[3].id).toBe('a'); + expect(result[0].values[4].id).toBe('link'); + }); + + test('three-word multi-reference values are separate', () => { + const result = parser.parse('(new york city: new york city is great)'); + expect(result[0].ids).toEqual(['new', 'york', 'city']); + // Values should be 5 separate references + expect(result[0].values.length).toBe(5); + expect(result[0].values[0].id).toBe('new'); + expect(result[0].values[1].id).toBe('york'); + expect(result[0].values[2].id).toBe('city'); + expect(result[0].values[3].id).toBe('is'); + expect(result[0].values[4].id).toBe('great'); + }); + }); + + describe('Multi-reference formatting', () => { + test('formats multi-reference ID without quotes', () => { + const result = parser.parse('(some example: value)'); + const formatted = formatLinks(result, true); + // Multi-reference IDs need parentheses since they contain space-separated words + expect(formatted).toBe('(some example: value)'); + }); + + test('round-trip: parse then format preserves structure', () => { + const input = '(new york city: one two three)'; + const result = parser.parse(input); + const formatted = formatLinks(result, true); + expect(formatted).toBe('(new york city: one two three)'); + }); + }); + + describe('Multi-reference with indented syntax', () => { + test('parses indented multi-reference ID', () => { + const input = `some example: + value1 + value2`; + const result = parser.parse(input); + 
expect(result.length).toBe(1); + expect(result[0].ids).toEqual(['some', 'example']); + expect(result[0].values.length).toBe(2); + }); + }); + + describe('Edge cases', () => { + test('handles multi-reference with special characters in quoted parts', () => { + // Mixed: unquoted multi-ref ID, quoted value with special chars + const result = parser.parse("(some example: 'value:special')"); + expect(result[0].ids).toEqual(['some', 'example']); + expect(result[0].values[0].id).toBe('value:special'); + }); + + test('handles empty values with multi-reference ID', () => { + const result = parser.parse('(some example:)'); + expect(result[0].ids).toEqual(['some', 'example']); + expect(result[0].values.length).toBe(0); + }); + + test('multiple links with same multi-reference definition', () => { + const input = `(some example: first) +(some example: second)`; + const result = parser.parse(input); + expect(result.length).toBe(2); + expect(result[0].ids).toEqual(['some', 'example']); + expect(result[1].ids).toEqual(['some', 'example']); + }); + }); +}); + +describe('Backward Compatibility', () => { + const parser = new Parser(); + + test('existing single-line syntax still works', () => { + const result = parser.parse('papa: loves mama'); + expect(result[0].id).toBe('papa'); + expect(result[0].values[0].id).toBe('loves'); + expect(result[0].values[1].id).toBe('mama'); + }); + + test('existing parenthesized syntax still works', () => { + const result = parser.parse('(papa: loves mama)'); + expect(result[0].id).toBe('papa'); + expect(result[0].values[0].id).toBe('loves'); + expect(result[0].values[1].id).toBe('mama'); + }); + + test('existing quoted ID syntax still works', () => { + const result = parser.parse("('multi word id': value)"); + expect(result[0].id).toBe('multi word id'); + expect(result[0].values[0].id).toBe('value'); + }); + + test('existing nested links still work', () => { + const result = parser.parse('(outer: (inner: value))'); + 
expect(result[0].id).toBe('outer'); + expect(result[0].values[0].id).toBe('inner'); + expect(result[0].values[0].values[0].id).toBe('value'); + }); + + test('existing value-only links still work', () => { + const result = parser.parse('(a b c)'); + expect(result[0].ids).toBe(null); + expect(result[0].values.length).toBe(3); + }); +}); diff --git a/python/links_notation/link.py b/python/links_notation/link.py index 0bf1e36a..09df1d26 100644 --- a/python/links_notation/link.py +++ b/python/links_notation/link.py @@ -16,34 +16,92 @@ class Link: - A simple reference (id only, no values) - A link with id and values - A link with only values (no id) + + For multi-reference IDs (e.g., "some example" before colon), use the `ids` property. + The `id` property will throw an error for multi-reference IDs. """ - def __init__(self, link_id: Optional[str] = None, values: Optional[List["Link"]] = None): + def __init__(self, link_id: Optional[Union[str, List[str]]] = None, values: Optional[List["Link"]] = None): """ Initialize a Link. Args: - link_id: Optional identifier for the link + link_id: Optional identifier for the link (string or list of strings for multi-reference) values: Optional list of child links """ - self.id = link_id + # Store ids as a list internally (primary storage) + if link_id is None: + self._ids: Optional[List[str]] = None + elif isinstance(link_id, list): + self._ids = link_id + else: + self._ids = [link_id] + self.values = values if values is not None else [] self._is_from_path_combination = False + @property + def ids(self) -> Optional[List[str]]: + """Get the ids list (primary storage for reference identifiers).""" + return self._ids + + @ids.setter + def ids(self, value: Optional[List[str]]) -> None: + """Set the ids list.""" + self._ids = value + + @property + def id(self) -> Optional[str]: + """ + Get the id as a single string (backward compatibility). 
+ + Raises: + ValueError: If ids has more than one element (use ids property instead) + + Returns: + Single reference string, or None if no id + """ + if self._ids is None: + return None + if len(self._ids) > 1: + raise ValueError( + f"This link has a multi-reference id with {len(self._ids)} parts. " + "Use the 'ids' property instead of 'id'." + ) + return self._ids[0] + + @id.setter + def id(self, value: Optional[Union[str, List[str]]]) -> None: + """Set the id (backward compatibility).""" + if value is None: + self._ids = None + elif isinstance(value, list): + self._ids = value + else: + self._ids = [value] + def __str__(self) -> str: """String representation using standard formatting.""" return self.format(False) def __repr__(self) -> str: """Developer-friendly representation.""" - return f"Link(id={self.id!r}, values={self.values!r})" + return f"Link(ids={self._ids!r}, values={self.values!r})" def __eq__(self, other) -> bool: """Check equality with another Link.""" if not isinstance(other, Link): return False - if self.id != other.id: + # Compare ids lists + if self._ids is None and other._ids is not None: + return False + if self._ids is not None and other._ids is None: return False + if self._ids is not None and other._ids is not None: + if len(self._ids) != len(other._ids): + return False + if not all(a == b for a, b in zip(self._ids, other._ids)): + return False if len(self.values) != len(other.values): return False return all(v1 == v2 for v1, v2 in zip(self.values, other.values)) @@ -67,7 +125,7 @@ def simplify(self) -> "Link": return self.values[0] else: new_values = [v.simplify() for v in self.values] - return Link(self.id, new_values) + return Link(self._ids, new_values) def combine(self, other: "Link") -> "Link": """Combine this link with another to create a compound link.""" @@ -79,33 +137,41 @@ def get_value_string(value: "Link") -> str: return value.to_link_or_id_string() @staticmethod - def escape_reference(reference: Optional[str]) -> str: + def 
escape_reference(reference: Optional[Union[str, List[str]]]) -> str: """ - Escape a reference string if it contains special characters. + Escape a reference string or multi-reference list if it contains special characters. Args: - reference: The reference string to escape + reference: The reference string or list of strings (multi-reference) to escape Returns: Escaped reference with quotes if needed """ - if not reference or not reference.strip(): + # Handle multi-reference (list of strings) + if isinstance(reference, list): + # Multi-reference: join with space, each part should be a simple reference + return " ".join(Link.escape_reference(r) for r in reference) + + if not reference or (isinstance(reference, str) and not reference.strip()): return "" + # Ensure reference is a string + ref_str = str(reference) + # Check if single quotes are needed - needs_single_quotes = any(c in reference for c in [":", "(", ")", " ", "\t", "\n", "\r", '"']) + needs_single_quotes = any(c in ref_str for c in [":", "(", ")", " ", "\t", "\n", "\r", '"']) if needs_single_quotes: - return f"'{reference}'" - elif "'" in reference: - return f'"{reference}"' + return f"'{ref_str}'" + elif "'" in ref_str: + return f'"{ref_str}"' else: - return reference + return ref_str def to_link_or_id_string(self) -> str: """Convert to string, using just ID if no values, otherwise full format.""" if not self.values: - return Link.escape_reference(self.id) if self.id is not None else "" + return Link.escape_reference(self._ids) if self._ids is not None else "" return str(self) def format(self, less_parentheses: Union[bool, "FormatConfig"] = False, is_compound_value: bool = False) -> str: @@ -127,37 +193,37 @@ def format(self, less_parentheses: Union[bool, "FormatConfig"] = False, is_compo # Original implementation for backward compatibility # Empty link - if self.id is None and not self.values: + if self._ids is None and not self.values: return "" if less_parentheses else "()" # Link with only ID, no 
values if not self.values: - escaped_id = Link.escape_reference(self.id) + escaped_id = Link.escape_reference(self._ids) # When used as a value in a compound link, wrap in parentheses if is_compound_value: return f"({escaped_id})" - return escaped_id if (less_parentheses and not self.needs_parentheses(self.id)) else f"({escaped_id})" + return escaped_id if (less_parentheses and not self.needs_parentheses(self._ids)) else f"({escaped_id})" # Format values recursively values_str = " ".join(self.format_value(v) for v in self.values) # Link with values only (null id) - if self.id is None: + if self._ids is None: if less_parentheses: # Check if all values are simple (no nested values) all_simple = all(not v.values for v in self.values) if all_simple: # Format each value without extra wrapping - return " ".join(Link.escape_reference(v.id) for v in self.values) + return " ".join(Link.escape_reference(v._ids) for v in self.values) # For mixed or complex values, return without outer wrapper return values_str # For normal mode, wrap in parentheses return f"({values_str})" # Link with ID and values - id_str = Link.escape_reference(self.id) + id_str = Link.escape_reference(self._ids) with_colon = f"{id_str}: {values_str}" - return with_colon if (less_parentheses and not self.needs_parentheses(self.id)) else f"({with_colon})" + return with_colon if (less_parentheses and not self.needs_parentheses(self._ids)) else f"({with_colon})" def format_value(self, value: "Link") -> str: """ @@ -178,13 +244,16 @@ def format_value(self, value: "Link") -> str: # Simple link with just an ID - don't wrap in parentheses when used as a value if not value.values: - return Link.escape_reference(value.id) + return Link.escape_reference(value._ids) # Complex value with its own structure - format it normally with parentheses return value.format(False, False) - def needs_parentheses(self, s: Optional[str]) -> bool: - """Check if a string needs to be wrapped in parentheses.""" + def 
needs_parentheses(self, s: Optional[Union[str, List[str]]]) -> bool: + """Check if a string or array needs to be wrapped in parentheses.""" + # Multi-reference arrays always need parentheses when formatted inline + if isinstance(s, list): + return len(s) > 1 return s and any(c in s for c in [" ", ":", "(", ")"]) def _format_with_config(self, config: "FormatConfig", is_compound_value: bool = False) -> str: @@ -203,16 +272,16 @@ def _format_with_config(self, config: "FormatConfig", is_compound_value: bool = from .format_config import FormatConfig # noqa: F401 # Empty link - if self.id is None and not self.values: + if self._ids is None and not self.values: return "" if config.less_parentheses else "()" # Link with only ID, no values if not self.values: - escaped_id = Link.escape_reference(self.id) + escaped_id = Link.escape_reference(self._ids) if is_compound_value: return f"({escaped_id})" return ( - escaped_id if (config.less_parentheses and not self.needs_parentheses(self.id)) else f"({escaped_id})" + escaped_id if (config.less_parentheses and not self.needs_parentheses(self._ids)) else f"({escaped_id})" ) # Check if we should use indented format @@ -222,8 +291,8 @@ def _format_with_config(self, config: "FormatConfig", is_compound_value: bool = else: # Try inline format first values_str = " ".join(self.format_value(v) for v in self.values) - if self.id is not None: - id_str = Link.escape_reference(self.id) + if self._ids is not None: + id_str = Link.escape_reference(self._ids) test_line = f"{id_str}: {values_str}" if config.less_parentheses else f"({id_str}: {values_str})" else: test_line = values_str if config.less_parentheses else f"({values_str})" @@ -239,18 +308,18 @@ def _format_with_config(self, config: "FormatConfig", is_compound_value: bool = values_str = " ".join(self.format_value(v) for v in self.values) # Link with values only (null id) - if self.id is None: + if self._ids is None: if config.less_parentheses: all_simple = all(not v.values for v in 
self.values) if all_simple: - return " ".join(Link.escape_reference(v.id) for v in self.values) + return " ".join(Link.escape_reference(v._ids) for v in self.values) return values_str return f"({values_str})" # Link with ID and values - id_str = Link.escape_reference(self.id) + id_str = Link.escape_reference(self._ids) with_colon = f"{id_str}: {values_str}" - return with_colon if (config.less_parentheses and not self.needs_parentheses(self.id)) else f"({with_colon})" + return with_colon if (config.less_parentheses and not self.needs_parentheses(self._ids)) else f"({with_colon})" def _format_indented(self, config: "FormatConfig") -> str: """ @@ -262,13 +331,13 @@ def _format_indented(self, config: "FormatConfig") -> str: Returns: Indented formatted string """ - if self.id is None: + if self._ids is None: # Values only - format each on separate line lines = [self.format_value(v) for v in self.values] return "\n".join(config.indent_string + line for line in lines) # Link with ID - format as id:\n value1\n value2 - id_str = Link.escape_reference(self.id) + id_str = Link.escape_reference(self._ids) lines = [f"{id_str}:"] for v in self.values: lines.append(config.indent_string + self.format_value(v)) diff --git a/python/links_notation/parser.py b/python/links_notation/parser.py index 7a02122e..b84f0431 100644 --- a/python/links_notation/parser.py +++ b/python/links_notation/parser.py @@ -21,7 +21,11 @@ class Parser: Handles both inline and indented syntax for defining links. """ - def __init__(self, max_input_size: int = 10 * 1024 * 1024, max_depth: int = 1000): + def __init__( + self, + max_input_size: int = 10 * 1024 * 1024, + max_depth: int = 1000, + ): """ Initialize the parser. 
@@ -212,21 +216,30 @@ def _parse_line_content(self, content: str) -> Dict: inner = content[1:-1].strip() return self._parse_parenthesized(inner) - # Try indented ID syntax: id: + # Try indented ID syntax: id: (or multi-word: some example:) if content.endswith(":"): id_part = content[:-1].strip() - ref = self._extract_reference(id_part) - return {"id": ref, "values": [], "is_indented_id": True} - - # Try single-line link: id: values + multi_ref = self._extract_multi_reference_id(id_part) + return { + "id": multi_ref, + "values": [], + "is_indented_id": True, + "is_multi_ref": isinstance(multi_ref, list) and len(multi_ref) > 1, + } + + # Try single-line link: id: values (or multi-word: some example: values) if ":" in content and not (content.startswith('"') or content.startswith("'")): - parts = content.split(":", 1) - if len(parts) == 2: - id_part = parts[0].strip() - values_part = parts[1].strip() - ref = self._extract_reference(id_part) + colon_pos = self._find_colon_outside_quotes(content) + if colon_pos >= 0: + id_part = content[:colon_pos].strip() + values_part = content[colon_pos + 1 :].strip() + multi_ref = self._extract_multi_reference_id(id_part) values = self._parse_values(values_part) - return {"id": ref, "values": values} + return { + "id": multi_ref, + "values": values, + "is_multi_ref": isinstance(multi_ref, list) and len(multi_ref) > 1, + } # Simple value list values = self._parse_values(content) @@ -239,9 +252,14 @@ def _parse_parenthesized(self, inner: str) -> Dict: if colon_pos >= 0: id_part = inner[:colon_pos].strip() values_part = inner[colon_pos + 1 :].strip() - ref = self._extract_reference(id_part) + # Try to extract multi-reference ID (multiple space-separated words) + multi_ref = self._extract_multi_reference_id(id_part) values = self._parse_values(values_part) - return {"id": ref, "values": values} + return { + "id": multi_ref, + "values": values, + "is_multi_ref": isinstance(multi_ref, list) and len(multi_ref) > 1, + } # Just values values 
= self._parse_values(inner) @@ -423,6 +441,39 @@ def _extract_reference(self, text: str) -> str: # Unquoted return text + def _extract_multi_reference_id(self, text: str) -> Any: + """ + Extract a multi-reference ID from text. + + Multi-reference IDs are multiple space-separated words before a colon. + For example: "some example" -> ["some", "example"] + + If the ID is a single word or a quoted string, returns the string directly + for backward compatibility. + + Args: + text: The ID portion (before the colon) + + Returns: + Either a string (single reference) or list of strings (multi-reference) + """ + text = text.strip() + + # If quoted, treat as single reference (existing behavior) + for quote_char in ['"', "'", "`"]: + if text.startswith(quote_char): + return self._extract_reference(text) + + # Split by whitespace to check for multi-word + parts = text.split() + + if len(parts) == 1: + # Single word - return as string for backward compatibility + return parts[0] + else: + # Multiple words - return as list (multi-reference) + return parts + def _parse_multi_quote_string(self, text: str, quote_char: str, quote_count: int) -> Optional[str]: """ Parse a multi-quote string. 
diff --git a/python/tests/test_multi_reference.py b/python/tests/test_multi_reference.py new file mode 100644 index 00000000..f6bac582 --- /dev/null +++ b/python/tests/test_multi_reference.py @@ -0,0 +1,202 @@ +""" +Multi-Reference Feature Tests (Issue #184) + +Tests for multi-word references without quotes: +- (some example: some example is a link) +- IDs as list: ["some", "example"] +- id property throws for multi-refs, use ids instead +""" + +import pytest + +from links_notation import Parser, format_links + + +class TestMultiReferenceParsing: + """Tests for basic multi-reference ID parsing.""" + + def test_parses_two_word_multi_reference_id(self): + """Test parsing two-word multi-reference ID.""" + parser = Parser() + result = parser.parse("(some example: value)") + assert len(result) == 1 + # Use ids property for multi-references + assert isinstance(result[0].ids, list) + assert result[0].ids == ["some", "example"] + assert len(result[0].values) == 1 + assert result[0].values[0].id == "value" + + def test_parses_three_word_multi_reference_id(self): + """Test parsing three-word multi-reference ID.""" + parser = Parser() + result = parser.parse("(new york city: value)") + assert len(result) == 1 + assert result[0].ids == ["new", "york", "city"] + + def test_parses_four_word_multi_reference_id(self): + """Test parsing four-word multi-reference ID.""" + parser = Parser() + result = parser.parse("(a b c d: value)") + assert len(result) == 1 + assert result[0].ids == ["a", "b", "c", "d"] + + def test_single_word_id_accessible_via_id_property(self): + """Test backward compatibility: single-word ID accessible via id property.""" + parser = Parser() + result = parser.parse("(papa: value)") + assert len(result) == 1 + # Single-word: id returns string, ids returns list with single element + assert isinstance(result[0].id, str) + assert result[0].id == "papa" + assert result[0].ids == ["papa"] + + def test_quoted_multi_word_id_remains_string(self): + """Test backward 
compatibility: quoted multi-word ID remains string.""" + parser = Parser() + result = parser.parse("('some example': value)") + assert len(result) == 1 + # Quoted multi-word is a single reference, so id works + assert isinstance(result[0].id, str) + assert result[0].id == "some example" + assert result[0].ids == ["some example"] + + def test_id_property_throws_for_multi_reference(self): + """Test that id property throws for multi-reference IDs.""" + parser = Parser() + result = parser.parse("(some example: value)") + with pytest.raises(ValueError, match="Use the 'ids' property instead of 'id'"): + _ = result[0].id + + +class TestNoContextAwareParsing: + """Tests that values are NOT context-aware (per issue #184 feedback).""" + + def test_values_parsed_as_separate_references(self): + """Test that values are parsed as separate references.""" + parser = Parser() + result = parser.parse("(some example: some example is a link)") + assert result[0].ids == ["some", "example"] + # Values should be 5 separate references (no context-aware grouping) + assert len(result[0].values) == 5 + assert result[0].values[0].id == "some" + assert result[0].values[1].id == "example" + assert result[0].values[2].id == "is" + assert result[0].values[3].id == "a" + assert result[0].values[4].id == "link" + + def test_three_word_multi_ref_values_separate(self): + """Test that three-word multi-ref values are separate.""" + parser = Parser() + result = parser.parse("(new york city: new york city is great)") + assert result[0].ids == ["new", "york", "city"] + # Values should be 5 separate references + assert len(result[0].values) == 5 + assert result[0].values[0].id == "new" + assert result[0].values[1].id == "york" + assert result[0].values[2].id == "city" + assert result[0].values[3].id == "is" + assert result[0].values[4].id == "great" + + +class TestMultiRefFormatting: + """Tests for multi-reference formatting.""" + + def test_formats_multi_reference_id(self): + """Test formatting 
multi-reference ID.""" + parser = Parser() + result = parser.parse("(some example: value)") + formatted = format_links(result, True) + assert formatted == "(some example: value)" + + def test_round_trip_preserves_structure(self): + """Test that parse then format preserves structure.""" + parser = Parser() + input_text = "(new york city: one two three)" + result = parser.parse(input_text) + formatted = format_links(result, True) + assert formatted == "(new york city: one two three)" + + +class TestMultiRefIndentedSyntax: + """Tests for multi-reference with indented syntax.""" + + def test_parses_indented_multi_reference_id(self): + """Test parsing indented multi-reference ID.""" + parser = Parser() + input_text = """some example: + value1 + value2""" + result = parser.parse(input_text) + assert len(result) == 1 + assert result[0].ids == ["some", "example"] + assert len(result[0].values) == 2 + + +class TestEdgeCases: + """Edge case tests for multi-reference feature.""" + + def test_multi_ref_with_quoted_value(self): + """Test multi-reference with special characters in quoted parts.""" + parser = Parser() + result = parser.parse("(some example: 'value:special')") + assert result[0].ids == ["some", "example"] + assert result[0].values[0].id == "value:special" + + def test_empty_values_with_multi_ref_id(self): + """Test empty values with multi-reference ID.""" + parser = Parser() + result = parser.parse("(some example:)") + assert result[0].ids == ["some", "example"] + assert len(result[0].values) == 0 + + def test_multiple_links_same_multi_ref(self): + """Test multiple links with same multi-reference definition.""" + parser = Parser() + input_text = """(some example: first) +(some example: second)""" + result = parser.parse(input_text) + assert len(result) == 2 + assert result[0].ids == ["some", "example"] + assert result[1].ids == ["some", "example"] + + +class TestBackwardCompatibility: + """Backward compatibility tests.""" + + def 
test_existing_single_line_syntax(self): + """Test existing single-line syntax still works.""" + parser = Parser() + result = parser.parse("papa: loves mama") + assert result[0].id == "papa" + assert result[0].values[0].id == "loves" + assert result[0].values[1].id == "mama" + + def test_existing_parenthesized_syntax(self): + """Test existing parenthesized syntax still works.""" + parser = Parser() + result = parser.parse("(papa: loves mama)") + assert result[0].id == "papa" + assert result[0].values[0].id == "loves" + assert result[0].values[1].id == "mama" + + def test_existing_quoted_id_syntax(self): + """Test existing quoted ID syntax still works.""" + parser = Parser() + result = parser.parse("('multi word id': value)") + assert result[0].id == "multi word id" + assert result[0].values[0].id == "value" + + def test_existing_nested_links(self): + """Test existing nested links still work.""" + parser = Parser() + result = parser.parse("(outer: (inner: value))") + assert result[0].id == "outer" + assert result[0].values[0].id == "inner" + assert result[0].values[0].values[0].id == "value" + + def test_existing_value_only_links(self): + """Test existing value-only links still work.""" + parser = Parser() + result = parser.parse("(a b c)") + assert result[0].ids is None + assert len(result[0].values) == 3 diff --git a/rust/src/lib.rs b/rust/src/lib.rs index c3be7c42..14cf91dd 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -28,9 +28,33 @@ impl fmt::Display for ParseError { impl StdError for ParseError {} +/// Error type for accessing `id` on a multi-reference Link. +#[derive(Debug, Clone, PartialEq)] +pub struct MultiRefError { + pub count: usize, +} + +impl fmt::Display for MultiRefError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "This link has a multi-reference id with {} parts. 
Use 'ids()' instead of 'id()'.", + self.count + ) + } +} + +impl StdError for MultiRefError {} + #[derive(Debug, Clone, PartialEq)] pub enum LiNo { - Link { id: Option, values: Vec }, + /// A link with optional multi-reference ids and values. + /// The `ids` field stores references as a vector (like JS/Python). + Link { + ids: Option>, + values: Vec, + }, + /// A simple reference value. Ref(T), } @@ -42,9 +66,42 @@ impl LiNo { pub fn is_link(&self) -> bool { matches!(self, LiNo::Link { .. }) } + + /// Get the ids array (primary storage for reference identifiers). + /// Returns None if this is a Ref variant or if ids is None. + pub fn ids(&self) -> Option<&Vec> { + match self { + LiNo::Link { ids, .. } => ids.as_ref(), + LiNo::Ref(_) => None, + } + } + + /// Get the id as a single reference (backward compatibility). + /// Returns an error if ids has more than one element. + /// Use `ids()` for multi-reference access. + pub fn id(&self) -> Result, MultiRefError> { + match self { + LiNo::Link { ids, .. } => match ids { + None => Ok(None), + Some(v) if v.len() > 1 => Err(MultiRefError { count: v.len() }), + Some(v) => Ok(v.first()), + }, + LiNo::Ref(_) => Ok(None), + } + } } impl LiNo { + /// Helper to get the id as a joined string (for formatting purposes). + fn ids_to_string(ids: &Option>) -> Option { + ids.as_ref().map(|v| { + v.iter() + .map(|t| t.to_string()) + .collect::>() + .join(" ") + }) + } + /// Format the link using FormatConfig configuration. 
/// /// # Arguments @@ -62,9 +119,9 @@ impl LiNo { format!("({})", escaped) } } - LiNo::Link { id, values } => { + LiNo::Link { ids, values } => { // Empty link - if id.is_none() && values.is_empty() { + if ids.is_none() && values.is_empty() { return if config.less_parentheses { String::new() } else { @@ -74,10 +131,9 @@ impl LiNo { // Link with only ID, no values if values.is_empty() { - if let Some(ref id_val) = id { - let escaped_id = escape_reference(&id_val.to_string()); - return if config.less_parentheses && !needs_parentheses(&id_val.to_string()) - { + if let Some(id_str) = Self::ids_to_string(ids) { + let escaped_id = escape_reference(&id_str); + return if config.less_parentheses && !needs_parentheses(&id_str) { escaped_id } else { format!("({})", escaped_id) @@ -102,12 +158,12 @@ impl LiNo { .collect::>() .join(" "); - let test_line = if let Some(ref id_val) = id { - let id_str = escape_reference(&id_val.to_string()); + let test_line = if let Some(id_str) = Self::ids_to_string(ids) { + let escaped_id = escape_reference(&id_str); if config.less_parentheses { - format!("{}: {}", id_str, values_str) + format!("{}: {}", escaped_id, values_str) } else { - format!("({}: {})", id_str, values_str) + format!("({}: {})", escaped_id, values_str) } } else if config.less_parentheses { values_str.clone() @@ -133,7 +189,7 @@ impl LiNo { .join(" "); // Link with values only (null id) - if id.is_none() { + if ids.is_none() { if config.less_parentheses { // Check if all values are simple (no nested values) let all_simple = values.iter().all(|v| matches!(v, LiNo::Ref(_))); @@ -153,10 +209,10 @@ impl LiNo { } // Link with ID and values - let id_str = escape_reference(&id.as_ref().unwrap().to_string()); - let with_colon = format!("{}: {}", id_str, values_str); - if config.less_parentheses && !needs_parentheses(&id.as_ref().unwrap().to_string()) - { + let id_str = Self::ids_to_string(ids).unwrap(); + let escaped_id = escape_reference(&id_str); + let with_colon = format!("{}: 
{}", escaped_id, values_str); + if config.less_parentheses && !needs_parentheses(&id_str) { with_colon } else { format!("({})", with_colon) @@ -172,8 +228,8 @@ impl LiNo { let escaped = escape_reference(&value.to_string()); format!("({})", escaped) } - LiNo::Link { id, values } => { - if id.is_none() { + LiNo::Link { ids, values } => { + if ids.is_none() { // Values only - format each on separate line values .iter() @@ -182,7 +238,7 @@ impl LiNo { .join("\n") } else { // Link with ID - format as id:\n value1\n value2 - let id_str = escape_reference(&id.as_ref().unwrap().to_string()); + let id_str = escape_reference(&Self::ids_to_string(ids).unwrap()); let mut lines = vec![format!("{}:", id_str)]; for v in values { lines.push(format!("{}{}", config.indent_string, format_value(v))); @@ -198,10 +254,17 @@ impl fmt::Display for LiNo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { LiNo::Ref(value) => write!(f, "{}", value.to_string()), - LiNo::Link { id, values } => { - let id_str = id + LiNo::Link { ids, values } => { + let id_str = ids .as_ref() - .map(|id| format!("{}: ", id.to_string())) + .map(|v| { + let joined = v + .iter() + .map(|t| t.to_string()) + .collect::>() + .join(" "); + format!("{}: ", joined) + }) .unwrap_or_default(); if f.alternate() { @@ -237,17 +300,17 @@ impl From for LiNo { fn from(link: parser::Link) -> Self { if link.values.is_empty() && link.children.is_empty() { if let Some(id) = link.id { - LiNo::Ref(id) + LiNo::Ref(id.to_single_string()) } else { LiNo::Link { - id: None, + ids: None, values: vec![], } } } else { let values: Vec> = link.values.into_iter().map(|v| v.into()).collect(); LiNo::Link { - id: link.id, + ids: link.id.map(|id| id.parts()), values, } } @@ -288,7 +351,7 @@ fn flatten_link_recursive( { // Use if let to safely extract the ID instead of unwrap() if let Some(ref id) = child.values[0].id { - LiNo::Ref(id.clone()) + LiNo::Ref(id.to_single_string()) } else { // If no ID, create an empty link 
parser::Link { @@ -296,6 +359,7 @@ fn flatten_link_recursive( values: child.values.clone(), children: vec![], is_indented_id: false, + is_multi_ref: false, } .into() } @@ -305,6 +369,7 @@ fn flatten_link_recursive( values: child.values.clone(), children: vec![], is_indented_id: false, + is_multi_ref: false, } .into() } @@ -312,7 +377,7 @@ fn flatten_link_recursive( .collect(); let current = LiNo::Link { - id: link.id.clone(), + ids: link.id.as_ref().map(|id| id.parts()), values: child_values, }; @@ -320,14 +385,14 @@ fn flatten_link_recursive( // Wrap parent in parentheses if it's a reference let wrapped_parent = match parent { LiNo::Ref(ref_id) => LiNo::Link { - id: None, + ids: None, values: vec![LiNo::Ref(ref_id.clone())], }, link => link.clone(), }; LiNo::Link { - id: None, + ids: None, values: vec![wrapped_parent, current], } } else { @@ -341,10 +406,10 @@ fn flatten_link_recursive( // Create the current link without children let current = if link.values.is_empty() { if let Some(id) = &link.id { - LiNo::Ref(id.clone()) + LiNo::Ref(id.to_single_string()) } else { LiNo::Link { - id: None, + ids: None, values: vec![], } } @@ -358,12 +423,13 @@ fn flatten_link_recursive( values: v.values.clone(), children: vec![], is_indented_id: false, + is_multi_ref: false, } .into() }) .collect(); LiNo::Link { - id: link.id.clone(), + ids: link.id.as_ref().map(|id| id.parts()), values, } }; @@ -373,7 +439,7 @@ fn flatten_link_recursive( // Wrap parent in parentheses if it's a reference let wrapped_parent = match parent { LiNo::Ref(ref_id) => LiNo::Link { - id: None, + ids: None, values: vec![LiNo::Ref(ref_id.clone())], }, link => link.clone(), @@ -382,14 +448,14 @@ fn flatten_link_recursive( // Wrap current in parentheses if it's a reference let wrapped_current = match ¤t { LiNo::Ref(ref_id) => LiNo::Link { - id: None, + ids: None, values: vec![LiNo::Ref(ref_id.clone())], }, link => link.clone(), }; LiNo::Link { - id: None, + ids: None, values: vec![wrapped_parent, 
wrapped_current], } } else { @@ -408,7 +474,7 @@ pub fn parse_lino(document: &str) -> Result, ParseError> { // Handle empty or whitespace-only input by returning empty result if document.trim().is_empty() { return Ok(LiNo::Link { - id: None, + ids: None, values: vec![], }); } @@ -417,14 +483,14 @@ pub fn parse_lino(document: &str) -> Result, ParseError> { Ok((_, links)) => { if links.is_empty() { Ok(LiNo::Link { - id: None, + ids: None, values: vec![], }) } else { // Flatten the indented structure according to Lino spec let flattened = flatten_links(links); Ok(LiNo::Link { - id: None, + ids: None, values: flattened, }) } @@ -520,7 +586,7 @@ fn group_consecutive_links(links: &[LiNo]) -> Vec> { // Look ahead for consecutive links with same ID if let LiNo::Link { - id: Some(ref current_id), + ids: Some(ref current_ids), values: ref current_values, } = current { @@ -531,11 +597,11 @@ fn group_consecutive_links(links: &[LiNo]) -> Vec> { while j < links.len() { if let LiNo::Link { - id: Some(ref next_id), + ids: Some(ref next_ids), values: ref next_values, } = &links[j] { - if next_id == current_id && !next_values.is_empty() { + if next_ids == current_ids && !next_values.is_empty() { same_id_values.extend(next_values.clone()); j += 1; } else { @@ -549,7 +615,7 @@ fn group_consecutive_links(links: &[LiNo]) -> Vec> { // If we found consecutive links, create grouped link if j > i + 1 { grouped.push(LiNo::Link { - id: Some(current_id.clone()), + ids: Some(current_ids.clone()), values: same_id_values, }); i = j; @@ -618,11 +684,16 @@ fn needs_parentheses(s: &str) -> bool { fn format_value(value: &LiNo) -> String { match value { LiNo::Ref(r) => escape_reference(&r.to_string()), - LiNo::Link { id, values } => { + LiNo::Link { ids, values } => { // Simple link with just an ID - don't wrap in extra parentheses if values.is_empty() { - if let Some(ref id_val) = id { - return escape_reference(&id_val.to_string()); + if let Some(ref ids_vec) = ids { + let joined = ids_vec + .iter() 
+ .map(|t| t.to_string()) + .collect::>() + .join(" "); + return escape_reference(&joined); } return String::new(); } diff --git a/rust/src/parser.rs b/rust/src/parser.rs index de8f23b4..1da94831 100644 --- a/rust/src/parser.rs +++ b/rust/src/parser.rs @@ -9,30 +9,82 @@ use nom::{ }; use std::cell::RefCell; +/// Represents a reference ID that can be either a single string or a multi-reference (multiple words). +#[derive(Debug, Clone, PartialEq)] +pub enum RefId { + /// Single-word reference + Single(String), + /// Multi-word reference (e.g., "some example" as vec!["some", "example"]) + Multi(Vec), +} + +impl RefId { + /// Check if this is a multi-reference + pub fn is_multi(&self) -> bool { + matches!(self, RefId::Multi(parts) if parts.len() > 1) + } + + /// Get the reference as a single string (joining with space for multi-ref) + pub fn to_single_string(&self) -> String { + match self { + RefId::Single(s) => s.clone(), + RefId::Multi(parts) => parts.join(" "), + } + } + + /// Get parts of the reference + pub fn parts(&self) -> Vec { + match self { + RefId::Single(s) => vec![s.clone()], + RefId::Multi(parts) => parts.clone(), + } + } +} + +impl From for RefId { + fn from(s: String) -> Self { + RefId::Single(s) + } +} + +impl From> for RefId { + fn from(v: Vec) -> Self { + if v.len() == 1 { + RefId::Single(v.into_iter().next().unwrap()) + } else { + RefId::Multi(v) + } + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Link { - pub id: Option, + pub id: Option, pub values: Vec, pub children: Vec, pub is_indented_id: bool, + pub is_multi_ref: bool, } impl Link { pub fn new_singlet(id: String) -> Self { Link { - id: Some(id), + id: Some(RefId::Single(id)), values: vec![], children: vec![], is_indented_id: false, + is_multi_ref: false, } } - pub fn new_indented_id(id: String) -> Self { + pub fn new_indented_id(id: RefId) -> Self { + let is_multi = id.is_multi(); Link { id: Some(id), values: vec![], children: vec![], is_indented_id: true, + is_multi_ref: is_multi, } 
} @@ -42,15 +94,18 @@ impl Link { values, children: vec![], is_indented_id: false, + is_multi_ref: false, } } - pub fn new_link(id: Option, values: Vec) -> Self { + pub fn new_link(id: Option, values: Vec) -> Self { + let is_multi = id.as_ref().map(|i| i.is_multi()).unwrap_or(false); Link { id, values, children: vec![], is_indented_id: false, + is_multi_ref: is_multi, } } @@ -58,6 +113,11 @@ impl Link { self.children = children; self } + + /// Get ID as String (for backward compatibility) + pub fn id_string(&self) -> Option { + self.id.as_ref().map(|id| id.to_single_string()) + } } pub struct ParserState { @@ -234,6 +294,55 @@ fn reference(input: &str) -> IResult<&str, String> { .parse(input) } +/// Parse a multi-reference ID (multiple space-separated words before colon). +/// Returns RefId::Single for single words, RefId::Multi for multiple words. +/// Stops when it encounters ':' or ')'. +fn multi_ref_id(input: &str) -> IResult<&str, RefId> { + let (input, first) = reference(input)?; + let mut parts = vec![first]; + let mut remaining = input; + + // Try to parse more references (space-separated, not followed by ':' immediately) + loop { + // Skip horizontal whitespace + let (after_ws, _) = horizontal_whitespace(remaining)?; + + // Check if we've hit the colon or closing paren - stop here + if after_ws.starts_with(':') || after_ws.starts_with(')') || after_ws.is_empty() { + break; + } + + // Check for end-of-line + if after_ws.starts_with('\n') || after_ws.starts_with('\r') { + break; + } + + // Try to parse another reference + match reference(after_ws) { + Ok((rest, ref_str)) => { + // Check that the next reference is followed by space or colon + // (not immediately by something else that would indicate nested structure) + if rest.starts_with(':') + || rest.starts_with(')') + || rest.is_empty() + || rest.starts_with(' ') + || rest.starts_with('\t') + || rest.starts_with('\n') + || rest.starts_with('\r') + { + parts.push(ref_str); + remaining = rest; + } else { + 
break; + } + } + Err(_) => break, + } + } + + Ok((remaining, RefId::from(parts))) +} + fn eol(input: &str) -> IResult<&str, &str> { alt(( preceded(horizontal_whitespace, line_ending), @@ -279,7 +388,7 @@ fn single_line_values<'a>(input: &'a str, state: &ParserState) -> IResult<&'a st fn single_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, Link> { ( horizontal_whitespace, - reference, + multi_ref_id, horizontal_whitespace, char(':'), |i| single_line_values(i, state), @@ -292,7 +401,7 @@ fn multi_line_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a str, ( char('('), whitespace, - reference, + multi_ref_id, whitespace, char(':'), |i| multi_line_values(i, state), @@ -311,7 +420,7 @@ fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&' && values[0].values.is_empty() && values[0].children.is_empty() { - Link::new_singlet(values[0].id.clone().unwrap()) + Link::new_singlet(values[0].id.as_ref().unwrap().to_single_string()) } else { Link::new_value(values) } @@ -320,7 +429,7 @@ fn single_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&' } fn indented_id_link<'a>(input: &'a str, _state: &ParserState) -> IResult<&'a str, Link> { - (reference, horizontal_whitespace, char(':'), eol) + (multi_ref_id, horizontal_whitespace, char(':'), eol) .map(|(id, _, _, _)| Link::new_indented_id(id)) .parse(input) } @@ -338,7 +447,7 @@ fn multi_line_value_link<'a>(input: &'a str, state: &ParserState) -> IResult<&'a && values[0].values.is_empty() && values[0].children.is_empty() { - Link::new_singlet(values[0].id.clone().unwrap()) + Link::new_singlet(values[0].id.as_ref().unwrap().to_single_string()) } else { Link::new_value(values) } diff --git a/rust/tests/api_tests.rs b/rust/tests/api_tests.rs index 9e521b8b..88db5a7c 100644 --- a/rust/tests/api_tests.rs +++ b/rust/tests/api_tests.rs @@ -10,7 +10,7 @@ fn test_is_ref() { #[test] fn test_is_link() { let link = LiNo::Link { - id: Some("id".to_string()), + 
ids: Some(vec!["id".to_string()]), values: vec![LiNo::Ref("child".to_string())], }; assert!(link.is_link()); @@ -29,7 +29,7 @@ fn test_is_ref_equivalent() { fn test_is_link_equivalent() { // Same as test_is_link, for API consistency with other languages let link = LiNo::Link { - id: Some("id".to_string()), + ids: Some(vec!["id".to_string()]), values: vec![LiNo::Ref("child".to_string())], }; assert!(link.is_link()); @@ -39,7 +39,7 @@ fn test_is_link_equivalent() { #[test] fn test_empty_link() { let link = LiNo::Link:: { - id: None, + ids: None, values: vec![], }; let output = link.to_string(); diff --git a/rust/tests/edge_case_parser_tests.rs b/rust/tests/edge_case_parser_tests.rs index 393306c5..4e5209c9 100644 --- a/rust/tests/edge_case_parser_tests.rs +++ b/rust/tests/edge_case_parser_tests.rs @@ -54,8 +54,8 @@ fn test_all_features_test() { let result = parse_lino(input); assert!(result.is_ok()); let parsed = result.unwrap(); - if let LiNo::Link { id, values } = parsed { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = parsed { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(ref_id) = &values[0] { assert_eq!(ref_id, "singlet"); @@ -126,8 +126,8 @@ fn test_singlet_links() { let result = parse_lino(input); assert!(result.is_ok()); let parsed = result.unwrap(); - if let LiNo::Link { id, values } = parsed { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = parsed { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(ref_id) = &values[0] { assert_eq!(ref_id, "1"); @@ -139,11 +139,11 @@ fn test_singlet_links() { let result = parse_lino(input); assert!(result.is_ok()); let parsed = result.unwrap(); - if let LiNo::Link { id, values } = parsed { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = parsed { + assert!(ids.is_none()); assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &values[0] { + 
assert!(ids.is_none()); assert_eq!(values.len(), 2); assert_eq!(values[0], LiNo::Ref("1".to_string())); assert_eq!(values[1], LiNo::Ref("2".to_string())); @@ -155,11 +155,11 @@ fn test_singlet_links() { let result = parse_lino(input); assert!(result.is_ok()); let parsed = result.unwrap(); - if let LiNo::Link { id, values } = parsed { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = parsed { + assert!(ids.is_none()); assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &values[0] { + assert!(ids.is_none()); assert_eq!(values.len(), 3); assert_eq!(values[0], LiNo::Ref("1".to_string())); assert_eq!(values[1], LiNo::Ref("2".to_string())); @@ -172,11 +172,11 @@ fn test_singlet_links() { let result = parse_lino(input); assert!(result.is_ok()); let parsed = result.unwrap(); - if let LiNo::Link { id, values } = parsed { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = parsed { + assert!(ids.is_none()); assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &values[0] { + assert!(ids.is_none()); assert_eq!(values.len(), 4); assert_eq!(values[0], LiNo::Ref("1".to_string())); assert_eq!(values[1], LiNo::Ref("2".to_string())); diff --git a/rust/tests/format_config_tests.rs b/rust/tests/format_config_tests.rs index 613737c9..c6e3ad53 100644 --- a/rust/tests/format_config_tests.rs +++ b/rust/tests/format_config_tests.rs @@ -105,7 +105,7 @@ fn should_indent_by_ref_count() { fn format_link_with_less_parentheses_integration() { // Create a link with ID and values let link: LiNo = LiNo::Link { - id: Some("id".to_string()), + ids: Some(vec!["id".to_string()]), values: vec![LiNo::Ref("value".to_string())], }; @@ -120,7 +120,7 @@ fn format_link_with_less_parentheses_integration() { fn format_link_with_max_inline_refs_integration() { // Create a link with 4 references let link: LiNo = 
LiNo::Link { - id: Some("id".to_string()), + ids: Some(vec!["id".to_string()]), values: vec![ LiNo::Ref("1".to_string()), LiNo::Ref("2".to_string()), @@ -147,7 +147,7 @@ fn format_link_with_max_inline_refs_integration() { fn format_link_with_line_length_limit_integration() { // Create a link with many references that exceeds line length let link: LiNo = LiNo::Link { - id: Some("sequence".to_string()), + ids: Some(vec!["sequence".to_string()]), values: (1..=10).map(|i| LiNo::Ref(i.to_string())).collect(), }; @@ -175,15 +175,15 @@ fn format_links_with_consecutive_grouping_integration() { // Create consecutive links with same ID let links: Vec> = vec![ LiNo::Link { - id: Some("SetA".to_string()), + ids: Some(vec!["SetA".to_string()]), values: vec![LiNo::Ref("a".to_string())], }, LiNo::Link { - id: Some("SetA".to_string()), + ids: Some(vec!["SetA".to_string()]), values: vec![LiNo::Ref("b".to_string())], }, LiNo::Link { - id: Some("SetA".to_string()), + ids: Some(vec!["SetA".to_string()]), values: vec![LiNo::Ref("c".to_string())], }, ]; @@ -203,7 +203,7 @@ fn format_links_with_consecutive_grouping_integration() { #[test] fn format_link_with_custom_indent_integration() { let link: LiNo = LiNo::Link { - id: Some("id".to_string()), + ids: Some(vec!["id".to_string()]), values: vec![ LiNo::Ref("1".to_string()), LiNo::Ref("2".to_string()), @@ -230,7 +230,7 @@ fn format_link_with_custom_indent_integration() { fn format_roundtrip_with_config_integration() { // Create a simple link let original_link: LiNo = LiNo::Link { - id: Some("test".to_string()), + ids: Some(vec!["test".to_string()]), values: vec![ LiNo::Ref("a".to_string()), LiNo::Ref("b".to_string()), diff --git a/rust/tests/link_tests.rs b/rust/tests/link_tests.rs index c4d5319a..c7357316 100644 --- a/rust/tests/link_tests.rs +++ b/rust/tests/link_tests.rs @@ -3,11 +3,11 @@ use links_notation::LiNo; #[test] fn link_constructor_with_id_only_test() { let link = LiNo::Link:: { - id: Some("test".to_string()), + ids: 
Some(vec!["test".to_string()]), values: vec![], }; - if let LiNo::Link { id, values } = link { - assert_eq!(id, Some("test".to_string())); + if let LiNo::Link { ids, values } = link { + assert_eq!(ids, Some(vec!["test".to_string()])); assert!(values.is_empty()); } else { panic!("Expected Link variant"); @@ -21,15 +21,15 @@ fn link_constructor_with_id_and_values_test() { LiNo::Ref("value2".to_string()), ]; let link = LiNo::Link { - id: Some("parent".to_string()), + ids: Some(vec!["parent".to_string()]), values: values.clone(), }; if let LiNo::Link { - id, + ids, values: link_values, } = link { - assert_eq!(id, Some("parent".to_string())); + assert_eq!(ids, Some(vec!["parent".to_string()])); assert_eq!(link_values.len(), 2); } else { panic!("Expected Link variant"); @@ -39,7 +39,7 @@ fn link_constructor_with_id_and_values_test() { #[test] fn link_to_string_with_id_only_test() { let link = LiNo::Link:: { - id: Some("test".to_string()), + ids: Some(vec!["test".to_string()]), values: vec![], }; assert_eq!(link.to_string(), "(test: )"); @@ -51,7 +51,7 @@ fn link_to_string_with_values_only_test() { LiNo::Ref("value1".to_string()), LiNo::Ref("value2".to_string()), ]; - let link = LiNo::Link:: { id: None, values }; + let link = LiNo::Link:: { ids: None, values }; assert_eq!(link.to_string(), "(value1 value2)"); } @@ -62,7 +62,7 @@ fn link_to_string_with_id_and_values_test() { LiNo::Ref("child2".to_string()), ]; let link = LiNo::Link { - id: Some("parent".to_string()), + ids: Some(vec!["parent".to_string()]), values, }; assert_eq!(link.to_string(), "(parent: child1 child2)"); @@ -71,15 +71,15 @@ fn link_to_string_with_id_and_values_test() { #[test] fn link_equals_test() { let link1 = LiNo::Link:: { - id: Some("test".to_string()), + ids: Some(vec!["test".to_string()]), values: vec![], }; let link2 = LiNo::Link:: { - id: Some("test".to_string()), + ids: Some(vec!["test".to_string()]), values: vec![], }; let link3 = LiNo::Link:: { - id: Some("other".to_string()), + ids: 
Some(vec!["other".to_string()]), values: vec![], }; @@ -93,12 +93,12 @@ fn link_combine_test() { let link1 = LiNo::Ref("first".to_string()); let link2 = LiNo::Ref("second".to_string()); let combined = LiNo::Link:: { - id: None, + ids: None, values: vec![link1, link2], }; - if let LiNo::Link { id, values } = combined { - assert_eq!(id, None); + if let LiNo::Link { ids, values } = combined { + assert_eq!(ids, None); assert_eq!(values.len(), 2); } else { panic!("Expected Link variant"); @@ -129,23 +129,23 @@ fn link_escape_reference_with_special_characters_test() { fn link_simplify_test() { // Test simplification behavior - empty values let link1 = LiNo::Link:: { - id: Some("test".to_string()), + ids: Some(vec!["test".to_string()]), values: vec![], }; // In Rust, we don't have a simplify method, but we can test the structure - if let LiNo::Link { id, values } = link1 { - assert_eq!(id, Some("test".to_string())); + if let LiNo::Link { ids, values } = link1 { + assert_eq!(ids, Some(vec!["test".to_string()])); assert!(values.is_empty()); } // Test with single value let single_ref = LiNo::Ref("single".to_string()); let link2 = LiNo::Link:: { - id: None, + ids: None, values: vec![single_ref.clone()], }; - if let LiNo::Link { id, values } = link2 { - assert_eq!(id, None); + if let LiNo::Link { ids, values } = link2 { + assert_eq!(ids, None); assert_eq!(values.len(), 1); assert_eq!(values[0], single_ref); } diff --git a/rust/tests/links_group_tests.rs b/rust/tests/links_group_tests.rs index 91eb7152..857fa8e7 100644 --- a/rust/tests/links_group_tests.rs +++ b/rust/tests/links_group_tests.rs @@ -27,15 +27,15 @@ fn links_group_constructor_equivalent_test() { LiNo::Ref("child2".to_string()), ]; let group = LiNo::Link { - id: Some("group".to_string()), + ids: Some(vec!["group".to_string()]), values: vec![root.clone()] .into_iter() .chain(children.clone()) .collect(), }; - if let LiNo::Link { id, values } = group { - assert_eq!(id, Some("group".to_string())); + if let LiNo::Link 
{ ids, values } = group { + assert_eq!(ids, Some(vec!["group".to_string()])); assert_eq!(values.len(), 3); // root + 2 children assert_eq!(values[0], root); } else { @@ -53,29 +53,29 @@ fn links_group_to_list_flattens_structure_test() { // Create nested structure: root with child1 and (child2 with grandchild) let nested_child = LiNo::Link:: { - id: None, + ids: None, values: vec![child2.clone(), grandchild.clone()], }; let group = LiNo::Link { - id: None, + ids: None, values: vec![root.clone(), child1.clone(), nested_child], }; // Verify the structure - if let LiNo::Link { id, values } = group { - assert_eq!(id, None); + if let LiNo::Link { ids, values } = group { + assert_eq!(ids, None); assert_eq!(values.len(), 3); assert_eq!(values[0], root); assert_eq!(values[1], child1); // Check nested structure if let LiNo::Link { - id: nested_id, + ids: nested_ids, values: nested_values, } = &values[2] { - assert_eq!(*nested_id, None); + assert_eq!(*nested_ids, None); assert_eq!(nested_values.len(), 2); assert_eq!(nested_values[0], child2); assert_eq!(nested_values[1], grandchild); @@ -92,7 +92,7 @@ fn links_group_to_string_test() { LiNo::Ref("child2".to_string()), ]; let group = LiNo::Link:: { - id: None, + ids: None, values: vec![root].into_iter().chain(children).collect(), }; @@ -114,7 +114,7 @@ fn links_group_append_to_links_list_test() { // Create a group structure let group = LiNo::Link:: { - id: None, + ids: None, values: vec![element.clone()] .into_iter() .chain(children.clone()) @@ -125,7 +125,7 @@ fn links_group_append_to_links_list_test() { let mut list: Vec> = Vec::new(); list.push(group.clone()); - if let LiNo::Link { id: _, values } = group { + if let LiNo::Link { ids: _, values } = group { for value in values { list.push(value); } diff --git a/rust/tests/multi_quote_parser_tests.rs b/rust/tests/multi_quote_parser_tests.rs index 8b8d51b2..0bf7bbe7 100644 --- a/rust/tests/multi_quote_parser_tests.rs +++ b/rust/tests/multi_quote_parser_tests.rs @@ -4,16 +4,16 @@ 
use links_notation::{parse_lino, LiNo}; fn get_single_ref_id(lino: &LiNo) -> Option<&String> { match lino { LiNo::Ref(id) => Some(id), - LiNo::Link { id: None, values } if values.len() == 1 => { + LiNo::Link { ids: None, values } if values.len() == 1 => { if let LiNo::Ref(id) = &values[0] { Some(id) } else if let LiNo::Link { - id: Some(ref_id), + ids: Some(ref_ids), values: inner_values, } = &values[0] { - if inner_values.is_empty() { - Some(ref_id) + if inner_values.is_empty() && ref_ids.len() == 1 { + Some(&ref_ids[0]) } else { None } @@ -22,9 +22,9 @@ fn get_single_ref_id(lino: &LiNo) -> Option<&String> { } } LiNo::Link { - id: Some(ref_id), + ids: Some(ref_ids), values, - } if values.is_empty() => Some(ref_id), + } if values.is_empty() && ref_ids.len() == 1 => Some(&ref_ids[0]), _ => None, } } @@ -407,11 +407,11 @@ fn test_backtick_as_id_in_link() { let result = parse_lino("(`myId`: value1 value2)").unwrap(); if let LiNo::Link { values, .. } = &result { if let Some(LiNo::Link { - id, + ids, values: inner_values, }) = values.first() { - assert_eq!(id.as_deref(), Some("myId")); + assert_eq!(ids.as_ref().unwrap()[0], "myId"); assert_eq!(inner_values.len(), 2); return; } diff --git a/rust/tests/multi_ref_tests.rs b/rust/tests/multi_ref_tests.rs new file mode 100644 index 00000000..2936cc58 --- /dev/null +++ b/rust/tests/multi_ref_tests.rs @@ -0,0 +1,169 @@ +//! Multi-Reference Feature Tests (Issue #184) +//! +//! Tests for multi-word references without quotes: +//! - (some example: some example is a link) +//! 
- ID as multi-word string: "some example" + +use links_notation::{format_links, parse_lino_to_links, LiNo}; + +#[test] +fn test_parses_two_word_multi_reference_id() { + let result = parse_lino_to_links("(some example: value)").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + // Multi-word ID is now stored as separate words + assert_eq!( + ids.as_ref().unwrap(), + &vec!["some".to_string(), "example".to_string()] + ); + assert_eq!(values.len(), 1); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_parses_three_word_multi_reference_id() { + let result = parse_lino_to_links("(new york city: value)").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, .. } => { + assert_eq!( + ids.as_ref().unwrap(), + &vec!["new".to_string(), "york".to_string(), "city".to_string()] + ); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_single_word_id_backward_compatible() { + let result = parse_lino_to_links("(papa: value)").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, .. } => { + assert_eq!(ids.as_ref().unwrap(), &vec!["papa".to_string()]); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_quoted_multi_word_id_backward_compatible() { + let result = parse_lino_to_links("('some example': value)").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, .. 
} => { + // Quoted ID should be preserved as-is + assert_eq!(ids.as_ref().unwrap(), &vec!["some example".to_string()]); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_format_multi_reference_id() { + let result = parse_lino_to_links("(some example: value)").expect("Failed to parse"); + let formatted = format_links(&result); + // Multi-reference IDs are formatted with space-separated words (new behavior) + // The formatted output may keep them unquoted if the formatter supports it + assert_eq!(formatted, "(some example: value)"); +} + +#[test] +fn test_round_trip_multi_reference() { + let input = "(new york city: great)"; + let result = parse_lino_to_links(input).expect("Failed to parse"); + let formatted = format_links(&result); + // Round-trip preserves the multi-word ID structure + assert_eq!(formatted, "(new york city: great)"); +} + +#[test] +fn test_indented_syntax_multi_reference() { + let input = "some example:\n value1\n value2"; + let result = parse_lino_to_links(input).expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + assert_eq!( + ids.as_ref().unwrap(), + &vec!["some".to_string(), "example".to_string()] + ); + assert_eq!(values.len(), 2); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_values_include_multi_reference_context() { + // When the same multi-word pattern appears in values, + // it should be formatted consistently + let input = "(some example: some example is a link)"; + let result = parse_lino_to_links(input).expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + assert_eq!( + ids.as_ref().unwrap(), + &vec!["some".to_string(), "example".to_string()] + ); + // Values should include "some", "example", "is", "a", "link" + // (context-aware grouping not implemented in Rust yet) + assert!(values.len() >= 4); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn 
test_backward_compatibility_single_line() { + let result = parse_lino_to_links("papa: loves mama").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + assert_eq!(ids.as_ref().unwrap(), &vec!["papa".to_string()]); + assert_eq!(values.len(), 2); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_backward_compatibility_parenthesized() { + let result = parse_lino_to_links("(papa: loves mama)").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + assert_eq!(ids.as_ref().unwrap(), &vec!["papa".to_string()]); + assert_eq!(values.len(), 2); + } + _ => panic!("Expected Link"), + } +} + +#[test] +fn test_backward_compatibility_nested() { + let result = parse_lino_to_links("(outer: (inner: value))").expect("Failed to parse"); + assert_eq!(result.len(), 1); + match &result[0] { + LiNo::Link { ids, values } => { + assert_eq!(ids.as_ref().unwrap(), &vec!["outer".to_string()]); + assert_eq!(values.len(), 1); + match &values[0] { + LiNo::Link { + ids: inner_ids, + values: inner_values, + } => { + assert_eq!(inner_ids.as_ref().unwrap(), &vec!["inner".to_string()]); + assert_eq!(inner_values.len(), 1); + } + _ => panic!("Expected nested Link"), + } + } + _ => panic!("Expected Link"), + } +} diff --git a/rust/tests/multiline_parser_tests.rs b/rust/tests/multiline_parser_tests.rs index f4188ebd..725cd0be 100644 --- a/rust/tests/multiline_parser_tests.rs +++ b/rust/tests/multiline_parser_tests.rs @@ -14,15 +14,19 @@ fn format_links(lino: &LiNo, less_parentheses: bool) -> String { value.clone() } } - LiNo::Link { id, values } => { + LiNo::Link { ids, values } => { if values.is_empty() { - if let Some(id) = id { - // Escape id same way as references - let escaped_id = format_links(&LiNo::Ref(id.clone()), false); - if less_parentheses { - escaped_id + if let Some(ids) = ids { + if ids.len() == 1 { + // Escape id same way as references + let escaped_id = 
format_links(&LiNo::Ref(ids[0].clone()), false); + if less_parentheses { + escaped_id + } else { + format!("({})", escaped_id) + } } else { - format!("({})", escaped_id) + "()".to_string() } } else { "()".to_string() @@ -34,18 +38,22 @@ fn format_links(lino: &LiNo, less_parentheses: bool) -> String { .collect::>() .join(" "); - if let Some(id) = id { - let escaped_id = format_links(&LiNo::Ref(id.clone()), false); - // Mirror JS/C#: if less_parentheses and id doesn't need parentheses, drop outer parens - if less_parentheses - && !escaped_id.contains(' ') - && !escaped_id.contains(':') - && !escaped_id.contains('(') - && !escaped_id.contains(')') - { - format!("{}: {}", escaped_id, formatted_values) + if let Some(ids) = ids { + if ids.len() == 1 { + let escaped_id = format_links(&LiNo::Ref(ids[0].clone()), false); + // Mirror JS/C#: if less_parentheses and id doesn't need parentheses, drop outer parens + if less_parentheses + && !escaped_id.contains(' ') + && !escaped_id.contains(':') + && !escaped_id.contains('(') + && !escaped_id.contains(')') + { + format!("{}: {}", escaped_id, formatted_values) + } else { + format!("({}: {})", escaped_id, formatted_values) + } } else { - format!("({}: {})", escaped_id, formatted_values) + format!("({})", formatted_values) } } else { // Values-only link: in less_parentheses mode always drop outer parentheses diff --git a/rust/tests/multiline_quoted_string_tests.rs b/rust/tests/multiline_quoted_string_tests.rs index 9e63b99a..57f8be4f 100644 --- a/rust/tests/multiline_quoted_string_tests.rs +++ b/rust/tests/multiline_quoted_string_tests.rs @@ -14,15 +14,15 @@ as another reference' let result = parse_lino(input).unwrap(); if let LiNo::Link { - id: outer_id, + ids: outer_ids, values: outer_values, } = &result { - assert!(outer_id.is_none()); + assert!(outer_ids.is_none()); assert_eq!(outer_values.len(), 1); - if let LiNo::Link { id, values } = &outer_values[0] { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = 
&outer_values[0] { + assert!(ids.is_none()); assert_eq!(values.len(), 2); if let LiNo::Ref(ref first_value) = values[0] { @@ -56,8 +56,8 @@ fn test_simple_multiline_double_quoted() { line2")"#; let result = parse_lino(input).unwrap(); - if let LiNo::Link { id, values } = &result { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &result { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(ref value) = values[0] { @@ -76,8 +76,8 @@ fn test_simple_multiline_single_quoted() { line2')"#; let result = parse_lino(input).unwrap(); - if let LiNo::Link { id, values } = &result { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &result { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(ref value) = values[0] { @@ -98,15 +98,15 @@ id": value1 value2)"#; let result = parse_lino(input).unwrap(); if let LiNo::Link { - id: outer_id, + ids: outer_ids, values: outer_values, } = &result { - assert!(outer_id.is_none()); + assert!(outer_ids.is_none()); assert_eq!(outer_values.len(), 1); - if let LiNo::Link { id, values } = &outer_values[0] { - assert_eq!(id.as_ref().unwrap(), "multi\nline\nid"); + if let LiNo::Link { ids, values } = &outer_values[0] { + assert_eq!(ids.as_ref().unwrap(), &vec!["multi\nline\nid".to_string()]); assert_eq!(values.len(), 2); } else { panic!("Expected first value to be a Link"); diff --git a/rust/tests/nested_parser_tests.rs b/rust/tests/nested_parser_tests.rs index 584dcf66..43f77463 100644 --- a/rust/tests/nested_parser_tests.rs +++ b/rust/tests/nested_parser_tests.rs @@ -124,7 +124,7 @@ fn test_indentation_parser() { let input = "parent\n child1\n child2"; let result = parse_document(input).unwrap(); assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("parent".to_string())); + assert_eq!(result.1[0].id_string(), Some("parent".to_string())); assert_eq!(result.1[0].children.len(), 2); } diff --git a/rust/tests/nested_self_reference_tests.rs 
b/rust/tests/nested_self_reference_tests.rs index 5f4306df..e7322fdc 100644 --- a/rust/tests/nested_self_reference_tests.rs +++ b/rust/tests/nested_self_reference_tests.rs @@ -13,9 +13,9 @@ fn test_nested_self_referenced_object_in_pair_value() { // Should parse exactly one top-level link assert_eq!(links.len(), 1); - if let LiNo::Link { id, values } = &links[0] { + if let LiNo::Link { ids, values } = &links[0] { // Top-level link should have ID "obj_0" - assert_eq!(id, &Some("obj_0".to_string())); + assert_eq!(ids, &Some(vec!["obj_0".to_string()])); // Should have: type marker + 2 pairs = 3 values assert_eq!(values.len(), 3); @@ -32,20 +32,20 @@ fn test_nested_self_referenced_object_in_pair_value() { // Pair 1: ((str bmFtZQ==) (str ZGljdDE=)) if let LiNo::Link { - id: pair1_id, + ids: pair1_ids, values: pair1_values, } = pair1 { - assert_eq!(pair1_id, &None); + assert_eq!(pair1_ids, &None); assert_eq!(pair1_values.len(), 2); // First element of pair1: (str bmFtZQ==) if let LiNo::Link { - id: elem1_id, + ids: elem1_ids, values: elem1_values, } = &pair1_values[0] { - assert_eq!(elem1_id, &None); + assert_eq!(elem1_ids, &None); assert_eq!(elem1_values.len(), 2); assert_eq!(elem1_values[0], LiNo::Ref("str".to_string())); assert_eq!(elem1_values[1], LiNo::Ref("bmFtZQ==".to_string())); @@ -55,11 +55,11 @@ fn test_nested_self_referenced_object_in_pair_value() { // Second element of pair1: (str ZGljdDE=) if let LiNo::Link { - id: elem2_id, + ids: elem2_ids, values: elem2_values, } = &pair1_values[1] { - assert_eq!(elem2_id, &None); + assert_eq!(elem2_ids, &None); assert_eq!(elem2_values.len(), 2); assert_eq!(elem2_values[0], LiNo::Ref("str".to_string())); assert_eq!(elem2_values[1], LiNo::Ref("ZGljdDE=".to_string())); @@ -73,20 +73,20 @@ fn test_nested_self_referenced_object_in_pair_value() { // Pair 2: ((str b3RoZXI=) (obj_1: dict ...)) // This is the critical test - the second element should be a self-referenced dict if let LiNo::Link { - id: pair2_id, + ids: pair2_ids, 
values: pair2_values, } = pair2 { - assert_eq!(pair2_id, &None); + assert_eq!(pair2_ids, &None); assert_eq!(pair2_values.len(), 2); // First element of pair2: (str b3RoZXI=) if let LiNo::Link { - id: key_id, + ids: key_ids, values: key_values, } = &pair2_values[0] { - assert_eq!(key_id, &None); + assert_eq!(key_ids, &None); assert_eq!(key_values.len(), 2); assert_eq!(key_values[0], LiNo::Ref("str".to_string())); assert_eq!(key_values[1], LiNo::Ref("b3RoZXI=".to_string())); @@ -97,13 +97,13 @@ fn test_nested_self_referenced_object_in_pair_value() { // Second element of pair2: (obj_1: dict ((str bmFtZQ==) (str ZGljdDI=)) ((str b3RoZXI=) obj_0)) // THIS IS THE KEY TEST - obj_1 should have its ID preserved if let LiNo::Link { - id: obj1_id, + ids: obj1_ids, values: obj1_values, } = &pair2_values[1] { assert_eq!( - obj1_id, - &Some("obj_1".to_string()), + obj1_ids, + &Some(vec!["obj_1".to_string()]), "obj_1 should have its ID preserved" ); assert_eq!( @@ -179,17 +179,17 @@ fn test_self_reference_as_direct_child_works_correctly() { assert_eq!(links.len(), 1); - if let LiNo::Link { id, values } = &links[0] { - assert_eq!(id, &Some("obj_0".to_string())); + if let LiNo::Link { ids, values } = &links[0] { + assert_eq!(ids, &Some(vec!["obj_0".to_string()])); assert_eq!(values.len(), 4); // list + 1 + 2 + obj_1 // The fourth value should be obj_1 with a self-reference if let LiNo::Link { - id: obj1_id, + ids: obj1_ids, values: obj1_values, } = &values[3] { - assert_eq!(obj1_id, &Some("obj_1".to_string())); + assert_eq!(obj1_ids, &Some(vec!["obj_1".to_string()])); assert_eq!(obj1_values.len(), 4); // list + 3 + 4 + obj_0 assert_eq!(obj1_values[3], LiNo::Ref("obj_0".to_string())); } else { diff --git a/rust/tests/single_line_parser_tests.rs b/rust/tests/single_line_parser_tests.rs index fcb00369..7d9ba68e 100644 --- a/rust/tests/single_line_parser_tests.rs +++ b/rust/tests/single_line_parser_tests.rs @@ -15,15 +15,19 @@ fn format_links(lino: &LiNo, less_parentheses: bool) -> 
String { value.clone() } } - LiNo::Link { id, values } => { + LiNo::Link { ids, values } => { if values.is_empty() { - if let Some(id) = id { - // Escape id same as references - let escaped_id = format_links(&LiNo::Ref(id.clone()), false); - if less_parentheses { - escaped_id + if let Some(ids) = ids { + if ids.len() == 1 { + // Escape id same as references + let escaped_id = format_links(&LiNo::Ref(ids[0].clone()), false); + if less_parentheses { + escaped_id + } else { + format!("({})", escaped_id) + } } else { - format!("({})", escaped_id) + "()".to_string() } } else { "()".to_string() @@ -35,12 +39,16 @@ fn format_links(lino: &LiNo, less_parentheses: bool) -> String { .collect::>() .join(" "); - if let Some(id) = id { - let escaped_id = format_links(&LiNo::Ref(id.clone()), false); - if less_parentheses && values.len() == 1 { - format!("{}: {}", escaped_id, formatted_values) + if let Some(ids) = ids { + if ids.len() == 1 { + let escaped_id = format_links(&LiNo::Ref(ids[0].clone()), false); + if less_parentheses && values.len() == 1 { + format!("{}: {}", escaped_id, formatted_values) + } else { + format!("({}: {})", escaped_id, formatted_values) + } } else { - format!("({}: {})", escaped_id, formatted_values) + format!("({})", formatted_values) } } else if less_parentheses && values.iter().all(|v| matches!(v, LiNo::Ref(_))) { // All values are references, can skip parentheses @@ -96,8 +104,8 @@ fn quoted_references_test() { assert!(parsed.is_link()); if let LiNo::Link { values, .. } = &parsed { assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert_eq!(id.as_deref(), Some("a")); + if let LiNo::Link { ids, values } = &values[0] { + assert_eq!(ids.as_ref().unwrap()[0], "a"); assert_eq!(values.len(), 2); } } @@ -110,8 +118,8 @@ fn quoted_references_with_spaces_test() { assert!(parsed.is_link()); if let LiNo::Link { values, .. 
} = &parsed { assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert_eq!(id.as_deref(), Some("a a")); + if let LiNo::Link { ids, values } = &values[0] { + assert_eq!(ids.as_ref().unwrap()[0], "a a"); assert_eq!(values.len(), 2); } } @@ -122,8 +130,8 @@ fn parse_simple_reference() { let input = "test"; let result = parse_lino(input).unwrap(); assert!(result.is_link()); - if let LiNo::Link { id, values } = &result { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &result { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(id) = &values[0] { assert_eq!(id, "test"); @@ -138,8 +146,8 @@ fn parse_reference_with_colon_and_values() { assert!(result.is_link()); if let LiNo::Link { values, .. } = &result { assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert_eq!(id.as_deref(), Some("parent")); + if let LiNo::Link { ids, values } = &values[0] { + assert_eq!(ids.as_ref().unwrap()[0], "parent"); assert_eq!(values.len(), 2); } } @@ -152,8 +160,8 @@ fn parse_multiline_link() { assert!(result.is_link()); if let LiNo::Link { values, .. 
} = &result { assert_eq!(values.len(), 1); - if let LiNo::Link { id, values } = &values[0] { - assert_eq!(id.as_deref(), Some("parent")); + if let LiNo::Link { ids, values } = &values[0] { + assert_eq!(ids.as_ref().unwrap()[0], "parent"); assert_eq!(values.len(), 2); } } @@ -210,8 +218,8 @@ fn test_singlet_link() { let input = "(singlet)"; let result = parse_lino(input).unwrap(); assert!(result.is_link()); - if let LiNo::Link { id, values } = &result { - assert!(id.is_none()); + if let LiNo::Link { ids, values } = &result { + assert!(ids.is_none()); assert_eq!(values.len(), 1); if let LiNo::Ref(ref_id) = &values[0] { assert_eq!(ref_id, "singlet"); @@ -285,21 +293,21 @@ fn test_multiple_words_in_quotes() { fn test_simple_reference() { let result = parse_document("hello").unwrap(); assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("hello".to_string())); + assert_eq!(result.1[0].id_string(), Some("hello".to_string())); } #[test] fn test_quoted_reference() { let result = parse_document("\"hello world\"").unwrap(); assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("hello world".to_string())); + assert_eq!(result.1[0].id_string(), Some("hello world".to_string())); } #[test] fn test_singlet_link_parser() { let result = parse_document("(singlet)").unwrap(); assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("singlet".to_string())); + assert_eq!(result.1[0].id_string(), Some("singlet".to_string())); assert_eq!(result.1[0].values.len(), 0); assert_eq!(result.1[0].children.len(), 0); } @@ -315,7 +323,7 @@ fn test_value_link_parser() { fn test_link_with_id() { let result = parse_document("(id: a b c)").unwrap(); assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("id".to_string())); + assert_eq!(result.1[0].id_string(), Some("id".to_string())); assert_eq!(result.1[0].values.len(), 3); } @@ -323,7 +331,7 @@ fn test_link_with_id() { fn test_single_line_link() { let result = parse_document("id: value1 value2").unwrap(); 
assert_eq!(result.1.len(), 1); - assert_eq!(result.1[0].id, Some("id".to_string())); + assert_eq!(result.1[0].id_string(), Some("id".to_string())); assert_eq!(result.1[0].values.len(), 2); } @@ -336,8 +344,8 @@ fn test_parse_quoted_references_values_only() { assert_eq!(links.len(), 1); // Should have 2 values match &links[0] { - links_notation::LiNo::Link { id, values } => { - assert_eq!(id, &None); + links_notation::LiNo::Link { ids, values } => { + assert_eq!(ids, &None); assert_eq!(values.len(), 2); } _ => panic!("Expected Link"), @@ -400,8 +408,8 @@ fn test_quoted_references_with_spaces_in_link() { assert_eq!(result.len(), 1); match &result[0] { - links_notation::LiNo::Link { id, values } => { - assert_eq!(id, &Some("id".to_string())); + links_notation::LiNo::Link { ids, values } => { + assert_eq!(ids, &Some(vec!["id".to_string()])); assert_eq!(values.len(), 1); } _ => panic!("Expected Link"), @@ -417,8 +425,8 @@ fn test_quoted_references_with_special_chars() { assert_eq!(result.len(), 1); match &result[0] { - links_notation::LiNo::Link { id, values } => { - assert_eq!(id, &None); + links_notation::LiNo::Link { ids, values } => { + assert_eq!(ids, &None); assert_eq!(values.len(), 2); } _ => panic!("Expected Link"), @@ -434,8 +442,8 @@ fn test_single_line_with_id() { assert!(!result.is_empty()); match &result[0] { - links_notation::LiNo::Link { id, values } => { - assert_eq!(id, &Some("myid".to_string())); + links_notation::LiNo::Link { ids, values } => { + assert_eq!(ids, &Some(vec!["myid".to_string()])); assert_eq!(values.len(), 2); } _ => panic!("Expected Link"), @@ -451,8 +459,8 @@ fn test_single_line_without_id() { assert_eq!(result.len(), 1); match &result[0] { - links_notation::LiNo::Link { id, values } => { - assert_eq!(id, &None); + links_notation::LiNo::Link { ids, values } => { + assert_eq!(ids, &None); assert_eq!(values.len(), 2); } _ => panic!("Expected Link"),
Tests for multi-word references without quotes: + * + *