From 0f0f19326e2dacf9abb12f755b17348205142a01 Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Fri, 13 Jun 2025 16:46:32 -0700 Subject: [PATCH 1/2] Updated Unikemet regular expressions; however, RegEx for kEH_FVal doesn't validate against current data, so hack for now. --- .../data/ucdxml/dev/ucd.nounihan.grouped.xml | 30 +++++++++---------- .../org/unicode/xml/AttributeResolver.java | 1 + .../org/unicode/props/IndexPropertyRegex.txt | 13 ++++---- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/unicodetools/data/ucdxml/dev/ucd.nounihan.grouped.xml b/unicodetools/data/ucdxml/dev/ucd.nounihan.grouped.xml index e7e4a03a7..d68533433 100644 --- a/unicodetools/data/ucdxml/dev/ucd.nounihan.grouped.xml +++ b/unicodetools/data/ucdxml/dev/ucd.nounihan.grouped.xml @@ -425,7 +425,7 @@ - + @@ -36420,7 +36420,7 @@ - + @@ -37315,7 +37315,7 @@ - + @@ -37455,7 +37455,7 @@ - + @@ -38351,7 +38351,7 @@ - + @@ -38792,7 +38792,7 @@ - + @@ -38837,7 +38837,7 @@ - + @@ -39164,7 +39164,7 @@ - + @@ -39406,7 +39406,7 @@ - + @@ -39609,7 +39609,7 @@ - + @@ -40100,7 +40100,7 @@ - + @@ -40527,7 +40527,7 @@ - + @@ -40710,7 +40710,7 @@ - + @@ -40833,7 +40833,7 @@ - + @@ -41091,7 +41091,7 @@ - + diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index 95a25a6b5..04ed5348f 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -212,6 +212,7 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { } return Optional.ofNullable(resolvedValue).orElse(""); case kDefinition: + case kEH_FVal: return resolvedValue; default: if (resolvedValue != null) { diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt index abf2c2356..9809bce10 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt @@ -216,14 +216,15 @@ Names_List_Cross_Ref ; MULTI_VALUED ; .* # Regex patterns from UAX #57 -kEH_Cat ; SINGLE_VALUED ; ([A-IK-Z]|AA)-[0-9]{2}-[0-9]{3} +kEH_Cat ; SINGLE_VALUED ; ([A-IK-Z]|AA)-\d{2}-\d{3} kEH_Desc ; SINGLE_VALUED ; [^\t"]+ kEH_Func ; MULTI_VALUED ; [^\t"]+ -kEH_FVal ; MULTI_VALUED ; [\x{A723}\x{A7BD}y\x{A725}wbpfmnrh\x{1E25}\x{1E2B}\x{1E96}s\x{0161}\x{1E33}kgt\x{1E6F}d\x{1E0F}./|\-;=\(\)\s]+ -kEH_HG ; MULTI_VALUED ; ([A-IK-Z]|AA)[0-9]{1,3}[A-Za-z]{0,2} -kEH_IFAO ; MULTI_VALUED ; [0-9]{1,3},[0-9]{1,2} -kEH_JSesh ; MULTI_VALUED ; ([A-IK-Z]|Aa|NL|NU|Ff)[0-9]{1,3}[A-Za-z]{0,5}|(US1|US22|US248|US685)([A-IK-Z]|Aa|NL|NU)[0-9]{1,3}[A-Za-z]{0,5} -kEH_UniK ; SINGLE_VALUED ; ([A-IK-Z]|AA|NL|NU)[0-9]{3}[A-Z]{0,2}|HJ ([A-IK-Z]|AA)[0-9]{3}[A-Z]{0,2} +# Revisit once the spec settles down. +kEH_FVal ; SINGLE_VALUED ; [^\t"]+ +kEH_HG ; MULTI_VALUED ; ([A-IK-Z]|AA)\d{1,3}[A-Za-z]{0,2} +kEH_IFAO ; MULTI_VALUED ; \d{1,3},\d{1,2}[ab]? +kEH_JSesh ; MULTI_VALUED ; ([A-IK-Z]|Aa|NL|NU|Ff)\d{1,3}[A-Za-z]{0,5}|(US1|US22|US248|US685)([A-IK-Z]|Aa|NL|NU)\d{1,3}[A-Za-z]{0,5} +kEH_UniK ; SINGLE_VALUED ; ([A-IK-Z]|AA|NL|NU)\d{3}[A-Z]{0,2}|HJ ([A-IK-Z]|AA)\d{3}[A-Z]{0,2} # ============================= # Catalog/Enum/Binary Properties From dee0f388203a5961b77bf60b7fd5421861e011fe Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Fri, 13 Jun 2025 17:15:58 -0700 Subject: [PATCH 2/2] Generate enums + spotless --- .../src/main/java/org/unicode/props/UcdProperty.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index 687c2ab5c..53606a32d 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -217,12 +217,7 @@ public enum UcdProperty { kEACC(PropertyType.Miscellaneous, DerivedPropertyStatus.Provisional, "cjkEACC"), kEH_Cat(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "kEH_Cat"), kEH_Desc(PropertyType.Miscellaneous, DerivedPropertyStatus.Approved, "kEH_Desc"), - kEH_FVal( - PropertyType.Miscellaneous, - DerivedPropertyStatus.Provisional, - null, - ValueCardinality.Unordered, - "kEH_FVal"), + kEH_FVal(PropertyType.Miscellaneous, DerivedPropertyStatus.Provisional, "kEH_FVal"), kEH_Func( PropertyType.Miscellaneous, DerivedPropertyStatus.Provisional,