Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions gen-blocks.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Script to regenerate src/unicode_blocks.rs from Blocks.txt
#
# Usage: awk -f gen-blocks.awk Blocks.txt > src/unicode_blocks.rs
#
# where Blocks.txt is fetched from:
# https://www.unicode.org/Public/UNIDATA/Blocks.txt

# Fields in Blocks.txt data lines are separated by ';'.
# idx is the next free slot in match_lines. It is initialised here so the
# 1-based indexing that the END loop relies on holds even when the version
# header line is never matched.
BEGIN { FS=";"; idx = 1 }

# Version header line (matches "# Blocks-<major>.<minor>.<patch>.txt"):
# extract the version number and emit the preamble of the generated Rust file.
/^# Blocks-[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.txt/ {
    pos = match($0, "[[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+");
    if (pos < 1) {
        # stdout is redirected into the generated .rs file in normal usage,
        # so the diagnostic is sent to stderr where the user can see it.
        printf("unable to extract version from: %s\n", $0) > "/dev/stderr";
        exit(1);
    }

    # RSTART/RLENGTH are set by the match() call above.
    version = substr($0, RSTART, RLENGTH);

    print "// The dataset is from https://www.unicode.org/Public/UNIDATA/Blocks.txt"
    print ""
    print "use crate::UnicodeBlock;"
    print ""
    printf("pub const VERSION: &str = \"%s\";\n", version);

    # Next free slot in match_lines (1-based; consumed by the END block).
    idx = 1
}

# Only process non-comment lines. Data lines start with a hexadecimal code
# point and have the form "START..END; Block Name".
/^[[:digit:]A-F]/ {
    # $1 is "START..END"; split on the literal ".." separator.
    split($1, range, "\\.\\.");
    start = range[1];
    end = range[2];

    # Strip surrounding whitespace from the block name. Trailing blanks or a
    # carriage return (CRLF-terminated input) would otherwise leak into both
    # the Rust string literal and the constant identifier.
    name = $2
    gsub(/^[ \t]+|[ \t\r]+$/, "", name);

    # build Rust constant name: spaces and hyphens become underscores,
    # then the whole identifier is upper-cased.
    const = name
    gsub("[ -]", "_", const);
    const = toupper(const);

    printf("pub const %s: UnicodeBlock = UnicodeBlock {\n", const);
    printf(" name: \"%s\", start: 0x%s, end: 0x%s\n", name, start, end);
    print "};"

    # skip blocks that end in _SURROGATES as these are not valid as Unicode
    # escapes in Rust. Every other block gets a match arm, stored for the
    # END block to print inside the lookup function.
    if (match(const, "_SURROGATES$") < 1) {
        match_line = sprintf(" '\\u{%s}'..='\\u{%s}' => Some(%s),", start, end, const);
        match_lines[idx++] = match_line;
    }
}

# After all input has been read, emit the lookup function whose match arms
# were collected in match_lines while processing the data lines.
END {
    print ""
    print "/// Given a character, determine what unicode block contains it."
    print "pub fn find_unicode_block(c: char) -> Option<UnicodeBlock> {"
    print " match c {"

    # Arms occupy slots 1 .. idx-1, in the order they were read.
    slot = 1
    while (slot < idx) {
        print match_lines[slot]
        slot++
    }

    # Fallback arm for characters not covered by any emitted range.
    print " _ => None,"
    print " }"
    print "}"
}
Loading