Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f2c4711

Browse files
author
Skip Montanaro
committed
Search for Unicode character names using regular expressions.
1 parent decc6a4 commit f2c4711

1 file changed

Lines changed: 40 additions & 0 deletions

File tree

Demo/scripts/find-uname.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python
2+
3+
"""
For each argument on the command line, look for it in the set of all Unicode
names. Arguments are treated as case-insensitive regular expressions, e.g.:

    % find-uname 'small letter a$' 'horizontal line'
    *** small letter a$ matches ***
    LATIN SMALL LETTER A (97)
    COMBINING LATIN SMALL LETTER A (867)
    CYRILLIC SMALL LETTER A (1072)
    PARENTHESIZED LATIN SMALL LETTER A (9372)
    CIRCLED LATIN SMALL LETTER A (9424)
    FULLWIDTH LATIN SMALL LETTER A (65345)
    *** horizontal line matches ***
    HORIZONTAL LINE EXTENSION (9135)
"""
18+
19+
import unicodedata
20+
import sys
21+
import re
22+
23+
def main(args):
    """Print the named Unicode characters whose names match each pattern.

    args is a sequence of regular-expression strings.  Each one is compiled
    case-insensitively and searched for (unanchored) in the name of every
    code point from 0 through sys.maxunicode.  For each pattern with at
    least one hit, a "*** <pattern> matches ***" header is printed, followed
    by one "NAME (codepoint)" line per match in code point order.  Patterns
    with no hits print nothing.

    Raises re.error if a pattern is not a valid regular expression.
    """
    # Python 2 spells the code-point-to-character builtin ``unichr``;
    # Python 3 folded it into ``chr``.  Pick whichever exists so the script
    # runs unchanged on both.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Build the (code point, name) table once so that each pattern only
    # costs a single scan over it.
    unicode_names = []
    for ix in range(sys.maxunicode + 1):
        # Supplying a default avoids raising and catching ValueError for
        # the many code points that have no name.
        name = unicodedata.name(to_char(ix), None)
        if name is not None:
            unicode_names.append((ix, name))

    for arg in args:
        pat = re.compile(arg, re.I)
        matches = [(ix, name) for (ix, name) in unicode_names
                   if pat.search(name) is not None]
        if matches:
            # A single parenthesized %-formatted argument prints the same
            # text under the Python 2 print statement and the Python 3
            # print function.
            print("*** %s matches ***" % arg)
            for (ix, name) in matches:
                print("%s (%d)" % (name, ix))
38+
39+
if __name__ == "__main__":
    # Every command-line argument after the script name is a pattern.
    main(sys.argv[1:])

0 commit comments

Comments
 (0)