Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c5bb9c2

Browse files
committed
Marc-Andre Lemburg <[email protected]>:
Generator for the new ucnhash module (ucnhash.h|c). Uses perfect_hash.py to create the ucnhash module.
1 parent 93c409a commit c5bb9c2

1 file changed

Lines changed: 109 additions & 0 deletions

File tree

Tools/perfecthash/GenUCNHash.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#! /usr/bin/env python
2+
import sys
3+
import string
4+
import perfect_hash
5+
6+
# This is a user of perfect_hash.py
7+
# that takes as input the UnicodeData.txt file available from:
8+
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
9+
10+
# It generates a hash table from Unicode Character Name ->
11+
# unicode code space value.
12+
13+
# These variables determine which hash function is tried first.
14+
# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24.
15+
f1Seed = 1694245428
16+
f2Seed = -1917331657
17+
18+
# Maximum allowed multiplier; if this isn't None, then instead of continually
19+
# increasing C, it resets it back to initC to keep searching for
20+
# a solution.
# NOTE(review): the variable is named "minC" although the comment above
# describes a maximum -- confirm against perfect_hash.generate_hash(), which
# receives this value as its third argument in main().
21+
minC = 1.7875
22+
# Initial multiplier for trying to find a perfect hash function.
23+
initC = 1.7875
24+
25+
# Names handed to perfHash.generate_header() / perfHash.generate_code() and
# the two output files that generateOutputFiles() opens for writing.
moduleName = "ucnhash"
26+
dataArrayName = "aucn"
27+
dataArrayType = "_Py_UnicodeCharacterName"
28+
headerFileName = "ucnhash.h"
29+
cFileName = "ucnhash.c"
30+
structName = "_Py_UCNHashAPI"
31+
32+
# List of (upper-cased character name, running index) tuples, filled by main().
keys = []
33+
# Maps a character name to the integer parsed (base 16) from the first field
# of its input line; filled by main() and read by generateOutputFiles().
hashData = {}
34+
35+
def generateOutputFiles(perfHash, hashData):
    """Write the generated C header and source file for the hash module.

    perfHash -- object returned by perfect_hash.generate_hash(); its
                generate_header(), generate_code() and generate_graph()
                methods supply most of the emitted text.
    hashData -- dictionary mapping every name stored in the module-level
                ``keys`` list to its integer code value.

    The header is written to ``headerFileName``; the implementation,
    followed by the ``aucn`` name/value table, is written to ``cFileName``.
    Both files are explicitly closed (also when a write fails) instead of
    relying on garbage collection to flush them.  A short summary naming
    the two files is printed to stderr.

    Raises KeyError if a name in ``keys`` has no entry in ``hashData``.
    """
    header = perfHash.generate_header(structName)
    header = header + """
typedef struct
{
const char *pszUCN;
unsigned int uiValue;
} _Py_UnicodeCharacterName;

"""

    code = perfHash.generate_code(moduleName,
                                  dataArrayName,
                                  dataArrayType,
                                  structName)

    # try/finally (rather than "with") keeps the script usable on the old
    # interpreters it was written for while still guaranteeing the close.
    out = open(headerFileName, "w")
    try:
        out.write(header)
    finally:
        out.close()

    out = open(cFileName, "w")
    try:
        out.write("#include <%s>\n" % headerFileName)
        out.write(code)
        perfHash.generate_graph(out)
        out.write("""

static const _Py_UnicodeCharacterName aucn[] =
{
""")
        # One table row per key, in the same order the keys were hashed.
        for name, _hashcode in keys:
            value = hashData[name]
            out.write(' { "' + name + '", ' + hex(value) + " }," + "\n")
        out.write("};\n\n")
    finally:
        out.close()

    sys.stderr.write('\nGenerated output files: \n')
    sys.stderr.write('%s\n%s\n' % (headerFileName, cFileName))
67+
68+
def main():
    """Read UnicodeData.txt (named by sys.argv[1]) and generate the output.

    Every input line is split on ';'.  The first field is the code value in
    hexadecimal and the second field is the character name.  Names that
    start with '<' (controls and range start/end markers) are skipped.  All
    other names are upper-cased and recorded twice under that same spelling:
    as ``(name, running index)`` in the module-level ``keys`` list and as
    ``name -> code value`` in ``hashData``, so the later table lookup in
    generateOutputFiles() always finds its key.

    A line with fewer than two fields, or with an empty name, is reported
    on stderr together with its real input line number and the script exits
    with status 1.  The input file is closed on every path.

    Raises ValueError if the first field is not a valid hexadecimal number.
    """
    # Suck in UnicodeData.txt and spit out the generated files.
    infile = open(sys.argv[1], 'r')
    try:
        lineno = 0      # physical line number, used only for diagnostics
        hashcode = 0    # running index of the names that were accepted
        while 1:
            line = infile.readline()
            if line == "":
                break
            lineno = lineno + 1
            fields = line.split(';')
            if len(fields) >= 2:
                name = fields[1].strip()
            else:
                name = ""
            if not name:
                sys.stderr.write('Ill-formated line!\n')
                sys.stderr.write('line #: %d\n' % lineno)
                sys.exit(1)
            # Any name starting with '<' is a control, or start/end
            # character, so skip it...
            if name[0] == "<":
                continue
            # Force the name to uppercase and use that one spelling for both
            # containers; the output stage looks hashData up by keys' names.
            name = name.upper()
            keys.append((name, hashcode))
            hashcode = hashcode + 1
            hashData[name] = int(fields[0], 16)
    finally:
        infile.close()

    sys.stderr.write('%i key/hash pairs read\n' % len(keys))
    perfHash = perfect_hash.generate_hash(keys, 1,
                                          minC, initC,
                                          f1Seed, f2Seed,
                                          # increment, tries
                                          0.0025, 50)
    generateOutputFiles(perfHash, hashData)
101+
102+
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # Write the usage text straight to stderr (no global rebinding of
        # sys.stdout, no print statement) and signal the error to the
        # calling shell with a non-zero exit status.
        sys.stderr.write('Usage: %s <input filename>\n' % sys.argv[0])
        sys.stderr.write(' The input file needs to be UnicodeData.txt\n')
        sys.exit(1)
    main()
109+

0 commit comments

Comments
 (0)