Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit aef8371

Browse files
committed
Added the 7 new top level domains, and reworded the nameorgs output.
Not sure this is better in all cases.

parse(): Fixed a bug in the output; the dict is referred to in the code as `countries' not `country'. Also added no-case-fold for the string "U.S." since the Virgin Islands name no longer wraps those in parentheses.

main(): Fixed the argument parsing to agree with the docstring, i.e. --outputdict instead of --output.

In the module docstring:
- updated my email address
- we don't need to explain about Python 1.5 regexps <wink>

We also don't need to wrap the import of re with a try/except.

Other style fixes:
- untabification
- revert back to <> style everywhere (and consistently)
1 parent 9e9d4f8 commit aef8371

1 file changed

Lines changed: 110 additions & 113 deletions

File tree

Tools/world/world

Lines changed: 110 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""world -- Print mappings between country names and DNS country codes.
44
55
Contact: Barry Warsaw
6-
Email: bwarsaw@python.org
6+
Email: barry@python.org
77
Version: %(__version__)s
88
99
This script will take a list of Internet addresses and print out where in the
@@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms:
1414
host.domain.xx -- any Internet host or network name
1515
somebody@where.xx -- an Internet email address
1616
17-
If no match is found, the address is interpreted as a regular expression [*]
18-
and a reverse lookup is attempted. This script will search the country names
19-
and print a list of matching entries. You can force reverse mappings with the
17+
If no match is found, the address is interpreted as a regular expression and a
18+
reverse lookup is attempted. This script will search the country names and
19+
print a list of matching entries. You can force reverse mappings with the
2020
`-r' flag (see below).
2121
2222
For example:
@@ -34,10 +34,6 @@ For example:
3434
tz: Tanzania, United Republic of
3535
gb: United Kingdom
3636
37-
38-
[*] Note that regular expressions must conform to Python 1.5's re.py module
39-
syntax. The comparison is done with the search() method.
40-
4137
Country codes are maintained by the RIPE Network Coordination Centre,
4238
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
4339
authoritative source of country code mappings is:
@@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
6965
When used in conjunction with the `-p' option, output is in the form
7066
of a Python dictionary, and country names are normalized
7167
w.r.t. capitalization. This makes it appropriate for cutting and
72-
pasting back into this file.
68+
pasting back into this file. Output is always to standard out.
7369
7470
--reverse
7571
-r
@@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
8278
-h
8379
--help
8480
Print this message.
85-
8681
"""
8782
__version__ = '$Revision$'
8883

8984

9085
import sys
9186
import getopt
92-
try:
93-
import re
94-
except ImportError:
95-
print sys.argv[0], 'requires Python 1.5'
96-
sys.exit(1)
87+
import re
9788

9889
PROGRAM = sys.argv[0]
9990

@@ -110,105 +101,102 @@ def usage(code, msg=''):
110101
def resolve(rawaddr):
111102
parts = rawaddr.split('.')
112103
if not len(parts):
113-
# no top level domain found, bounce it to the next step
114-
return rawaddr
104+
# no top level domain found, bounce it to the next step
105+
return rawaddr
115106
addr = parts[-1]
116107
if nameorgs.has_key(addr):
117-
if nameorgs[addr][0].lower() in 'aeiou':
118-
ana = 'an'
119-
else:
120-
ana = 'a'
121-
print rawaddr, 'is from', ana, nameorgs[addr], 'organization'
122-
return None
108+
print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
109+
return None
123110
elif countries.has_key(addr):
124-
print rawaddr, 'originated from', countries[addr]
125-
return None
111+
print rawaddr, 'originated from', countries[addr]
112+
return None
126113
else:
127-
# Not resolved, bounce it to the next step
128-
return rawaddr
114+
# Not resolved, bounce it to the next step
115+
return rawaddr
129116

130117

131118

132119
def reverse(regexp):
133120
matches = []
134121
cre = re.compile(regexp, re.IGNORECASE)
135122
for code, country in all.items():
136-
mo = cre.search(country)
137-
if mo:
138-
matches.append(code)
123+
mo = cre.search(country)
124+
if mo:
125+
matches.append(code)
139126
# print results
140127
if not matches:
141-
# not resolved, bounce it to the next step
142-
return regexp
128+
# not resolved, bounce it to the next step
129+
return regexp
143130
if len(matches) == 1:
144-
code = matches[0]
145-
print regexp, "matches code `%s', %s" % (code, all[code])
131+
code = matches[0]
132+
print regexp, "matches code `%s', %s" % (code, all[code])
146133
else:
147-
print regexp, 'matches %d countries:' % len(matches)
148-
for code in matches:
149-
print " %s: %s" % (code, all[code])
134+
print regexp, 'matches %d countries:' % len(matches)
135+
for code in matches:
136+
print " %s: %s" % (code, all[code])
150137
return None
151138

152139

153140

154141
def parse(file, normalize):
155142
try:
156-
fp = open(file)
143+
fp = open(file)
157144
except IOError, (err, msg):
158-
print msg, ':', file
145+
print msg, ':', file
159146

160147
cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
161148
scanning = 0
162149

163150
if normalize:
164-
print 'country = {'
151+
print 'countries = {'
165152

166153
while 1:
167-
line = fp.readline()
168-
if line == '':
169-
break # EOF
170-
if scanning:
171-
mo = cre.match(line)
172-
if not mo:
173-
line = line.strip()
174-
if not line:
175-
continue
176-
elif line[0] == '-':
177-
break
178-
else:
179-
print 'Could not parse line:', line
180-
continue
181-
country, code = mo.group(1, 2)
182-
if normalize:
183-
words = country.split()
184-
for i in range(len(words)):
185-
w = words[i]
186-
# XXX special cases
187-
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
188-
words[i] = w.lower()
189-
elif w == 'THE' and i != 1:
190-
words[i] = w.lower()
191-
elif len(w) > 3 and w[1] == "'":
192-
words[i] = w[0:3].upper() + w[3:].lower()
193-
elif w == '(U.S.)':
194-
pass
195-
elif w[0] == '(' and w != '(local':
196-
words[i] = '(' + w[1:].capitalize()
197-
elif w.find('-') != -1:
198-
words[i] = '-'.join([s.capitalize() for s in w.split('-')])
199-
else:
200-
words[i] = w.capitalize()
201-
code = code.lower()
202-
country = ' '.join(words)
203-
print ' "%s": "%s",' % (code, country)
204-
else:
205-
print code, country
206-
207-
elif line[0] == '-':
208-
scanning = 1
154+
line = fp.readline()
155+
if line == '':
156+
break # EOF
157+
if scanning:
158+
mo = cre.match(line)
159+
if not mo:
160+
line = line.strip()
161+
if not line:
162+
continue
163+
elif line[0] == '-':
164+
break
165+
else:
166+
print 'Could not parse line:', line
167+
continue
168+
country, code = mo.group(1, 2)
169+
if normalize:
170+
words = country.split()
171+
for i in range(len(words)):
172+
w = words[i]
173+
# XXX special cases
174+
if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
175+
words[i] = w.lower()
176+
elif w == 'THE' and i <> 1:
177+
words[i] = w.lower()
178+
elif len(w) > 3 and w[1] == "'":
179+
words[i] = w[0:3].upper() + w[3:].lower()
180+
elif w in ('(U.S.)', 'U.S.'):
181+
pass
182+
elif w[0] == '(' and w <> '(local':
183+
words[i] = '(' + w[1:].capitalize()
184+
elif w.find('-') <> -1:
185+
words[i] = '-'.join(
186+
[s.capitalize() for s in w.split('-')])
187+
else:
188+
words[i] = w.capitalize()
189+
code = code.lower()
190+
country = ' '.join(words)
191+
print ' "%s": "%s",' % (code, country)
192+
else:
193+
print code, country
194+
195+
elif line[0] == '-':
196+
scanning = 1
209197

210198
if normalize:
211-
print ' }'
199+
print ' }'
212200

213201

214202
def main():
@@ -228,53 +216,62 @@ def main():
228216
usage(1, msg)
229217

230218
for opt, arg in opts:
231-
if opt in ('-h', '--help'):
232-
help = 1
233-
elif opt in ('-d', '--dump'):
234-
dump = 1
235-
elif opt in ('-p', '--parse'):
236-
parsefile = arg
237-
elif opt in ('-o', '--output'):
238-
normalize = 1
239-
elif opt in ('-r', '--reverse'):
240-
forcerev = 1
219+
if opt in ('-h', '--help'):
220+
help = 1
221+
elif opt in ('-d', '--dump'):
222+
dump = 1
223+
elif opt in ('-p', '--parse'):
224+
parsefile = arg
225+
elif opt in ('-o', '--outputdict'):
226+
normalize = 1
227+
elif opt in ('-r', '--reverse'):
228+
forcerev = 1
241229

242230
if help:
243-
usage(status)
231+
usage(status)
244232

245233
if dump:
246-
print 'Non-geographic domains:'
247-
codes = nameorgs.keys()
248-
codes.sort()
249-
for code in codes:
250-
print ' %4s:' % code, nameorgs[code]
251-
252-
print '\nCountry coded domains:'
253-
codes = countries.keys()
254-
codes.sort()
255-
for code in codes:
256-
print ' %2s:' % code, countries[code]
234+
print 'Non-geographic domains:'
235+
codes = nameorgs.keys()
236+
codes.sort()
237+
for code in codes:
238+
print ' %4s:' % code, nameorgs[code]
239+
240+
print '\nCountry coded domains:'
241+
codes = countries.keys()
242+
codes.sort()
243+
for code in codes:
244+
print ' %2s:' % code, countries[code]
257245
elif parsefile:
258-
parse(parsefile, normalize)
246+
parse(parsefile, normalize)
259247
else:
260-
if not forcerev:
261-
args = filter(None, map(resolve, args))
262-
args = filter(None, map(reverse, args))
263-
for arg in args:
264-
print 'Where in the world is %s?' % arg
248+
if not forcerev:
249+
args = filter(None, map(resolve, args))
250+
args = filter(None, map(reverse, args))
251+
for arg in args:
252+
print 'Where in the world is %s?' % arg
265253

266254

267255

268256
# The mappings
269257
nameorgs = {
258+
# New top level domains as described by ICANN
259+
# http://www.icann.org/tlds/
260+
"aero": "air-transport industry",
270261
"arpa": "Arpanet",
262+
"biz": "business",
271263
"com": "commercial",
264+
"coop": "cooperatives",
272265
"edu": "educational",
273266
"gov": "government",
267+
"info": "unrestricted `info'",
268+
"int": "international",
274269
"mil": "military",
270+
"museum": "museums",
271+
"name": "`name' (for registration by individuals)",
275272
"net": "networking",
276273
"org": "non-commercial",
277-
"int": "international",
274+
"pro": "professionals",
278275
# This isn't in the same class as those above, but is included here
279276
# because `uk' is the common practice country code for the United Kingdom.
280277
# AFAICT, the official `gb' code is routinely ignored!
@@ -525,7 +522,7 @@ countries = {
525522
"ve": "Venezuela",
526523
"vn": "Viet Nam",
527524
"vg": "Virgin Islands, British",
528-
"vi": "Virgin Islands, U.s.",
525+
"vi": "Virgin Islands, U.S.",
529526
"wf": "Wallis and Futuna",
530527
"eh": "Western Sahara",
531528
"ye": "Yemen",

0 commit comments

Comments
 (0)