33"""world -- Print mappings between country names and DNS country codes.
44
55Contact: Barry Warsaw
6- Email: bwarsaw@python.org
6+ Email: barry@python.org
77Version: %(__version__)s
88
99This script will take a list of Internet addresses and print out where in the
@@ -14,9 +14,9 @@ code found in the address. Addresses can be in any of the following forms:
1414 host.domain.xx -- any Internet host or network name
1515 somebody@where.xx -- an Internet email address
1616
17- If no match is found, the address is interpreted as a regular expression [*]
18- and a reverse lookup is attempted. This script will search the country names
19- and print a list of matching entries. You can force reverse mappings with the
17+ If no match is found, the address is interpreted as a regular expression and a
18+ reverse lookup is attempted. This script will search the country names and
19+ print a list of matching entries. You can force reverse mappings with the
2020`-r' flag (see below).
2121
2222For example:
@@ -34,10 +34,6 @@ For example:
3434 tz: Tanzania, United Republic of
3535 gb: United Kingdom
3636
37-
38- [*] Note that regular expressions must conform to Python 1.5's re.py module
39- syntax. The comparison is done with the search() method.
40-
4137Country codes are maintained by the RIPE Network Coordination Centre,
4238in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
4339authoritative source of country code mappings is:
@@ -69,7 +65,7 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
6965 When used in conjunction with the `-p' option, output is in the form
7066 of a Python dictionary, and country names are normalized
7167 w.r.t. capitalization. This makes it appropriate for cutting and
72- pasting back into this file.
68+ pasting back into this file. Output is always to standard out.
7369
7470 --reverse
7571 -r
@@ -82,18 +78,13 @@ Usage: %(PROGRAM)s [-d] [-p file] [-o] [-h] addr [addr ...]
8278 -h
8379 --help
8480 Print this message.
85-
8681"""
8782__version__ = '$Revision$'
8883
8984
9085import sys
9186import getopt
92- try :
93- import re
94- except ImportError :
95- print sys .argv [0 ], 'requires Python 1.5'
96- sys .exit (1 )
87+ import re
9788
9889PROGRAM = sys .argv [0 ]
9990
@@ -110,105 +101,102 @@ def usage(code, msg=''):
110101def resolve (rawaddr ):
111102 parts = rawaddr .split ('.' )
112103 if not len (parts ):
113- # no top level domain found, bounce it to the next step
114- return rawaddr
104+ # no top level domain found, bounce it to the next step
105+ return rawaddr
115106 addr = parts [- 1 ]
116107 if nameorgs .has_key (addr ):
117- if nameorgs [addr ][0 ].lower () in 'aeiou' :
118- ana = 'an'
119- else :
120- ana = 'a'
121- print rawaddr , 'is from' , ana , nameorgs [addr ], 'organization'
122- return None
108+ print rawaddr , 'is in the' , nameorgs [addr ], 'top level domain'
109+ return None
123110 elif countries .has_key (addr ):
124- print rawaddr , 'originated from' , countries [addr ]
125- return None
111+ print rawaddr , 'originated from' , countries [addr ]
112+ return None
126113 else :
127- # Not resolved, bounce it to the next step
128- return rawaddr
114+ # Not resolved, bounce it to the next step
115+ return rawaddr
129116
130117
131118
132119def reverse (regexp ):
133120 matches = []
134121 cre = re .compile (regexp , re .IGNORECASE )
135122 for code , country in all .items ():
136- mo = cre .search (country )
137- if mo :
138- matches .append (code )
123+ mo = cre .search (country )
124+ if mo :
125+ matches .append (code )
139126 # print results
140127 if not matches :
141- # not resolved, bounce it to the next step
142- return regexp
128+ # not resolved, bounce it to the next step
129+ return regexp
143130 if len (matches ) == 1 :
144- code = matches [0 ]
145- print regexp , "matches code `%s', %s" % (code , all [code ])
131+ code = matches [0 ]
132+ print regexp , "matches code `%s', %s" % (code , all [code ])
146133 else :
147- print regexp , 'matches %d countries:' % len (matches )
148- for code in matches :
149- print " %s: %s" % (code , all [code ])
134+ print regexp , 'matches %d countries:' % len (matches )
135+ for code in matches :
136+ print " %s: %s" % (code , all [code ])
150137 return None
151138
152139
153140
154141def parse (file , normalize ):
155142 try :
156- fp = open (file )
143+ fp = open (file )
157144 except IOError , (err , msg ):
158- print msg , ':' , file
145+ print msg , ':' , file
159146
160147 cre = re .compile ('(.*?)[ \t ]+([A-Z]{2})[ \t ]+[A-Z]{3}[ \t ]+[0-9]{3}' )
161148 scanning = 0
162149
163150 if normalize :
164- print 'country = {'
151+ print 'countries = {'
165152
166153 while 1 :
167- line = fp .readline ()
168- if line == '' :
169- break # EOF
170- if scanning :
171- mo = cre .match (line )
172- if not mo :
173- line = line .strip ()
174- if not line :
175- continue
176- elif line [0 ] == '-' :
177- break
178- else :
179- print 'Could not parse line:' , line
180- continue
181- country , code = mo .group (1 , 2 )
182- if normalize :
183- words = country .split ()
184- for i in range (len (words )):
185- w = words [i ]
186- # XXX special cases
187- if w in ('AND' , 'OF' , 'OF)' , 'name:' , 'METROPOLITAN' ):
188- words [i ] = w .lower ()
189- elif w == 'THE' and i != 1 :
190- words [i ] = w .lower ()
191- elif len (w ) > 3 and w [1 ] == "'" :
192- words [i ] = w [0 :3 ].upper () + w [3 :].lower ()
193- elif w == '(U.S.)' :
194- pass
195- elif w [0 ] == '(' and w != '(local' :
196- words [i ] = '(' + w [1 :].capitalize ()
197- elif w .find ('-' ) != - 1 :
198- words [i ] = '-' .join ([s .capitalize () for s in w .split ('-' )])
199- else :
200- words [i ] = w .capitalize ()
201- code = code .lower ()
202- country = ' ' .join (words )
203- print ' "%s": "%s",' % (code , country )
204- else :
205- print code , country
206-
207- elif line [0 ] == '-' :
208- scanning = 1
154+ line = fp .readline ()
155+ if line == '' :
156+ break # EOF
157+ if scanning :
158+ mo = cre .match (line )
159+ if not mo :
160+ line = line .strip ()
161+ if not line :
162+ continue
163+ elif line [0 ] == '-' :
164+ break
165+ else :
166+ print 'Could not parse line:' , line
167+ continue
168+ country , code = mo .group (1 , 2 )
169+ if normalize :
170+ words = country .split ()
171+ for i in range (len (words )):
172+ w = words [i ]
173+ # XXX special cases
174+ if w in ('AND' , 'OF' , 'OF)' , 'name:' , 'METROPOLITAN' ):
175+ words [i ] = w .lower ()
176+ elif w == 'THE' and i <> 1 :
177+ words [i ] = w .lower ()
178+ elif len (w ) > 3 and w [1 ] == "'" :
179+ words [i ] = w [0 :3 ].upper () + w [3 :].lower ()
180+ elif w in ('(U.S.)' , 'U.S.' ):
181+ pass
182+ elif w [0 ] == '(' and w <> '(local' :
183+ words [i ] = '(' + w [1 :].capitalize ()
184+ elif w .find ('-' ) <> - 1 :
185+ words [i ] = '-' .join (
186+ [s .capitalize () for s in w .split ('-' )])
187+ else :
188+ words [i ] = w .capitalize ()
189+ code = code .lower ()
190+ country = ' ' .join (words )
191+ print ' "%s": "%s",' % (code , country )
192+ else :
193+ print code , country
194+
195+ elif line [0 ] == '-' :
196+ scanning = 1
209197
210198 if normalize :
211- print ' }'
199+ print ' }'
212200
213201
214202def main ():
@@ -228,53 +216,62 @@ def main():
228216 usage (1 , msg )
229217
230218 for opt , arg in opts :
231- if opt in ('-h' , '--help' ):
232- help = 1
233- elif opt in ('-d' , '--dump' ):
234- dump = 1
235- elif opt in ('-p' , '--parse' ):
236- parsefile = arg
237- elif opt in ('-o' , '--output ' ):
238- normalize = 1
239- elif opt in ('-r' , '--reverse' ):
240- forcerev = 1
219+ if opt in ('-h' , '--help' ):
220+ help = 1
221+ elif opt in ('-d' , '--dump' ):
222+ dump = 1
223+ elif opt in ('-p' , '--parse' ):
224+ parsefile = arg
225+ elif opt in ('-o' , '--outputdict ' ):
226+ normalize = 1
227+ elif opt in ('-r' , '--reverse' ):
228+ forcerev = 1
241229
242230 if help :
243- usage (status )
231+ usage (status )
244232
245233 if dump :
246- print 'Non-geographic domains:'
247- codes = nameorgs .keys ()
248- codes .sort ()
249- for code in codes :
250- print ' %4s:' % code , nameorgs [code ]
251-
252- print '\n Country coded domains:'
253- codes = countries .keys ()
254- codes .sort ()
255- for code in codes :
256- print ' %2s:' % code , countries [code ]
234+ print 'Non-geographic domains:'
235+ codes = nameorgs .keys ()
236+ codes .sort ()
237+ for code in codes :
238+ print ' %4s:' % code , nameorgs [code ]
239+
240+ print '\n Country coded domains:'
241+ codes = countries .keys ()
242+ codes .sort ()
243+ for code in codes :
244+ print ' %2s:' % code , countries [code ]
257245 elif parsefile :
258- parse (parsefile , normalize )
246+ parse (parsefile , normalize )
259247 else :
260- if not forcerev :
261- args = filter (None , map (resolve , args ))
262- args = filter (None , map (reverse , args ))
263- for arg in args :
264- print 'Where in the world is %s?' % arg
248+ if not forcerev :
249+ args = filter (None , map (resolve , args ))
250+ args = filter (None , map (reverse , args ))
251+ for arg in args :
252+ print 'Where in the world is %s?' % arg
265253
266254
267255
268256# The mappings
269257nameorgs = {
258+ # New top level domains as described by ICANN
259+ # http://www.icann.org/tlds/
260+ "aero" : "air-transport industry" ,
270261 "arpa" : "Arpanet" ,
262+ "biz" : "business" ,
271263 "com" : "commercial" ,
264+ "coop" : "cooperatives" ,
272265 "edu" : "educational" ,
273266 "gov" : "government" ,
267+ "info" : "unrestricted `info'" ,
268+ "int" : "international" ,
274269 "mil" : "military" ,
270+ "museum" : "museums" ,
271+ "name" : "`name' (for registration by individuals)" ,
275272 "net" : "networking" ,
276273 "org" : "non-commercial" ,
277- "int " : "international " ,
274+ "pro " : "professionals " ,
278275 # This isn't in the same class as those above, but is included here
279276 # because `uk' is the common practice country code for the United Kingdom.
280277 # AFAICT, the official `gb' code is routinely ignored!
@@ -525,7 +522,7 @@ countries = {
525522 "ve" : "Venezuela" ,
526523 "vn" : "Viet Nam" ,
527524 "vg" : "Virgin Islands, British" ,
528- "vi" : "Virgin Islands, U.s ." ,
525+ "vi" : "Virgin Islands, U.S ." ,
529526 "wf" : "Wallis and Futuna" ,
530527 "eh" : "Western Sahara" ,
531528 "ye" : "Yemen" ,
0 commit comments