|
| 1 | + |
| 2 | + |
| 3 | + |
| 4 | +## This is not a regular expression. You can use simple string methods to perform actions on a string, |
| 5 | +## however you're limited in the actions you can perform. |
| 6 | +>>> s = '100 NORTH MAIN ROAD' |
| 7 | +>>> s.replace('ROAD', 'RD.') ① |
| 8 | +'100 NORTH MAIN RD.' |
| 9 | + |
| 10 | +### Doing substitutions with regex ('$' anchors the match to the end of the string, so the 'ROAD' inside 'BROAD' is left alone) |
| 11 | +>>> s = '100 NORTH BROAD ROAD' |
| 12 | +>>> import re ④ |
| 13 | +>>> re.sub('ROAD$', 'RD.', s) ⑤ |
| 14 | +'100 NORTH BROAD RD.' |
| 15 | + |
| 16 | + |
| 17 | +### This example displays use of a raw string (the r prefix). It means "do not treat backslashes as escape characters". This is required because |
| 18 | +### in ordinary Python strings, backslashes '\' must be escaped. You can avoid this in a raw string. |
| 19 | +>>> s = '100 BROAD ROAD APT. 3' |
| 20 | +>>> re.sub(r'\bROAD\b', 'RD.', s) ④ |
| 21 | +'100 BROAD RD. APT. 3' |
| 22 | + |
| 23 | +#### a straightforward match |
| 24 | +>>> import re |
| 25 | +>>> pattern = '^M?M?M?$' ① |
| 26 | +>>> re.search(pattern, 'M') ② |
| 27 | +<_sre.SRE_Match object at 0106FB58> |
| 28 | +>>> pattern = '^M?M?M?(CM|CD|D?C?C?C?)$' ① |
| 29 | +>>> re.search(pattern, 'MMMCCC') ④ |
| 30 | +<_sre.SRE_Match object at 010748A8> |
| 31 | + |
| 32 | + |
| 33 | + |
| 34 | +#### Verbose Regex: This is a feature Python allows. It ignores whitespace and comments inside the pattern. It lets you do |
| 35 | +#### something pretty like this. Notice you need to pass the 're.VERBOSE' argument to enable this feature. |
| 36 | + |
| 37 | +>>> pattern = ''' |
| 38 | + ^ # beginning of string |
| 39 | + M{0,3} # thousands - 0 to 3 Ms |
| 40 | + (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs), |
| 41 | + # or 500-800 (D, followed by 0 to 3 Cs) |
| 42 | + (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs), |
| 43 | + # or 50-80 (L, followed by 0 to 3 Xs) |
| 44 | + (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is), |
| 45 | + # or 5-8 (V, followed by 0 to 3 Is) |
| 46 | + $ # end of string |
| 47 | + ''' |
| 48 | +>>> re.search(pattern, 'M', re.VERBOSE) ① |
| 49 | +<_sre.SRE_Match object at 0x008EEB48> |
| 50 | +>>> re.search(pattern, 'MCMLXXXIX', re.VERBOSE) ② |
| 51 | +<_sre.SRE_Match object at 0x008EEB48> |
| 52 | + |
| 53 | + |
| 54 | +>>> phonePattern = re.compile(r'(\d{3})\D*(\d{3})\D*(\d{4})\D*(\d*)$') ① |
| 55 | +>>> phonePattern.search('work 1-(800) 555.1212 #1234').groups() ② |
| 56 | +('800', '555', '1212', '1234') |
| 57 | + |
| 58 | + |
| 59 | +>>> phonePattern = re.compile(r''' |
| 60 | + # don't match beginning of string, number can start anywhere |
| 61 | + (\d{3}) # area code is 3 digits (e.g. '800') |
| 62 | + \D* # optional separator is any number of non-digits |
| 63 | + (\d{3}) # trunk is 3 digits (e.g. '555') |
| 64 | + \D* # optional separator |
| 65 | + (\d{4}) # rest of number is 4 digits (e.g. '1212') |
| 66 | + \D* # optional separator |
| 67 | + (\d*) # extension is optional and can be any number of digits |
| 68 | + $ # end of string |
| 69 | + ''', re.VERBOSE) |
| 70 | +>>> phonePattern.search('work 1-(800) 555.1212 #1234').groups() ① |
| 71 | +('800', '555', '1212', '1234') |
| 72 | +>>> phonePattern.search('800-555-1212').groups() ② |
| 73 | +('800', '555', '1212', '') |
| 74 | +① Other than being spread out over multiple lines, this is exactly the same regular expression as the last step, so it’s no surprise that it parses the same inputs. |
| 75 | +② Final sanity check. Yes, this still works. You’re done. |
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | +## Reference |
| 81 | +http://www.diveintopython3.net/regular-expressions.html |
0 commit comments