1
+ # import csv
2
+ # import pandas as pd
3
+
4
+
5
+ # csv.reader("lba_model2/RemoteChoice_001a.txt")
6
+ # pd.read_csv("lba_model2/RemoteChoice_001a.txt")
7
+
8
+ # data = []
9
+ # drt = []
10
+
11
+ # try:
12
+ # with open('lba_model2/RemoteChoice_001a.txt', 'rb') as f:
13
+ # reader = csv.reader(f)
14
+ # for row in reader:
15
+ # data.append(row)
16
+ # print data[0:1]
17
+ # for line in data:
18
+ # if line[0:1] == 'DRT':
19
+ # drt.append(line)
20
+ # print len(drt)
21
+ # finally:
22
+ # print "done"
23
+
24
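"""Read per-person general info from a text file, append YES/NO criteria
fields to it, and finally write the result to CSV.

This is a Python 3.x solution (the original note suggests Python 2.7+ may
also suffice; that is unverified).
"""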
1
25
import csv
2
- import pandas as pd
26
+ import re
27
+
28
+
29
def read_general(fname):
    """Parse a general-info text file into a dict of per-person records.

    The file is read line by line (each line stripped of surrounding
    whitespace). A line matching the header layout
    ``<Name> -- <Division> (<Abbr>) <Id> [Age: <Age>]`` starts a new record
    holding the fields ``Name``, ``Division``, ``Division_Abbreviation``,
    ``Id`` and ``Age``. Every following non-empty line must look like
    ``<key>: <value>`` and is added to the most recent record. Blank lines
    are ignored.

    Args:
        fname: Path of the text file to read.

    Returns:
        A dict mapping ``'<Abbr> <Id>'`` keys (e.g. ``'PR 123'``) to the
        record dict. All stored values are strings. If two headers produce
        the same key, the later record replaces the earlier one.

    Raises:
        ValueError: If an attribute line appears before any header line, or
            if a non-empty, non-header line has no ``': '`` separator.
    """
    # Regexp that splits a header row into a ready-to-use dict.
    # Literal spaces are written as ``[ ]`` because verbose mode ignores
    # bare whitespace in the pattern.
    re_name = re.compile(r'''
        (?P<Name>.+)
        [ ]--[ ]                            # Separator surrounded by spaces
        (?P<Division>.+)
        [ ]                                 # Space
        \(
            (?P<Division_Abbreviation>.*)
        \)
        [ ]                                 # Space
        (?P<Id>\d+)
        [ ]                                 # Space
        \[Age:[ ]                           # Space at the end
        (?P<Age>\d+)
        \]
        ''', re.X)

    general = {}
    man = None  # Record currently being filled; None until the first header.

    with open(fname, 'rt') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            m = re_name.match(line)
            if m:
                # Header line: start a new record keyed like 'PR 123'.
                man = m.groupdict()
                key = '%s %s' % (m.group('Division_Abbreviation'),
                                 m.group('Id'))
                general[key] = man
                continue

            # Attribute line: attach "key: value" to the current record.
            if man is None:
                raise ValueError(
                    'attribute line before any header line: %r' % line)
            key, sep, value = line.partition(': ')
            if not sep:
                raise ValueError(
                    "expected 'key: value' but got: %r" % line)
            man[key] = value

    return general
69
+
70
+
71
def add_bool_criteria(fname, field, general):
    """Add a YES/NO field to every record in *general*, in place.

    Each non-empty line of *fname* must look like
    ``<label>: <key>, <key>, ...``. The label before the first ``': '`` is
    ignored; the comma-plus-space separated keys after it are collected
    from all lines. Every record in *general* then gets
    ``record[field] = 'YES'`` when its key was listed and ``'NO'``
    otherwise.

    Args:
        fname: Path of the criteria text file.
        field: Name of the field to set on each record.
        general: Dict mapping keys to record dicts; the records are mutated.

    Returns:
        None.

    Raises:
        ValueError: If a non-empty line has no ``': '`` separator.
    """
    yes_keys = set()

    # Phase one: gather all keys listed in the file.
    with open(fname, 'rt') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no keys.
                continue
            _, sep, keys = line.partition(': ')
            if not sep:
                raise ValueError(
                    "expected 'label: key, key, ...' but got: %r" % line)
            yes_keys.update(keys.split(', '))

    # Phase two: fill the field on every record.
    for key, man in general.items():
        man[field] = 'YES' if key in yes_keys else 'NO'
87
+
88
+
89
def save_csv(fname, general):
    """Write all records in *general* to a CSV file with a header row.

    The columns are the union of the field names of all records, in the
    order in which they are first encountered while iterating over the
    records, so the column order is the same on every run. A record that
    lacks a column gets an empty cell (the ``csv.DictWriter`` default).

    Args:
        fname: Path of the CSV file to create or overwrite.
        general: Dict mapping keys to record dicts; only the values are
            written.

    Returns:
        None.
    """
    # Gather field names in first-seen order; a plain set would make the
    # column order vary between runs.
    fieldnames = []
    seen = set()
    for record in general.values():
        for name in record:
            if name not in seen:
                seen.add(name)
                fieldnames.append(name)

    # newline='' lets the csv module control line endings itself, as the
    # csv documentation requires for file objects passed to a writer.
    with open(fname, 'w', newline='') as f:
        w = csv.DictWriter(f, fieldnames)
        w.writeheader()
        w.writerows(general.values())
100
+
101
+
102
def main():
    """Run the whole pipeline on the fixed input and output file names.

    Reads 'general.txt', adds the 'Car?' field from 'cars.txt' and the
    'House?' field from 'house.txt', pretty-prints the resulting dict and
    saves it to 'result.csv'.
    """
    from pprint import pprint

    general = read_general('general.txt')

    # Each criteria file contributes one YES/NO column.
    for source, column in (('cars.txt', 'Car?'), ('house.txt', 'House?')):
        add_bool_criteria(source, column, general)

    pprint(general)
    save_csv('result.csv', general)
109
+
3
110
4
- pd .read_csv ()
111
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments