Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit ebc5d50

Browse files
committed
Added data for parsing to python_projects. Git is trying to make lba_model2 its own repository.
1 parent ba89c76 commit ebc5d50

File tree

1 file changed

+110
-2
lines changed

1 file changed

+110
-2
lines changed

lba_model_parse.py

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,112 @@
1+
# import csv
2+
# import pandas as pd
3+
4+
5+
# csv.reader("lba_model2/RemoteChoice_001a.txt")
6+
# pd.read_csv("lba_model2/RemoteChoice_001a.txt")
7+
8+
# data = []
9+
# drt = []
10+
11+
# try:
12+
# with open('lba_model2/RemoteChoice_001a.txt', 'rb') as f:
13+
# reader = csv.reader(f)
14+
# for row in reader:
15+
# data.append(row)
16+
# print data[0:1]
17+
# for line in data:
18+
# if line[0:1] == 'DRT':
19+
# drt.append(line)
20+
# print len(drt)
21+
# finally:
22+
# print "done"
23+
24+
""" fields to it and finally write to CSV. Here's Python 3.x solution (I think Python 2.7+ should suffice):"""
125
import csv
2-
import pandas as pd
26+
import re
27+
28+
29+
def read_general(fname):
    """Read general info into a dict with 'PR 123'-like keys.

    The file is made of records. A record starts with a header line of the
    form ``<Name> -- <Division> (<Abbreviation>) <Id> [Age: <Age>]`` and is
    followed by any number of ``<key>: <value>`` detail lines. Blank lines
    are ignored.

    Args:
        fname: Path of the text file to parse.

    Returns:
        A dict mapping ``'<Abbreviation> <Id>'`` to a dict holding the header
        fields (``Name``, ``Division``, ``Division_Abbreviation``, ``Id``,
        ``Age``) plus every detail key/value of that record. All values are
        strings.

    Raises:
        ValueError: If a detail line appears before the first header line,
            or if a non-blank, non-header line does not contain ``': '``.
    """
    # Regexp that will split a header row into a ready-to-use dict
    re_name = re.compile(r'''
        (?P<Name>.+)
        \ --\                           # Separator + space
        (?P<Division>.+)
        \                               # Space
        \(
        (?P<Division_Abbreviation>.*)
        \)
        \                               # Space
        (?P<Id>\d+)
        \                               # Space
        \[Age:\                         # Space at the end
        (?P<Age>\d+)
        \]
    ''', re.X)

    general = {}
    man = None  # Record currently being filled; None until the first header

    with open(fname, 'rt') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            m = re_name.match(line)
            if m:
                # Name line, start new man
                man = m.groupdict()
                key = '%s %s' % (m.group('Division_Abbreviation'), m.group('Id'))
                general[key] = man
                continue

            # Non empty detail line: add the value to the current record
            if man is None:
                raise ValueError(
                    'detail line found before any header line: %r' % line)
            key, sep, value = line.partition(': ')
            if not sep:
                raise ValueError("expected '<key>: <value>', got: %r" % line)
            man[key] = value

    return general
69+
70+
71+
def add_bool_criteria(fname, field, general):
    """Append a field with a YES/NO value to every record.

    Every non-blank line of ``fname`` has the form
    ``<label>: <key>, <key>, ...``. The label is ignored and the keys of all
    lines are pooled. Each record in ``general`` then gets
    ``record[field] = 'YES'`` when its key is in the pool, ``'NO'`` otherwise.

    Args:
        fname: Path of the text file listing the keys.
        field: Name of the field to set on every record.
        general: Dict mapping record keys to record dicts; modified in place.

    Raises:
        ValueError: If a non-blank line does not contain ``': '``.
    """
    yes_keys = set()

    # Phase one, gather all keys
    with open(fname, 'rt') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no keys
                continue
            _, sep, keys = line.partition(': ')
            if not sep:
                raise ValueError("expected '<label>: <keys>', got: %r" % line)
            yes_keys.update(keys.split(', '))

    # Fill data; the file is no longer needed at this point
    for key, man in general.items():
        man[field] = 'YES' if key in yes_keys else 'NO'
87+
88+
89+
def save_csv(fname, general):
    """Write all records to a CSV file, one row per record.

    The header is the union of the field names of all records, in the order
    in which they are first encountered, so the column layout is the same on
    every run. Records lacking a field get an empty cell for it.

    Args:
        fname: Path of the CSV file to create (overwritten if it exists).
        general: Dict mapping record keys to record dicts; only the values
            are written.
    """
    # Gather field names, keeping first-seen order (a plain set would make
    # the column order vary between runs)
    all_fields = []
    seen = set()
    for record in general.values():
        for name in record:
            if name not in seen:
                seen.add(name)
                all_fields.append(name)

    # Write to csv; newline='' lets the csv module control line endings and
    # avoids doubled carriage returns on Windows
    with open(fname, 'wt', newline='') as f:
        w = csv.DictWriter(f, all_fields)
        w.writeheader()
        w.writerows(general.values())
100+
101+
102+
def main():
    """Build the records, print them and export them to CSV.

    Reads 'general.txt', adds the 'Car?' and 'House?' fields from 'cars.txt'
    and 'house.txt', pretty-prints the result and writes it to 'result.csv'.
    """
    import pprint

    records = read_general('general.txt')
    # Each pair is (source file, name of the YES/NO field it populates)
    for source, field in (('cars.txt', 'Car?'), ('house.txt', 'House?')):
        add_bool_criteria(source, field, records)
    pprint.pprint(records)
    save_csv('result.csv', records)
109+
3110

4-
pd.read_csv()
111+
# Run the pipeline only when executed as a script, not when imported
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)