Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0f49e0b

Browse files
committed
Fixed input - now consumes less memory.
The input used to be pickled into a single file and read into memory all at once. Now each tableau is pickled to its own file, and a generator unpickles them one at a time. Because the learner is an online learner, only one tableau needs to be in memory at a time.
1 parent 4966d12 commit 0f49e0b

File tree

6 files changed

+269
-25
lines changed

6 files changed

+269
-25
lines changed

GEN.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import cPickle, csv, copy, numpy, random
1+
import cPickle, csv, copy, numpy, random, glob, os, errno
22
from mapping import Mapping, Change
33

44
class Input:
@@ -15,19 +15,29 @@ def __init__(self, feature_dict, input_file, remake_input, gen_type, gen_args =
1515
self.feature_dict = feature_dict
1616
self.Generator = RandomGen if gen_type == 'random' else DeterministicGen
1717
self.gen_args = gen_args
18-
try:
19-
assert remake_input == False
20-
saved_file = open('save-' + input_file, 'rb')
21-
self.allinputs = cPickle.load(saved_file)
18+
input_dir = 'save-' + input_file
19+
self.make_dir(input_dir)
20+
self.input_files = [x for x in glob.glob(input_dir + '/*')]
21+
if remake_input == False and self.input_files != []:
22+
#saved_file = open('save-' + input_file, 'rb')
23+
#self.allinputs = cPickle.load(saved_file)
2224
print 'read from file'
23-
except (IOError, AssertionError):
24-
self.allinputs = self.make_input(input_file)
25-
saved_file = open('save-' + input_file, 'wb')
26-
cPickle.dump(self.allinputs, saved_file)
27-
finally:
28-
saved_file.close()
25+
else:
26+
self.input_files = self.make_input(input_file, input_dir)
27+
#saved_file = open('save-' + input_file, 'wb')
28+
#cPickle.dump(self.allinputs, saved_file)
29+
#finally:
30+
#saved_file.close()
2931
print 'done making input'
3032

33+
def make_dir(self, input_dir):
34+
'''Make the directory if it doesn't already exist so that the files will have a place to go.'''
35+
try:
36+
os.makedirs(input_dir)
37+
except OSError as exception:
38+
if exception.errno != errno.EEXIST:
39+
raise exception
40+
3141
def find_stem(self, affixes):
3242
for i, affix in enumerate(affixes):
3343
for j, position in enumerate(affix):
@@ -37,26 +47,29 @@ def find_stem(self, affixes):
3747
if len(segment_class) > 1:
3848
affixes[i][j] = segment_class[0].intersection(*segment_class[1:])
3949

40-
def make_input(self, infile):
50+
def make_input(self, input_file, input_dir):
4151
"""Based on file of lines of the form "1,underlyingform,surfaceform,changes"
4252
create Mapping objects with those attributes.
4353
Bundle mappings into tableaux."""
44-
allinputs = []
45-
with open(infile, 'r') as f:
54+
input_files = []
55+
with open(input_file, 'r') as f:
4656
fread = list(csv.reader(f))
4757
if self.gen_args:
4858
gen = self.Generator(self.feature_dict, *self.gen_args)
4959
else:
5060
gen = self.Generator(self.feature_dict)
51-
for line in fread:
61+
for i, line in enumerate(fread):
5262
mapping = Mapping(self.feature_dict, line)
5363
mapping.to_data()
5464
negatives = gen.ungrammaticalize(mapping)
5565
mapping.add_boundaries()
5666
mapping.set_ngrams()
5767
tableau = [mapping] + negatives
58-
allinputs.append(tableau)
59-
return allinputs
68+
tableau_file = ''.join([input_dir, '/', str(i), '.txt'])
69+
with open(tableau_file, 'w') as f:
70+
cPickle.dump(tableau, f)
71+
input_files.append(tableau_file)
72+
return input_files
6073

6174
class Gen:
6275
"""Generates input-output mappings."""
@@ -253,3 +266,12 @@ def make_new_mapping(self, old_mapping, locus, features):
253266
new_mapping.set_ngrams()
254267
return new_mapping
255268

269+
270+
271+
def open_tableau(file_list):
272+
"""Generator that yields one tableau at a time."""
273+
for tableau_file in file_list:
274+
with open(tableau_file, 'r') as f:
275+
tableau = cPickle.load(f)
276+
yield tableau
277+

TurkishSmallInput.csv

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
1,+red+,ret,stem change voi 1 d
2+
1,+rySd+,rySt,stem change voi 1 d
3+
1,+derd+,dert,stem change voi 1 d
4+
1,+ferd+,fert,stem change voi 1 d
5+
1,+dqrd+,dqrt,stem change voi 1 d
6+
1,+Deng+,Denk,stem change voi 1 g
7+
1,+deng+,denk,stem change voi 1 g
8+
1,+reng+,renk,stem change voi 1 g
9+
1,+fing+,fink,stem change voi 1 g
10+
1,+gong+,gonk,stem change voi 1 g
11+
1,+Dartlag+,Dartlak,stem change voi 1 g
12+
1,+myeddeb+,myeddep,stem change voi 1 b
13+
1,+muazzeb+,muazzep,stem change voi 1 b
14+
1,+muakkib+,muakkip,stem change voi 1 b
15+
1,+taassub+,taassup,stem change voi 1 b
16+
1,+taaDDyb+,taaDDyp,stem change voi 1 b
17+
1,+teeddyb+,teeddyp,stem change voi 1 b
18+
1,+akwtaD+,akwtaT,change voi 1 T
19+
1,+wSwtaD+,wSwtaT,change voi 1 T
20+
1,+okutaD+,okutaT,change voi 1 T
21+
1,+yreteD+,yreteT,change voi 1 T
22+
1,+gqt+,gqt,none
23+
1,+hqt+,hqt,none
24+
1,+but+,but,none
25+
1,+Dob+im,Dobum,change round -1 i;change back -1 i
26+
1,+rob+im,robum,change round -1 i;change back -1 i
27+
1,+kyb+im,kybym,change round -1 i
28+
1,+tyb+im,tybym,change round -1 i
29+
1,+aabaad+im,aabaadwm,change back -1 i
30+
1,+iiraad+im,iiraadwm,change back -1 i

cluster_jobs.sh

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
2+
python one_run.py 0 0 1a
3+
python one_run.py 0 0 2a
4+
python one_run.py 0 0 3a
5+
python one_run.py 0 0 4a
6+
python one_run.py 0 0 5a
7+
python one_run.py 0 0 6a
8+
python one_run.py 0 0 7a
9+
python one_run.py 0 0 8a
10+
python one_run.py 0 0 9a
11+
python one_run.py 0 0 10a
12+
python one_run.py 0 0 11a
13+
python one_run.py 0 0 12a
14+
python one_run.py 0 0 13a
15+
python one_run.py 0 0 14a
16+
python one_run.py 0 0 15a
17+
python one_run.py 0 0 16a
18+
python one_run.py 0 0 17a
19+
python one_run.py 0 0 18a
20+
python one_run.py 0 0 19a
21+
python one_run.py 0 0 20a
22+
23+
python one_run.py 0 .4 1b
24+
python one_run.py 0 .4 2b
25+
python one_run.py 0 .4 3b
26+
python one_run.py 0 .4 4b
27+
python one_run.py 0 .4 5b
28+
python one_run.py 0 .4 6b
29+
python one_run.py 0 .4 7b
30+
python one_run.py 0 .4 8b
31+
python one_run.py 0 .4 9b
32+
python one_run.py 0 .4 10b
33+
python one_run.py 0 .4 11b
34+
python one_run.py 0 .4 12b
35+
python one_run.py 0 .4 13b
36+
python one_run.py 0 .4 14b
37+
python one_run.py 0 .4 15b
38+
python one_run.py 0 .4 16b
39+
python one_run.py 0 .4 17b
40+
python one_run.py 0 .4 18b
41+
python one_run.py 0 .4 19b
42+
python one_run.py 0 .4 20b
43+
44+
python one_run.py 0 .8 1c
45+
python one_run.py 0 .8 2c
46+
python one_run.py 0 .8 3c
47+
python one_run.py 0 .8 4c
48+
python one_run.py 0 .8 5c
49+
python one_run.py 0 .8 6c
50+
python one_run.py 0 .8 7c
51+
python one_run.py 0 .8 8c
52+
python one_run.py 0 .8 9c
53+
python one_run.py 0 .8 10c
54+
python one_run.py 0 .8 11c
55+
python one_run.py 0 .8 12c
56+
python one_run.py 0 .8 13c
57+
python one_run.py 0 .8 14c
58+
python one_run.py 0 .8 15c
59+
python one_run.py 0 .8 16c
60+
python one_run.py 0 .8 17c
61+
python one_run.py 0 .8 18c
62+
python one_run.py 0 .8 19c
63+
python one_run.py 0 .8 20c
64+
65+
python one_run.py .4 0 1d
66+
python one_run.py .4 0 2d
67+
python one_run.py .4 0 3d
68+
python one_run.py .4 0 4d
69+
python one_run.py .4 0 5d
70+
python one_run.py .4 0 6d
71+
python one_run.py .4 0 7d
72+
python one_run.py .4 0 8d
73+
python one_run.py .4 0 9d
74+
python one_run.py .4 0 10d
75+
python one_run.py .4 0 11d
76+
python one_run.py .4 0 12d
77+
python one_run.py .4 0 13d
78+
python one_run.py .4 0 14d
79+
python one_run.py .4 0 15d
80+
python one_run.py .4 0 16d
81+
python one_run.py .4 0 17d
82+
python one_run.py .4 0 18d
83+
python one_run.py .4 0 19d
84+
python one_run.py .4 0 20d
85+
86+
python one_run.py .4 .4 1e
87+
python one_run.py .4 .4 2e
88+
python one_run.py .4 .4 3e
89+
python one_run.py .4 .4 4e
90+
python one_run.py .4 .4 5e
91+
python one_run.py .4 .4 6e
92+
python one_run.py .4 .4 7e
93+
python one_run.py .4 .4 8e
94+
python one_run.py .4 .4 9e
95+
python one_run.py .4 .4 10e
96+
python one_run.py .4 .4 11e
97+
python one_run.py .4 .4 12e
98+
python one_run.py .4 .4 13e
99+
python one_run.py .4 .4 14e
100+
python one_run.py .4 .4 15e
101+
python one_run.py .4 .4 16e
102+
python one_run.py .4 .4 17e
103+
python one_run.py .4 .4 18e
104+
python one_run.py .4 .4 19e
105+
python one_run.py .4 .4 20e
106+
107+
python one_run.py .4 .8 1f
108+
python one_run.py .4 .8 2f
109+
python one_run.py .4 .8 3f
110+
python one_run.py .4 .8 4f
111+
python one_run.py .4 .8 5f
112+
python one_run.py .4 .8 6f
113+
python one_run.py .4 .8 7f
114+
python one_run.py .4 .8 8f
115+
python one_run.py .4 .8 9f
116+
python one_run.py .4 .8 10f
117+
python one_run.py .4 .8 11f
118+
python one_run.py .4 .8 12f
119+
python one_run.py .4 .8 13f
120+
python one_run.py .4 .8 14f
121+
python one_run.py .4 .8 15f
122+
python one_run.py .4 .8 16f
123+
python one_run.py .4 .8 17f
124+
python one_run.py .4 .8 18f
125+
python one_run.py .4 .8 19f
126+
python one_run.py .4 .8 20f
127+
128+
python one_run.py .8 0 1g
129+
python one_run.py .8 0 2g
130+
python one_run.py .8 0 3g
131+
python one_run.py .8 0 4g
132+
python one_run.py .8 0 5g
133+
python one_run.py .8 0 6g
134+
python one_run.py .8 0 7g
135+
python one_run.py .8 0 8g
136+
python one_run.py .8 0 9g
137+
python one_run.py .8 0 10g
138+
python one_run.py .8 0 11g
139+
python one_run.py .8 0 12g
140+
python one_run.py .8 0 13g
141+
python one_run.py .8 0 14g
142+
python one_run.py .8 0 15g
143+
python one_run.py .8 0 16g
144+
python one_run.py .8 0 17g
145+
python one_run.py .8 0 18g
146+
python one_run.py .8 0 19g
147+
python one_run.py .8 0 20g
148+
149+
python one_run.py .8 .4 1h
150+
python one_run.py .8 .4 2h
151+
python one_run.py .8 .4 3h
152+
python one_run.py .8 .4 4h
153+
python one_run.py .8 .4 5h
154+
python one_run.py .8 .4 6h
155+
python one_run.py .8 .4 7h
156+
python one_run.py .8 .4 8h
157+
python one_run.py .8 .4 9h
158+
python one_run.py .8 .4 10h
159+
python one_run.py .8 .4 11h
160+
python one_run.py .8 .4 12h
161+
python one_run.py .8 .4 13h
162+
python one_run.py .8 .4 14h
163+
python one_run.py .8 .4 15h
164+
python one_run.py .8 .4 16h
165+
python one_run.py .8 .4 17h
166+
python one_run.py .8 .4 18h
167+
python one_run.py .8 .4 19h
168+
python one_run.py .8 .4 20h
169+
170+
python one_run.py .8 .8 1i
171+
python one_run.py .8 .8 2i
172+
python one_run.py .8 .8 3i
173+
python one_run.py .8 .8 4i
174+
python one_run.py .8 .8 5i
175+
python one_run.py .8 .8 6i
176+
python one_run.py .8 .8 7i
177+
python one_run.py .8 .8 8i
178+
python one_run.py .8 .8 9i
179+
python one_run.py .8 .8 10i
180+
python one_run.py .8 .8 11i
181+
python one_run.py .8 .8 12i
182+
python one_run.py .8 .8 13i
183+
python one_run.py .8 .8 14i
184+
python one_run.py .8 .8 15i
185+
python one_run.py .8 .8 16i
186+
python one_run.py .8 .8 17i
187+
python one_run.py .8 .8 18i
188+
python one_run.py .8 .8 19i
189+
python one_run.py .8 .8 20i

learner-test3.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,12 @@
6060
#print faith
6161
assert [] == dgen.make_faithful_cand(faithfulword)
6262

63-
inputs = Input(fd, 'TurkishInput4NoVHExceptions.csv', True, 'deterministic')
63+
#with open('TurkishSmallInput.csv', 'r') as f:
64+
#fread = list(csv.reader(f))
65+
#print fread
66+
inputs = Input(fd, 'TurkishSmallInput.csv', True, 'deterministic')
6467
tablengths = []
65-
for tableau in inputs.allinputs:
68+
for tableau in open_tableau(inputs.input_files):
6669
tablengths.append(len(tableau))
6770
for mapping in tableau:
6871
print mapping

learner.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/usr/bin/env python
2-
import copy, numpy, random, datetime, csv
2+
import copy, numpy, random, datetime
33
#import matplotlib.pyplot as pyplot
44
from featuredict import FeatureDict
5-
from GEN import Input
5+
from GEN import Input, open_tableau
66
from CON import Con
77
#from matplotlib.backends.backend_pdf import PdfPages
88

@@ -53,7 +53,7 @@ def update(self, grammatical_winner, computed_winner):
5353
def train(self, inputs):
5454
self.errors = []
5555
self.error_numbers = []
56-
for i, tableau in enumerate(inputs):
56+
for i, tableau in enumerate(open_tableau(inputs)):
5757
random.shuffle(tableau)
5858
self.train_tableau(tableau, i)
5959
return (self.errors, self.error_numbers)
@@ -143,9 +143,9 @@ def __init__(self, feature_chart, input_file, remake_input = False,
143143
'Frequency of Induction Upon Error: ' + str(induction_freq)]))
144144

145145
def make_input(self):
146-
"""Use Input class to convert input file to data structure or access previously saved data structure."""
146+
"""Create or access candidate sets; get list of their filenames."""
147147
inputs = Input(**self.input_args)
148-
self.all_input = inputs.allinputs
148+
self.all_input = inputs.input_files
149149

150150
def divide_input(self):
151151
"""Choose training set and test set."""

one_run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# usage: python one_run.py 0 0.4 1
1+
# usage: python one_run.py 0 0.4 1-0-40
22

33
import learner, sys, csv
44

0 commit comments

Comments
 (0)