 from argparse import ArgumentParser
 from configparser import ConfigParser
 from pathlib import Path
+from datetime import datetime
+from sklearn import model_selection
 
 # from string import printable
 import pandas as pd
@@ -43,17 +45,22 @@ def __init__(self):
         self.config = self.util.load_config("config/classifier.yaml")
         self.model = None
         self.history = None
+        self.arch = ModelArchs(self.config)
 
     def update_config_args(self, paras):
         """create model dir to store it
         set the config args from the command line if provided"""
         self.config["model"]["name"] = paras.model if paras.model else self.config["model"]["name"]
         self.config["data_file"] = paras.data if paras.data else self.config["data_file"]
+
         if self.config['debug']:
             self.config['dnn']['epochs'] = self.config['dnn']['debug_epochs']
+            time_now = ''
+        else:
+            time_now = datetime.now().strftime('%Y-%m-%d_%H.%Mm')
 
         mdir = self.config["model"]["name"] + "-" + str(self.config["dnn"]["epochs"]) + \
-            "-" + Path(self.config["data_file"]).stem + "/"
+            "-" + Path(self.config["data_file"]).stem + "-" + time_now + "/"
 
         self.config["model"]["path"] = self.config["model"]["path"] + mdir
 
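For reference, a minimal sketch of how the timestamped directory name composes under this change (the config values below are hypothetical; only the `mdir` expression and the `%Y-%m-%d_%H.%Mm` format come from the diff):

```python
from datetime import datetime
from pathlib import Path

# Hypothetical config values, for illustration only.
config = {"model": {"name": "RNN", "path": "trained/"},
          "dnn": {"epochs": 20},
          "data_file": "data/samples.csv"}

time_now = datetime.now().strftime('%Y-%m-%d_%H.%Mm')  # e.g. '2024-05-04_10.30m'
mdir = config["model"]["name"] + "-" + str(config["dnn"]["epochs"]) + \
    "-" + Path(config["data_file"]).stem + "-" + time_now + "/"
print(config["model"]["path"] + mdir)  # trained/RNN-20-samples-2024-05-04_10.30m/
```

In debug runs `time_now` stays empty, so debug artifacts land in one stable directory instead of accumulating a new one per run.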
@@ -120,7 +127,7 @@ def apply_checkpoints(self, model, cp_path, patience):
 
     def select_model_arch(self):
         """Choose ML model"""
-        arch = ModelArchs(self.config)
+
         model_name = self.config["model"]["name"]
 
         print("\n\n" + "=" * 25 + " " + model_name +
@@ -129,15 +136,15 @@ def select_model_arch(self):
         print("-" * 50)
 
         if model_name == "RNN":
-            model = arch.apply_RNN()
+            model = self.arch.apply_RNN()
         elif model_name == "CNN":
-            model = arch.apply_CNN()
+            model = self.arch.apply_CNN()
         elif model_name == "LSTM":
-            model = arch.apply_LSTM()
+            model = self.arch.apply_LSTM()
         elif model_name == "RF":
-            model = arch.apply_RF(df)
+            model = self.arch.apply_RF(df)
         elif model_name == "multiDNN":
-            model = arch.apply_multiDNN()
+            model = self.arch.apply_multiDNN()
         else:
             print("Invalid model! Please select a valid model!")
             exit(1)
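Since every branch now routes through `self.arch`, the chain could also be table-driven; a sketch of that alternative (the `apply_*` names are from the diff, the scaffolding is illustrative, and the `RF` entry is left out because its `df` argument is not defined in this scope):

```python
def select_model_arch(self):
    """Choose ML model (table-driven sketch of the same dispatch)."""
    model_name = self.config["model"]["name"]
    builders = {
        "RNN": self.arch.apply_RNN,
        "CNN": self.arch.apply_CNN,
        "LSTM": self.arch.apply_LSTM,
        "multiDNN": self.arch.apply_multiDNN,
    }
    if model_name not in builders:
        print("Invalid model! Please select a valid model!")
        exit(1)
    return builders[model_name]()
```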
@@ -148,18 +155,18 @@ def train_model(self, model_file, X_train, y_train, X_test, y_test):
         model_name = self.config["model"]["name"]
         epochs = self.config["dnn"]["epochs"]
 
-        # Select the model architecture
-        model = self.select_model_arch()
-
-        # store metadata to neptune.ai
-        if self.config["model"]["use_neptune"]:
-            from neptune.integrations.tensorflow_keras import NeptuneCallback
-            nt_run = self.init_neptune(
-                model_name, epochs, self.config["data_file"])
-
         # Apply callbacks for training to store the best model checkpoint
         # and apply early stopping.
         if model_name != "RF":
+            # Select the model architecture
+            model = self.select_model_arch()
+
+            # store metadata to neptune.ai
+            if self.config["model"]["use_neptune"]:
+                from neptune.integrations.tensorflow_keras import NeptuneCallback
+                nt_run = self.init_neptune(
+                    model_name, epochs, self.config["data_file"])
+
             tf_callbacks = self.apply_checkpoints(
                 model=model,
                 cp_path=self.config["model"]["path"],
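Moving the Neptune setup inside the non-RF branch keeps the import local, so `neptune` remains an optional dependency that is only loaded when `use_neptune` is enabled. A minimal sketch of that lazy-import pattern (`make_tracker` is a hypothetical helper, not part of this repo):

```python
def make_tracker(config):
    """Return a Neptune run when tracking is enabled, else None."""
    if not config["model"]["use_neptune"]:
        return None
    # Imported only when the feature is on, so plain runs work
    # without neptune installed.
    import neptune
    return neptune.init_run()
```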
@@ -196,18 +203,15 @@ def train_model(self, model_file, X_train, y_train, X_test, y_test):
         else:
             # TODO: log non-DNN models output to Neptune
             # nt_run["acc"] = ?? or params=dict
+            # Fitting
+            model = self.arch.apply_RF(df.code)
+            # model = self.select_model_arch()
+            model.fit(X_train, y_train)
+            acc = model.score(X_test, y_test)
+            print(f"Accuracy: {acc}")
             print(f"Trained with non-DNN model: {model_name}")
         return model
 
-    # def load_model(self, model_JSON, file_weights):
-    #     """Load model from disk"""
-    #     with open(model_JSON, "r") as f:
-    #         model_json = json.load(f)
-    #         model = model_from_json(model_json)
-
-    #     model.load_weights(file_weights)
-    #     return model
-
     def load_tf_model(self, model_file):
         """
         Load model from disk
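The non-DNN branch follows the standard scikit-learn estimator contract: `fit()` on the training split, then `score()` for mean accuracy on the test split. A standalone sketch on toy data, with `RandomForestClassifier` standing in for whatever `apply_RF()` returns:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Toy data; in the repo, the splits come from the preprocessed code samples.
X, y = make_classification(n_samples=200, n_features=8, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X_train, y_train)         # Fitting
acc = model.score(X_test, y_test)   # mean accuracy, as printed above
print(f"Accuracy: {acc}")
```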
@@ -230,12 +234,11 @@ def evaluate_model(self, model_file, X_eval, y_eval):
         if self.config["model"]["name"] != "RF":
             if Path(model_file).is_file():
                 model = self.load_tf_model(model_file)
-
+                print("\nEvaluating the model...\n")
                 # evaluate the model
-                # loss, acc = model.evaluate(X_eval, y_eval, verbose=0)
+                loss, acc = model.evaluate(X_eval, y_eval, verbose=1)
 
-                print("\nEvaluating the model...\n")
-                y_pred = model.predict(X_eval)
+                # y_pred = model.predict(X_eval)
                 # print(f'y_pred: {y_pred}')
                 # print(f'y_eval: {y_eval}')
                 # print(f'\ny_pred.shape: {y_pred.shape}')
@@ -251,14 +254,13 @@ def evaluate_model(self, model_file, X_eval, y_eval):
 
                 # cls_report = classification_report(y_eval, y_pred)
                 # print(f"Classification Report: \n{cls_report}")
-                # print('loss: ', loss)
-                # print('acc: ', acc)
+                print('loss: ', loss)
+                print('acc: ', acc)
             else:
                 print(f"\n\nModel file: {model_file} not found!")
                 print("Please train the model first!")
         else:
-            train_model = pickle.load(open(model_file, "RF"))
-            result = train_model.score(X_eval, y_eval)
+            result = model_file.score(X_eval, y_eval)
             print("Result: ", result)
             print("\n" + "-" * 35 + "Testing Completed" + "-" * 35 + "\n")
 
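The DNN path now relies on the Keras `evaluate()` contract: for a model compiled with `metrics=["accuracy"]`, it returns `[loss, accuracy]`, which is what the uncommented `print` calls report. A minimal runnable sketch of that contract (the tiny model and random data are illustrative only):

```python
import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, activation="sigmoid")])
model.compile(optimizer="adam", loss="binary_crossentropy",
              metrics=["accuracy"])

X_eval = np.random.rand(16, 4).astype("float32")
y_eval = np.random.randint(0, 2, size=(16, 1))

loss, acc = model.evaluate(X_eval, y_eval, verbose=1)  # [loss, accuracy]
print('loss: ', loss)
print('acc: ', acc)
```

In the `RF` branch, `model_file` is expected to be the fitted estimator object itself (see the caller in the last hunk), so `.score()` applies to it directly.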
@@ -298,6 +300,13 @@ def parse_args(self):
     preprocess.save_model(model, model_file)
 
     # TODO: Evaluation of the trained model
+    df_eval = preprocess.load_data(data_file=config["eval_data"])
+
     if config["test"]:
-        output_size = len(set(list(y_train)))
-        classfr.evaluate_model(model_file, X_test, y_test)
+        if config["model"]["name"] != "RF":
+            X_eval, y_eval = preprocess.tokenize_data(
+                df=df_eval, max_len=config["preprocess"]["max_len"])
+            # output_size = len(set(list(y_train)))
+            classfr.evaluate_model(model_file, X_eval, y_eval)
+        else:
+            classfr.evaluate_model(model, X_eval, y_eval)
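Taken together, this diff reads a handful of keys from `config/classifier.yaml`; a hypothetical sketch of their loaded shape, with every value illustrative:

```python
# Hypothetical shape of the loaded config/classifier.yaml, limited to the
# keys read in this diff; all values here are made up for illustration.
config = {
    "data_file": "data/train_samples.csv",
    "eval_data": "data/eval_samples.csv",  # new: held-out evaluation set
    "test": True,
    "debug": False,
    "model": {"name": "RNN", "path": "trained/", "use_neptune": False},
    "dnn": {"epochs": 20, "debug_epochs": 2},
    "preprocess": {"max_len": 512},
}
```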