Started building the LSTM model

f334d8dd · Paktalin · 7a49bc01 · f334d8dd · f334d8dd
Commit f334d8dd authored Jan 05, 2019 by Paktalin
Showing with 49 additions and 0 deletions
main.py
sequences
--- a/main.py
+++ b/main.py
+import numpy as np
+from sklearn.model_selection import train_test_split
+from tqdm import tqdm
+from keras.models import Sequential
+from keras.layers import Bidirectional, Dense, Activation, LSTM, Dropout
+import pickle
+
+# Settings for slicing sentences into training samples and for training.
+SEQUENCE_LEN = 3    # length of each input window taken from a sentence
+STEP = 1            # stride between the starts of consecutive windows
+forms = 114         # size used for the model's input features and output layer
+batch_size = 128    # batch size handed to model.fit
+# Read the '~'-delimited encoded data; each row is iterated as one sentence.
+sentences = np.genfromtxt('encoded_forms.csv', delimiter='~')
+# Empty accumulators for the model inputs (x) and their targets (y).
+sequences, next_words = [], []
+
+# Build the (x, y) pairs: every window of SEQUENCE_LEN consecutive items in a
+# sentence is an input, and the item right after the window is its target.
+for sentence in tqdm(sentences):
+    window_starts = range(0, len(sentence) - SEQUENCE_LEN, STEP)
+    for start in window_starts:
+        stop = start + SEQUENCE_LEN
+        sequences.append(sentence[start:stop])
+        next_words.append(sentence[stop])
+
+# Persist both lists to disk with pickle.
+for filename, payload in (('sequences', sequences), ('next_words', next_words)):
+    with open(filename, 'wb') as fp:
+        pickle.dump(payload, fp)
+
+# Convert the collected samples to NumPy arrays before splitting: Keras treats
+# a plain Python list as a list of separate model inputs, not as one batch.
+x = np.asarray(sequences, dtype=int)
+y = np.asarray(next_words, dtype=int)
+# One-hot encode the inputs so each sample has the (SEQUENCE_LEN, forms) shape
+# that the model declares below.
+# NOTE(review): assumes every encoded id is an integer in [0, forms) -- confirm
+# against encoded_forms.csv (an out-of-range id raises IndexError here).
+x = np.eye(forms, dtype='float32')[x]
+
+# split training and test sets
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)
+
+# Bidirectional LSTM followed by a softmax over the `forms` classes.
+dropout = 0.2
+model = Sequential()
+model.add(Bidirectional(LSTM(128), input_shape=(SEQUENCE_LEN, forms)))
+if dropout > 0:
+    model.add(Dropout(dropout))
+model.add(Dense(forms))
+model.add(Activation('softmax'))
+
+# The targets are single integer class ids and the output is a softmax over
+# mutually exclusive classes, so use the sparse categorical loss rather than
+# binary cross-entropy.
+model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['acc'])
+model.fit(x_train, y_train, batch_size=batch_size, epochs=15, validation_data=(x_test, y_test))
+model.save('lstm.h5')
\ No newline at end of file
--- a/sequences
+++ b/sequences