I am pretty much completely new to Keras, and I have been trying to make an LSTM model that predicts the next word given some seed text (kind of like GPT-2 or GPT-3, but obviously not as good). My model has two inputs: 1. the seed text, and 2. the TextBlob/NLTK POS tags for each word.
I'm trying to make the training data formatted like this: x1 = [word1, word2, word3, ...] + x2 = [tag1, tag2, ...] -> y = [next word]
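To make that more concrete, here is a toy sketch of the kind of (x1, x2) -> y samples I mean (made-up words and tags, and a window of 3 instead of the 10 I actually use; this is not my real preprocessing code):

words = ["harry", "looked", "at", "the", "door"]
tags  = ["NNP",   "VBD",    "IN", "DT",  "NN"]   # one POS tag per word, from TextBlob

window = 3
samples = []
for i in range(len(words) - window):
    x1 = words[i:i + window]   # word window
    x2 = tags[i:i + window]    # matching POS-tag window
    y  = words[i + window]     # the word that follows the window
    samples.append((x1, x2, y))

# samples[0] == (['harry', 'looked', 'at'], ['NNP', 'VBD', 'IN'], 'the')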
The problem is that no matter how I adjust the model, the val_loss starts going up after reaching anywhere from 6.5 to 7.5. This happens no matter how I set up the dropout layers or how many parameters the model has, and it is pretty much the same with every optimizer I have tried and with most learning rates.
from textblob import TextBlob
import keras
import numpy as np
from keras.utils import np_utils
import re
np.set_printoptions(threshold=100)
text = open("/content/drive/MyDrive/data.txt","r").read().lower()[:1000000]
text = re.sub("[^ a-zA-Z]+", "", text)
text = re.sub(' +', ' ', text)
print("words: ",len(text.split(" ")))
a = TextBlob(text).tags
a,b = map(list,zip(*a))
a = np.asarray(a)
b = np.asarray(b)
uw = list(np.unique(a)) # unique words (duplicates collapse, e.g. [1,1,2] -> [1,2])
print("vocab size",len(uw))
ut = list(np.unique(b))
print("first")
for i in uw:
    a[a == i] = uw.index(i)  # where a == i it replaces it with the index.
print("second")
for i in ut:
    b[b == i] = ut.index(i)
a = a.astype(np.float)
b = b.astype(np.float)
print(len(a))
print(len(b))
dim = 10
class GetNext:
    def __init__(self, lis, rep):
        # this way the get_next func can be used in a map function
        self.lis = np.asarray(lis)
        self.rep = rep

    def get_next(self, item):
        try:  # only because an exception will be thrown on the last item
            # finds the element in position 0 of the element after item,
            # e.g. item = [1,2,3], lis = [[1,2,3],[4,5,6]] then n would be 4
            n = self.lis[self.lis == item]
            print(item)
            # print(list(uw)[int(n)])
        except IndexError:
            print("___")
            n = self.rep
        return np.array([n])
def MakeList(x, intt):
    # split x into chunks of length intt
    # https://stackoverflow.com/questions/4119070/how-to-divide-a-list-into-n-equal-parts-python
    lol = lambda lst, sz: [lst[i:i+sz] for i in range(0, len(lst), sz)]
    r = lol(x, intt)
    r.pop(-1)  # the list is unlikely to be exactly divisible by intt, so drop the last chunk, which won't be as long as the others
    return r
print("words")
words = MakeList(a,dim)#not
print("tags")
tags = MakeList(b,dim)
print("len")
print(len(words))
print(len(tags))
print("last")
last = []
words = np.asarray(words)
words= words.tolist()
for i in words:
    try:
        ind = words[words.index(i)+1][0]  # first word of the chunk after i
        last.append(ind)
    except:
        last.append(0)
words = np.asarray(words)
tags = np.asarray(tags)
print("reshape")
words = np.reshape(words, (len(words), dim, 1))
tags = np.reshape(tags, (len(tags), dim, 1))
print(words.shape)
print(tags.shape)
print(len(uw))
#last = np_utils.to_categorical(last)
last = np.asarray(last)
print(last.shape)
print(len(last))
input_words = keras.layers.Input(shape=(words.shape[1],words.shape[2]))
input_tags = keras.layers.Input(shape=(tags.shape[1],tags.shape[2]))
#model 1
w = keras.layers.LSTM(1000,return_sequences=1, return_state=0,recurrent_activation="sigmoid")(input_words)
w = keras.layers.Dropout(.5)(w)
w = keras.layers.GRU(100,return_sequences=0, return_state=0,recurrent_activation="sigmoid")(w)
w = keras.layers.Dropout(.5)(w)
#model 2
t = keras.layers.LSTM(20,return_sequences=0)(input_tags)
t = keras.layers.BatchNormalization()(t)
both = keras.layers.concatenate([w, t],axis=-1)
soft = keras.layers.Dense(len(uw),activation="softmax")(both)
model = keras.Model(inputs=[input_words,input_tags],outputs=soft)
def savem():
    global model
    modeljs = model.to_json()
    with open("model.json", "w") as js:
        js.write(modeljs)
    model.save_weights("model.h5")
optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=1)
optimizer = keras.optimizers.Adam(learning_rate=.001)
model.compile(optimizer=optimizer, loss=loss, metrics="acc",)
model.summary()
def gen(text, num):
    for i in range(num):
        a = TextBlob(text).tags
        a, b = map(list, zip(*a))
        a = np.asarray(a)
        b = np.asarray(b)
        for i in uw:
            a[a == i] = uw.index(i)  # where a == i it replaces it with the index. Maybe I should implement batch normalization?
        for i in ut:
            b[b == i] = ut.index(i)
        a = a.astype(np.float)
        b = b.astype(np.float)
        a = model.predict(x=[a, b])[0]
        a /= a.sum()
        a = np.random.choice(len(a), p=a)
    return uw[a]
for i in range(100):
    model.fit(x=[words, tags], y=last, epochs=1, batch_size=12, validation_split=.2)
    tt = "harry "
    for i in range(10):
        tt = tt + " " + gen(tt, 1)
    print(tt)
    savem()
while 1:
    try:
        print(gen(input("text\n"), 1))
    except Exception as e:
        print(str(e))
Here is my model summary.
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_7 (InputLayer) [(None, 10, 1)] 0
__________________________________________________________________________________________________
lstm_6 (LSTM) (None, 10, 1000) 4008000 input_7[0][0]
__________________________________________________________________________________________________
dropout_6 (Dropout) (None, 10, 1000) 0 lstm_6[0][0]
__________________________________________________________________________________________________
input_8 (InputLayer) [(None, 10, 1)] 0
__________________________________________________________________________________________________
gru_3 (GRU) (None, 100) 330600 dropout_6[0][0]
__________________________________________________________________________________________________
lstm_7 (LSTM) (None, 20) 1760 input_8[0][0]
__________________________________________________________________________________________________
dropout_7 (Dropout) (None, 100) 0 gru_3[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 20) 80 lstm_7[0][0]
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 120) 0 dropout_7[0][0]
batch_normalization_3[0][0]
__________________________________________________________________________________________________
dense_3 (Dense) (None, 10366) 1254286 concatenate_3[0][0]
==================================================================================================
Total params: 5,594,726
Trainable params: 5,594,686
Non-trainable params: 40
And here is the output.
WARNING:tensorflow:Model was constructed with shape (None, 10, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 10, 1), dtype=tf.float32, name='input_7'), name='input_7', description="created by layer 'input_7'"), but it was called on an input with incompatible shape (None, 1, 1).
WARNING:tensorflow:Model was constructed with shape (None, 10, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 10, 1), dtype=tf.float32, name='input_8'), name='input_8', description="created by layer 'input_8'"), but it was called on an input with incompatible shape (None, 1, 1).
982/982 [==============================] - 11s 11ms/step - loss: 6.6465 - acc: 0.0701 - val_loss: 7.0506 - val_acc: 0.0812
982/982 [==============================] - 11s 11ms/step - loss: 6.3581 - acc: 0.0739 - val_loss: 7.3579 - val_acc: 0.0808
982/982 [==============================] - 10s 11ms/step - loss: 6.1189 - acc: 0.0720 - val_loss: 7.3154 - val_acc: 0.0768
982/982 [==============================] - 11s 11ms/step - loss: 5.9741 - acc: 0.0738 - val_loss: 7.6783 - val_acc: 0.0859
982/982 [==============================] - 11s 11ms/step - loss: 5.8758 - acc: 0.0738 - val_loss: 7.9023 - val_acc: 0.0791
982/982 [==============================] - 11s 11ms/step - loss: 5.7558 - acc: 0.0744 - val_loss: 8.1748 - val_acc: 0.0802
982/982 [==============================] - 10s 11ms/step - loss: 5.6648 - acc: 0.0747 - val_loss: 8.2139 - val_acc: 0.0795
982/982 [==============================] - 11s 11ms/step - loss: 5.5871 - acc: 0.0766 - val_loss: 8.4580 - val_acc: 0.0802
982/982 [==============================] - 11s 11ms/step - loss: 5.5192 - acc: 0.0769 - val_loss: 8.5914 - val_acc: 0.0754
982/982 [==============================] - 11s 11ms/step - loss: 5.5013 - acc: 0.0785 - val_loss: 8.5338 - val_acc: 0.0764
982/982 [==============================] - 11s 11ms/step - loss: 5.4233 - acc: 0.0761 - val_loss: 8.5524 - val_acc: 0.0829
982/982 [==============================] - 11s 11ms/step - loss: 5.4017 - acc: 0.0779 - val_loss: 9.0169 - val_acc: 0.0856
982/982 [==============================] - 11s 11ms/step - loss: 5.3471 - acc: 0.0768 - val_loss: 8.7826 - val_acc: 0.0825
982/982 [==============================] - 11s 11ms/step - loss: 5.2953 - acc: 0.0764 - val_loss: 9.0312 - val_acc: 0.0819
982/982 [==============================] - 11s 11ms/step - loss: 5.2535 - acc: 0.0786 - val_loss: 9.2496 - val_acc: 0.0747
982/982 [==============================] - 10s 11ms/step - loss: 5.2090 - acc: 0.0794 - val_loss: 8.9501 - val_acc: 0.0788
982/982 [==============================] - 11s 11ms/step - loss: 5.1795 - acc: 0.0799 - val_loss: 9.2284 - val_acc: 0.0774
982/982 [==============================] - 11s 11ms/step - loss: 5.1437 - acc: 0.0814 - val_loss: 9.3294 - val_acc: 0.0815
982/982 [==============================] - 11s 11ms/step - loss: 5.1217 - acc: 0.0805 - val_loss: 9.4078 - val_acc: 0.0771
982/982 [==============================] - 11s 11ms/step - loss: 5.1028 - acc: 0.0815 - val_loss: 9.5992 - val_acc: 0.0778
982/982 [==============================] - 11s 11ms/step - loss: 5.0631 - acc: 0.0854 - val_loss: 9.1880 - val_acc: 0.0764
982/982 [==============================] - 11s 11ms/step - loss: 5.0476 - acc: 0.0826 - val_loss: 9.4368 - val_acc: 0.0785
982/982 [==============================] - 11s 11ms/step - loss: 5.0191 - acc: 0.0865 - val_loss: 9.5599 - val_acc: 0.0778
982/982 [==============================] - 10s 11ms/step - loss: 5.0093 - acc: 0.0888 - val_loss: 9.6991 - val_acc: 0.0781
982/982 [==============================] - 11s 11ms/step - loss: 4.9796 - acc: 0.0873 - val_loss: 9.6294 - val_acc: 0.0730
982/982 [==============================] - 11s 11ms/step - loss: 4.9695 - acc: 0.0871 - val_loss: 9.8313 - val_acc: 0.0757
982/982 [==============================] - 11s 11ms/step - loss: 4.9562 - acc: 0.0900 - val_loss: 9.8005 - val_acc: 0.0795
982/982 [==============================] - 11s 11ms/step - loss: 4.9330 - acc: 0.0905 - val_loss: 9