python - My LSTM Keras model won't stop overfitting

I am almost completely new to Keras, and I have been trying to build an LSTM model that predicts the next word given some text (kind of like GPT-2 or GPT-3, but obviously not as good). My model has two inputs: 1. the seed text, and 2. the TextBlob/NLTK POS tags for each word. I am trying to format the training data like this: x1 = [word1, word2, word3, ...] + x2 = [tag1, tag2, ...] -> y = [word5]. The problem is that no matter how I adjust the model, val_loss starts going up after reaching anywhere from 6.5 to 7.5. This happens no matter how I set up the dropout layers or how many parameters the model has, is pretty much the same with any optimizer I have tried, and is the same with most learning rates.
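To make that layout concrete, here is a minimal sketch of what a single training example is meant to look like (the words and tags are made up for illustration, and a window of 4 words is shown instead of the script's dim = 10):

x1 = ["the", "dog", "ran", "fast"]  # one window of seed words
x2 = ["DT", "NN", "VBD", "RB"]      # the matching TextBlob POS tags
y = "over"                          # the first word of the next window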

from textblob import TextBlob
import keras
import numpy as np
from keras.utils import np_utils
import re
np.set_printoptions(threshold=100)
text = open("/content/drive/MyDrive/data.txt","r").read().lower()[:1000000]

text = re.sub("[^ a-zA-Z]+", "", text)
text = re.sub(' +', ' ', text)
print("words: ",len(text.split(" ")))
a = TextBlob(text).tags

a,b = map(list,zip(*a))
a = np.asarray(a)
b = np.asarray(b)
uw = list(np.unique(a)) #unique vocabulary words; np.unique deduplicates, e.g. [1,1,2] -> [1,2]
print("vocab size",len(uw))
ut = list(np.unique(b)) #unique POS tags
print("first")

for i in uw:
  a[a==i] = uw.index(i) #wherever a == i, replace the word with its vocabulary index
print("second")
for i in ut:
  b[b==i] = ut.index(i) #same for the tags
a = a.astype(float) #np.float is deprecated/removed in recent NumPy; plain float works
b = b.astype(float)

print(len(a))
print(len(b))
dim = 10
class GetNext:
  #NOTE: this class is defined but never actually used below.
  def __init__(self,lis,rep):
    #this way the get_next func can be used in a map function
    self.lis = np.asarray(lis)
    self.rep = rep
  def get_next(self,item):
    try: #only because an IndexError will be thrown on the last item
      #finds the element in position 0 of the element after item, e.g. item = [1,2,3], lis = [[1,2,3],[4,5,6]], then n would be 4
      idx = int(np.argwhere((self.lis == item).all(axis=-1))[0][0])
      n = self.lis[idx + 1][0]
    except IndexError:
      n = self.rep
    return np.array([n])
def MakeList(x,intt):
  lol = lambda lst, sz: [lst[i:i+sz] for i in range(0, len(lst), sz)] #https://stackoverflow.com/questions/4119070/how-to-divide-a-list-into-n-equal-parts-python
  r = lol(x,intt)
  r.pop(-1) #the odds of the list being exactly divisible by intt are low, so this drops the last chunk, which won't be as long as the others

  return r
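#For example (hypothetical values): MakeList(list(range(7)), 3) would return
#[[0, 1, 2], [3, 4, 5]] -- the short trailing chunk [6] is dropped by pop(-1).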
print("words")
words = MakeList(a,dim)
print("tags")

tags = MakeList(b,dim)
print("len")
print(len(words))
print(len(tags))
print("last")
last = []
words = np.asarray(words).tolist()
for idx in range(len(words)):
  try:
    #the label for each window is the first word of the *next* window
    last.append(words[idx + 1][0])
  except IndexError:
    last.append(0) #the last window has no successor
words = np.asarray(words)
tags = np.asarray(tags)
print("reshape")
words = np.reshape(words, (len(words), dim, 1))
tags = np.reshape(tags, (len(tags), dim, 1))
print(words.shape)
print(tags.shape)
print(len(uw))
#last = np_utils.to_categorical(last)
last = np.asarray(last)
print(last.shape)
print(len(last))
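#At this point words and tags each have shape (num_windows, 10, 1), and last
#is a 1-D array with one label (the next word's vocabulary index) per window.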
input_words = keras.layers.Input(shape=(words.shape[1],words.shape[2]))
input_tags = keras.layers.Input(shape=(tags.shape[1],tags.shape[2]))
#model 1: word branch
w = keras.layers.LSTM(1000, return_sequences=True, return_state=False, recurrent_activation="sigmoid")(input_words)
w = keras.layers.Dropout(.5)(w)
w = keras.layers.GRU(100, return_sequences=False, return_state=False, recurrent_activation="sigmoid")(w)
w = keras.layers.Dropout(.5)(w)

#model 2: POS-tag branch
t = keras.layers.LSTM(20, return_sequences=False)(input_tags)

t = keras.layers.BatchNormalization()(t)


both = keras.layers.concatenate([w, t],axis=-1)
soft = keras.layers.Dense(len(uw),activation="softmax")(both)
model = keras.Model(inputs=[input_words,input_tags],outputs=soft)


def savem():
  global model
  modeljs = model.to_json()
  with open("model.json","w") as js:
    js.write(modeljs)
  model.save_weights("model.h5")
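#Hypothetical sketch of how the two saved files could be loaded back later:
#  with open("model.json") as js:
#      model = keras.models.model_from_json(js.read())
#  model.load_weights("model.h5")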

optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
#the Dense output layer already applies softmax, so the model outputs
#probabilities, not logits
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = keras.optimizers.Adam(learning_rate=.001) #note: this overwrites the RMSprop optimizer above
model.compile(optimizer=optimizer, loss=loss, metrics=["acc"])
model.summary()
def gen(text,num):
  #NOTE: the loop returns on its first iteration, so only one word is ever
  #generated per call, regardless of num
  for _ in range(num):
    a = TextBlob(text).tags
    a,b = map(list,zip(*a))
    a = np.asarray(a)
    b = np.asarray(b)
    for i in uw:
      a[a==i] = uw.index(i) #replace each word with its vocabulary index
    for i in ut:
      b[b==i] = ut.index(i)
    a = a.astype(float)
    b = b.astype(float)
    #reshape to (batch, timesteps, features) so the whole seed is one sequence;
    #the original flat arrays triggered the "incompatible shape" warnings shown
    #in the output below
    a = np.reshape(a, (1, len(a), 1))
    b = np.reshape(b, (1, len(b), 1))
    a = model.predict(x=[a,b])[0]
    a /= a.sum() #renormalize so the probabilities sum to exactly 1
    a = np.random.choice(len(a), p=a)
    return uw[a]
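#Example usage (with a hypothetical seed, and assuming the model has been
#trained): gen("the boy walked", 1) returns a single sampled next word.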


   
for epoch in range(100):
  model.fit(x=[words,tags], y=last, epochs=1, batch_size=12, validation_split=.2)
  tt = "harry "
  for i in range(10):
    tt = tt + " " + gen(tt,1)
  print(tt)

  savem()



while 1:
  try:
    print(gen(input("text\n"),1))
  except Exception as e:
    print(str(e))
 

Here is my model summary.

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_7 (InputLayer)            [(None, 10, 1)]      0                                            
__________________________________________________________________________________________________
lstm_6 (LSTM)                   (None, 10, 1000)     4008000     input_7[0][0]                    
__________________________________________________________________________________________________
dropout_6 (Dropout)             (None, 10, 1000)     0           lstm_6[0][0]                     
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, 10, 1)]      0                                            
__________________________________________________________________________________________________
gru_3 (GRU)                     (None, 100)          330600      dropout_6[0][0]                  
__________________________________________________________________________________________________
lstm_7 (LSTM)                   (None, 20)           1760        input_8[0][0]                    
__________________________________________________________________________________________________
dropout_7 (Dropout)             (None, 100)          0           gru_3[0][0]                      
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 20)           80          lstm_7[0][0]                     
__________________________________________________________________________________________________
concatenate_3 (Concatenate)     (None, 120)          0           dropout_7[0][0]                  
                                                                 batch_normalization_3[0][0]      
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 10366)        1254286     concatenate_3[0][0]              
==================================================================================================
Total params: 5,594,726
Trainable params: 5,594,686
Non-trainable params: 40

And here is the output.

WARNING:tensorflow:Model was constructed with shape (None, 10, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 10, 1), dtype=tf.float32, name='input_7'), name='input_7', description="created by layer 'input_7'"), but it was called on an input with incompatible shape (None, 1, 1).
WARNING:tensorflow:Model was constructed with shape (None, 10, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 10, 1), dtype=tf.float32, name='input_8'), name='input_8', description="created by layer 'input_8'"), but it was called on an input with incompatible shape (None, 1, 1).
982/982 [==============================] - 11s 11ms/step - loss: 6.6465 - acc: 0.0701 - val_loss: 7.0506 - val_acc: 0.0812
982/982 [==============================] - 11s 11ms/step - loss: 6.3581 - acc: 0.0739 - val_loss: 7.3579 - val_acc: 0.0808
982/982 [==============================] - 10s 11ms/step - loss: 6.1189 - acc: 0.0720 - val_loss: 7.3154 - val_acc: 0.0768
982/982 [==============================] - 11s 11ms/step - loss: 5.9741 - acc: 0.0738 - val_loss: 7.6783 - val_acc: 0.0859
982/982 [==============================] - 11s 11ms/step - loss: 5.8758 - acc: 0.0738 - val_loss: 7.9023 - val_acc: 0.0791
982/982 [==============================] - 11s 11ms/step - loss: 5.7558 - acc: 0.0744 - val_loss: 8.1748 - val_acc: 0.0802
982/982 [==============================] - 10s 11ms/step - loss: 5.6648 - acc: 0.0747 - val_loss: 8.2139 - val_acc: 0.0795
982/982 [==============================] - 11s 11ms/step - loss: 5.5871 - acc: 0.0766 - val_loss: 8.4580 - val_acc: 0.0802
982/982 [==============================] - 11s 11ms/step - loss: 5.5192 - acc: 0.0769 - val_loss: 8.5914 - val_acc: 0.0754
982/982 [==============================] - 11s 11ms/step - loss: 5.5013 - acc: 0.0785 - val_loss: 8.5338 - val_acc: 0.0764
982/982 [==============================] - 11s 11ms/step - loss: 5.4233 - acc: 0.0761 - val_loss: 8.5524 - val_acc: 0.0829
982/982 [==============================] - 11s 11ms/step - loss: 5.4017 - acc: 0.0779 - val_loss: 9.0169 - val_acc: 0.0856
982/982 [==============================] - 11s 11ms/step - loss: 5.3471 - acc: 0.0768 - val_loss: 8.7826 - val_acc: 0.0825
982/982 [==============================] - 11s 11ms/step - loss: 5.2953 - acc: 0.0764 - val_loss: 9.0312 - val_acc: 0.0819
982/982 [==============================] - 11s 11ms/step - loss: 5.2535 - acc: 0.0786 - val_loss: 9.2496 - val_acc: 0.0747
982/982 [==============================] - 10s 11ms/step - loss: 5.2090 - acc: 0.0794 - val_loss: 8.9501 - val_acc: 0.0788
982/982 [==============================] - 11s 11ms/step - loss: 5.1795 - acc: 0.0799 - val_loss: 9.2284 - val_acc: 0.0774
982/982 [==============================] - 11s 11ms/step - loss: 5.1437 - acc: 0.0814 - val_loss: 9.3294 - val_acc: 0.0815
982/982 [==============================] - 11s 11ms/step - loss: 5.1217 - acc: 0.0805 - val_loss: 9.4078 - val_acc: 0.0771
982/982 [==============================] - 11s 11ms/step - loss: 5.1028 - acc: 0.0815 - val_loss: 9.5992 - val_acc: 0.0778
982/982 [==============================] - 11s 11ms/step - loss: 5.0631 - acc: 0.0854 - val_loss: 9.1880 - val_acc: 0.0764
982/982 [==============================] - 11s 11ms/step - loss: 5.0476 - acc: 0.0826 - val_loss: 9.4368 - val_acc: 0.0785
982/982 [==============================] - 11s 11ms/step - loss: 5.0191 - acc: 0.0865 - val_loss: 9.5599 - val_acc: 0.0778
982/982 [==============================] - 10s 11ms/step - loss: 5.0093 - acc: 0.0888 - val_loss: 9.6991 - val_acc: 0.0781
982/982 [==============================] - 11s 11ms/step - loss: 4.9796 - acc: 0.0873 - val_loss: 9.6294 - val_acc: 0.0730
982/982 [==============================] - 11s 11ms/step - loss: 4.9695 - acc: 0.0871 - val_loss: 9.8313 - val_acc: 0.0757
982/982 [==============================] - 11s 11ms/step - loss: 4.9562 - acc: 0.0900 - val_loss: 9.8005 - val_acc: 0.0795
982/982 [==============================] - 11s 11ms/step - loss: 4.9330 - acc: 0.0905 - val_loss: 9

1 Answer

Waiting for answers
