I'm trying to convert this old TensorFlow 1 code to TF2 using Keras:
import math

import tensorflow as tf


def init_net(batch_size=256, num_feats=30, hidden_size=100):
    with tf.name_scope('network'):
        with tf.name_scope('inputs'):
            # One token id and one target id per example in the batch.
            inputs = tf.placeholder(tf.int32, shape=[batch_size, ], name='inputs')
            labels = tf.placeholder(tf.int32, shape=[batch_size, ], name='labels')
            embeddings = tf.Variable(
                tf.random_uniform([len(NODE_MAP), num_feats]), name='embeddings'
            )
            embed = tf.nn.embedding_lookup(embeddings, inputs)
            onehot_labels = tf.one_hot(labels, len(NODE_MAP), dtype=tf.float32)
        with tf.name_scope('hidden'):
            weights = tf.Variable(
                tf.truncated_normal(
                    [num_feats, hidden_size], stddev=1.0 / math.sqrt(num_feats)
                ),
                name='weights'
            )
            biases = tf.Variable(
                tf.zeros((hidden_size,)),
                name='biases'
            )
            hidden = tf.tanh(tf.matmul(embed, weights) + biases)
        with tf.name_scope('softmax'):
            weights = tf.Variable(
                tf.truncated_normal(
                    [hidden_size, len(NODE_MAP)],
                    stddev=1.0 / math.sqrt(hidden_size)
                ),
                name='weights'
            )
            biases = tf.Variable(
                tf.zeros((len(NODE_MAP),)),
                name='biases'
            )
            logits = tf.matmul(hidden, weights) + biases
        with tf.name_scope('error'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=onehot_labels, logits=logits, name='cross_entropy'
            )
            loss = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')

    return inputs, labels, embeddings, loss
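NODE_MAP here is the vocabulary: a mapping from node types to integer ids. Its exact contents don't matter for the question; it is roughly something like this (illustrative only):

NODE_MAP = {
    'Module': 0,
    'FunctionDef': 1,
    'Assign': 2,
    'BinOp': 3,
    # ... one entry per node type in the vocabulary
}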
The network should be able to learn a programming language over this vocabulary. My Keras version is the following:
network = tf.keras.models.Sequential()
embedding_layer = tf.keras.layers.Embedding(input_dim=len(NODE_MAP),
                                            output_dim=30,
                                            input_length=256,
                                            embeddings_initializer=tf.keras.initializers.RandomUniform())
network.add(embedding_layer)
hidden_layer = tf.keras.layers.Dense(100, activation='tanh')
network.add(hidden_layer)
softmax_layer = tf.keras.layers.Softmax()
network.add(softmax_layer)
network.compile(optimizer='SGD', loss='categorical_crossentropy')
But this code raises "ValueError: Shapes (None, 256) and (None, 256, 100) are incompatible".
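As far as I can tell, the mismatch is between the labels and the model output, which comes out as a rank-3 tensor. A minimal shape check (with a dummy NODE_MAP standing in for the real vocabulary) gives:

import tensorflow as tf

NODE_MAP = {i: i for i in range(1000)}  # dummy stand-in, only used to size the Embedding layer

network = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=len(NODE_MAP), output_dim=30, input_length=256),
    tf.keras.layers.Dense(100, activation='tanh'),
    tf.keras.layers.Softmax(),
])

dummy_batch = tf.zeros((2, 256), dtype=tf.int32)  # two sequences of 256 token ids
print(network(dummy_batch).shape)                 # (2, 256, 100) -- rank 3, not (batch, 256)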
If I add an extra Flatten layer between the Embedding and Dense layers, the error changes to "ValueError: Shapes (None, 256) and (None, 100) are incompatible". If I then change the number of units in the Dense layer from 100 to 256, the network starts to train, but it doesn't learn anything (training does not improve the accuracy).
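For completeness, that last variant (the one that runs but does not learn) looks roughly like this:

network = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(input_dim=len(NODE_MAP), output_dim=30, input_length=256,
                              embeddings_initializer=tf.keras.initializers.RandomUniform()),
    tf.keras.layers.Flatten(),                      # (None, 256 * 30)
    tf.keras.layers.Dense(256, activation='tanh'),  # 256 units instead of 100, otherwise the shapes clash
    tf.keras.layers.Softmax(),
])
network.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])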
What am I missing?