python - Why is my RNN learning to classify all inputs as only 1 of 2 possible classifications? -
I have written my first TensorFlow implementation of an RNN that takes as input random sequences that are either increasing or decreasing. The training labels are a single integer corresponding to each sequence, with 1 being an increasing sequence and 0 a decreasing one. The model trains, but it leans towards classifying every sequence as decreasing, and I cannot figure out why. Here is the code:
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
import random

sequencelength = 5  # input dimension
maxnum = 1000  # must be >= (sequencelength - 1)
outputdim = 1
hiddendim = 16
learningrate = 0.1
trainingiterations = 10000
batchsize = 10
displaystep = 1000


def generatedata():
    """Build one random training batch.

    Returns a pair ``(data, labels)`` holding ``batchsize`` entries each.
    Every entry of ``data`` is a list of ``sequencelength`` integers in
    ``[0, maxnum]`` that is either strictly increasing (label ``[1]``) or
    strictly decreasing (label ``[0]``); the direction is picked with
    probability 0.5.
    """
    data = []
    labels = []
    for _ in range(batchsize):
        type = (1 if random.random() < 0.5 else 0)
        temp = []
        if type == 1:
            labels.append([1])
            # Leave enough head-room above the first value for the remaining
            # strictly larger values to fit below maxnum.
            temp.append(random.randint(0, maxnum - sequencelength + 1))
            for i in range(1, sequencelength):
                temp.append(random.randint(temp[i - 1] + 1, maxnum - sequencelength + i + 1))
            data.append(temp)
        if type == 0:
            labels.append([0])
            # Mirror image of the increasing case: leave room below.
            temp.append(random.randint(0 + sequencelength - 1, maxnum))
            for i in range(1, sequencelength):
                temp.append(random.randint(0 + sequencelength - i - 1, temp[i - 1] - 1))
            data.append(temp)
    return data, labels


x = tf.placeholder(tf.float32, [batchsize, sequencelength], name="input")
y = tf.placeholder(tf.float32, [batchsize, outputdim], name="label")

w = tf.Variable(tf.random_normal([hiddendim, outputdim]))
b = tf.Variable(tf.random_normal([outputdim]))

cell = rnn.BasicRNNCell(hiddendim)
# The whole sequence is fed as a single time step of width sequencelength.
outputs, states = tf.nn.static_rnn(cell, [x], dtype=tf.float32)
prediction = tf.sigmoid(tf.matmul(outputs[0], w + b))
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=prediction, labels=y))
optimiser = tf.train.AdamOptimizer(learning_rate=learningrate).minimize(loss)

correctprediction = tf.equal(tf.round(prediction), y)
accuracy = tf.reduce_mean(tf.cast(correctprediction, tf.float32))

with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    for i in range(trainingiterations):
        batchx, batchy = generatedata()
        dict = {x: batchx, y : batchy}
        session.run(optimiser, feed_dict=dict)
        if i % displaystep == 0:
            print("predictions:\t" + str(session.run(tf.transpose(tf.round(prediction)), dict)))
            print("labels:\t\t" + str(session.run(tf.transpose(y), dict)) + "\n")
            # batchaccuracy = session.run(accuracy, feed_dict=dict)
            # batchloss = 
session.run(loss, feed_dict=dict) # print("iteration: " + str(i) + "\naccuracy: " + str(batchaccuracy) + "\nloss: " + str(batchloss) + "\n")
As I have said, this is my first implementation using TensorFlow so, although I am aware of how an RNN works, I am still quite lost with the high-level abstractions that I interact with in TensorFlow. The calculations of prediction, loss, correctprediction and accuracy are what I am most unsure about. Is the way I am using the sigmoid function twice OK? One time to produce a probability as the prediction, and again to calculate the cross entropy between the prediction (as a probability) and the label.
edit
I have noticed that, on rare occasions, without changing any of the code, the RNN learns to classify the sequences correctly.
Your learning rate is too large. I decreased the learning rate to
learningrate = 0.01. In addition, you don't need to apply the sigmoid here
prediction = tf.sigmoid(tf.matmul(outputs[0], w + b)) as the loss already incorporates the sigmoid:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=prediction, labels=y)). I modified your code with the above changes (as well as a couple of additional changes in the printing section, to adjust the format), and got the following output (you can see that the predictions become perfect starting with the second printing segment):
predictions: [[ 0. 1. 0. 0. 0. 0. 0. 1. 0. 1.]] labels: [[ 1. 0. 1. 1. 1. 0. 1. 0. 0. 0.]] iteration: 0 accuracy: 0.2 loss: 3.27201 predictions: [[ 0. 1. 0. 0. 1. 1. 0. 0. 0. 0.]] labels: [[ 0. 1. 0. 0. 1. 1. 0. 0. 0. 0.]] iteration: 1000 accuracy: 1.0 loss: 0.000647951 predictions: [[ 0. 1. 1. 1. 1. 1. 0. 1. 0. 1.]] labels: [[ 0. 1. 1. 1. 1. 1. 0. 1. 0. 1.]] iteration: 2000 accuracy: 1.0 loss: 0.000801496 predictions: [[ 1. 0. 1. 1. 0. 0. 1. 0. 1. 0.]] labels: [[ 1. 0. 1. 1. 0. 0. 1. 0. 1. 0.]] iteration: 3000 accuracy: 1.0 loss: 0.000515367 predictions: [[ 1. 1. 1. 1. 1. 1. 1. 0. 0. 0.]] labels: [[ 1. 1. 1. 1. 1. 1. 1. 0. 0. 0.]] iteration: 4000 accuracy: 1.0 loss: 0.000312456 predictions: [[ 0. 0. 0. 0. 1. 0. 0. 1. 0. 0.]] labels: [[ 0. 0. 0. 0. 1. 0. 0. 1. 0. 0.]] iteration: 5000 accuracy: 1.0 loss: 5.86302e-05 predictions: [[ 1. 0. 1. 0. 0. 0. 0. 0. 0. 1.]] labels: [[ 1. 0. 1. 0. 0. 0. 0. 0. 0. 1.]] iteration: 6000 accuracy: 1.0 loss: 5.79187e-05 predictions: [[ 1. 0. 0. 1. 1. 0. 1. 0. 0. 1.]] labels: [[ 1. 0. 0. 1. 1. 0. 1. 0. 0. 1.]] iteration: 7000 accuracy: 1.0 loss: 0.000136576 predictions: [[ 1. 0. 1. 1. 0. 0. 1. 1. 0. 1.]] labels: [[ 1. 0. 1. 1. 0. 0. 1. 1. 0. 1.]] iteration: 8000 accuracy: 1.0 loss: 4.11543e-05 predictions: [[ 0. 1. 0. 0. 0. 0. 0. 1. 0. 0.]] labels: [[ 0. 1. 0. 0. 0. 0. 0. 1. 0. 0.]] iteration: 9000 accuracy: 1.0 loss: 7.28511e-06 here modified code:
from __future__ import print_function

import random

import tensorflow as tf
from tensorflow.contrib import rnn

sequencelength = 5  # input dimension
maxnum = 1000  # must be >= (sequencelength - 1)
outputdim = 1
hiddendim = 16
learningrate = 0.01
trainingiterations = 10000
batchsize = 10
displaystep = 1000


def generatedata():
    """Build one random training batch.

    Returns a pair ``(data, labels)`` holding ``batchsize`` entries each.
    Every entry of ``data`` is a list of ``sequencelength`` integers in
    ``[0, maxnum]`` that is either strictly increasing (label ``[1]``) or
    strictly decreasing (label ``[0]``); the direction is picked with
    probability 0.5.
    """
    data = []
    labels = []
    for _ in range(batchsize):
        increasing = random.random() < 0.5
        sequence = []
        if increasing:
            labels.append([1])
            # Leave enough head-room above the first value for the remaining
            # strictly larger values to fit below maxnum.
            sequence.append(random.randint(0, maxnum - sequencelength + 1))
            for i in range(1, sequencelength):
                sequence.append(
                    random.randint(sequence[i - 1] + 1, maxnum - sequencelength + i + 1))
        else:
            labels.append([0])
            # Mirror image of the increasing case: leave room below.
            sequence.append(random.randint(sequencelength - 1, maxnum))
            for i in range(1, sequencelength):
                sequence.append(
                    random.randint(sequencelength - i - 1, sequence[i - 1] - 1))
        data.append(sequence)
    return data, labels


x = tf.placeholder(tf.float32, [batchsize, sequencelength], name="input")
y = tf.placeholder(tf.float32, [batchsize, outputdim], name="label")

w = tf.Variable(tf.random_normal([hiddendim, outputdim]))
b = tf.Variable(tf.random_normal([outputdim]))

cell = rnn.BasicRNNCell(hiddendim)
# The whole sequence is fed as a single time step of width sequencelength.
outputs, states = tf.nn.static_rnn(cell, [x], dtype=tf.float32)

# Raw scores, no sigmoid: sigmoid_cross_entropy_with_logits applies it
# internally. The bias is added to the layer output; adding it to ``w``
# before the matmul would merely shift every weight by the same amount.
logits = tf.matmul(outputs[0], w) + b
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y))
optimiser = tf.train.AdamOptimizer(learning_rate=learningrate).minimize(loss)

# Hard 0/1 decision obtained by thresholding the probability at 0.5.
prediction = tf.round(tf.sigmoid(logits))
correctprediction = tf.equal(prediction, y)
accuracy = tf.reduce_mean(tf.cast(correctprediction, tf.float32))

# Reporting ops are created once here; building them inside the training
# loop would add new nodes to the graph on every display step.
predictionrow = tf.transpose(prediction)
labelrow = tf.transpose(y)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    for i in range(trainingiterations):
        batchx, batchy = generatedata()
        feed = {x: batchx, y: batchy}
        session.run(optimiser, feed_dict=feed)

        if i % displaystep == 0:
            # Evaluate everything on the just-trained batch in a single run.
            predictions, batchlabels, batchaccuracy, batchloss = session.run(
                [predictionrow, labelrow, accuracy, loss], feed_dict=feed)
            print("predictions:\t" + str(predictions))
            print("labels:\t\t" + str(batchlabels) + "\n")
            print("iteration: " + str(i) + "\naccuracy: " + str(batchaccuracy) + "\nloss: " + str(batchloss) + "\n")
Comments
Post a Comment