I wrote the following code to extract features from two images with a deep CNN using TensorFlow:
# -*- coding: utf-8 -*-
# Implementation of Wang et al 2017: Automatic Brain Tumor Segmentation using Cascaded Anisotropic Convolutional Neural Networks. https://arxiv.org/abs/1709.00382
# Author: Guotai Wang
# Copyright (c) 2017-2018 University College London, United Kingdom. All rights reserved.
# http://cmictig.cs.ucl.ac.uk
#
# Distributed under the BSD-3 licence. Please see the file licence.txt
# This software is not certified for clinical use.
#
from __future__ import absolute_import, print_function
import numpy as np
from scipy import ndimage
import time
import os
import sys
import pickle
import tensorflow as tf
from tensorflow.contrib.data import Iterator
from util.data_loader import *
from util.data_process import *
from util.train_test_func import *
from util.parse_config import parse_config
from train import NetFactory
print("import finished")
def _build_network(config_net, batch_size):
    """Build the graph for one network described by ``config_net``.

    Returns ``(net, x, predicty, caty, proby)``: the network object, its
    input placeholder, the two tensors returned by calling the network, and
    the softmax of the first of them.
    """
    full_data_shape = [batch_size] + config_net['data_shape']
    x = tf.placeholder(tf.float32, shape=full_data_shape)
    net_class = NetFactory.create(config_net['net_type'])
    net = net_class(num_classes=config_net['class_num'], w_regularizer=None,
                    b_regularizer=None, name=config_net['net_name'])
    net.set_params(config_net)
    # NOTE(review): is_training=True is passed although this is the test
    # path; kept as in the original -- confirm it is intended.
    predicty, caty = net(x, is_training=True)
    proby = tf.nn.softmax(predicty)
    return net, x, predicty, caty, proby


def _restore_network(sess, all_vars, config_net):
    """Restore the variables under the network's name scope from its model file."""
    prefix = config_net['net_name'] + '/'
    net_vars = [v for v in all_vars if v.name.startswith(prefix)]
    saver = tf.train.Saver(net_vars)
    saver.restore(sess, config_net['model_file'])


def test(config_file):
    """Run the whole-tumor network(s) on every test image and record timings.

    Reads ``config_file`` with ``parse_config``. If it has a ``network1``
    section a single network is used for all three slots; otherwise the
    ``network1ax`` / ``network1sg`` / ``network1cr`` sections are used.

    Side effects:
      * writes ``<save_folder>/test_time.txt``;
      * in the three-network case, stores the concatenated prediction
        *tensor* under the module globals ``x1`` and ``x2`` (first two
        images only). These are symbolic tensors, not numpy arrays:
        evaluating them still requires feeding the input placeholders in
        the session that holds the restored weights.
    """
    # 1, load configure file
    config = parse_config(config_file)
    config_data = config['data']
    config_net1 = config.get('network1', None)
    config_test = config['testing']
    batch_size = config_test.get('batch_size', 5)
    print("configure file loaded")

    # 2.1, network for whole tumor
    if config_net1:
        print("configure file of whole tumor is loaded")
        plane_configs = [config_net1]
        built = [_build_network(config_net1, batch_size)]
    else:
        plane_configs = [config['network1ax'],
                         config['network1sg'],
                         config['network1cr']]
        print("configure files of whole tumor in three planes are loaded")
        built = []
        for tag, plane_cfg in zip(('ax', 'sg', 'cr'), plane_configs):
            built.append(_build_network(plane_cfg, batch_size))
            print("graph for 1st network1%s is constructed" % tag)

    # 3, create session and load trained models
    all_vars = tf.global_variables()
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    for plane_cfg in plane_configs:
        _restore_network(sess, all_vars, plane_cfg)
    print("all variables of net1 is saved")

    # 4, load test images
    dataloader = DataLoader(config_data)
    dataloader.load_data()
    image_num = dataloader.get_total_image_number()

    # 5, start to test
    save_folder = config_data['save_folder']
    test_time = []

    # A single network fills all three slots; otherwise one network per slot.
    slot_cfgs = plane_configs * 3 if config_net1 else plane_configs
    slots = built * 3 if config_net1 else built
    data_shapes = [cfg['data_shape'][:-1] for cfg in slot_cfgs]
    label_shapes = [cfg['label_shape'][:-1] for cfg in slot_cfgs]
    nets = [slot[0] for slot in slots]
    inputs = [slot[1] for slot in slots]
    outputs = [slot[4] for slot in slots]
    class_num = slot_cfgs[0]['class_num']
    # The original always read data_shape1ax[-1], which is undefined when
    # the single-network branch is taken; use the first slot's config.
    channel_num = slot_cfgs[0]['data_shape'][-1]

    # Feature tensors are loop-invariant graph ops, so build them once here
    # rather than adding new ops to the graph for every image.
    feature_names = ['x1', 'x2']
    predi = None
    if not config_net1:
        paddings = tf.constant([[0, 0], [0, 0], [10, 10], [0, 0], [0, 0]])
        ((_, _, pred_ax, cat_ax, _),
         (_, _, pred_sg, cat_sg, _),
         (_, _, pred_cr, cat_cr, _)) = built
        # NOTE(review): the reshape targets are hard-coded and assume
        # batch_size == 5 and specific data shapes -- confirm.
        predi = tf.concat([pred_ax,
                           tf.reshape(pred_sg, [5, 11, 180, 160, 2]),
                           tf.pad(pred_cr, paddings, "CONSTANT")], 0)
        # Built as in the original, but not used further in this function.
        cati = tf.concat([cat_ax,
                          tf.reshape(cat_sg, [5, 11, 180, 160, 14]),
                          tf.pad(cat_cr, paddings, "CONSTANT")], 0)

    for i in range(image_num):
        [temp_imgs, temp_weight, temp_name, img_names, temp_bbox, temp_size] = \
            dataloader.get_image_data_with_name(i)
        t0 = time.time()
        # 5.1, test of 1st network
        prob1 = test_one_image_three_nets_adaptive_shape(
            temp_imgs, data_shapes, label_shapes, channel_num, class_num,
            batch_size, sess, nets, outputs, inputs, shape_mode=0)
        # Computed as in the original; the result is not saved here.
        pred1 = np.asarray(np.argmax(prob1, axis=3), np.uint16)
        pred1 = pred1 * temp_weight
        print("net1 is tested")
        # Only two global names are available, and predi only exists in the
        # three-network case; guard both instead of raising.
        if predi is not None and i < len(feature_names):
            globals()[feature_names[i]] = predi
        test_time.append(time.time() - t0)
        print(temp_name)

    test_time = np.asarray(test_time)
    print('test time', test_time.mean())
    np.savetxt(save_folder + '/test_time.txt', test_time)
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Number of arguments should be 2. e.g.')
        print(' python test.py config17/test_all_class.txt')
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and is meant for interactive use.
        sys.exit()
    config_file = str(sys.argv[1])
    # Explicit check instead of assert, which is stripped under -O.
    if not os.path.isfile(config_file):
        raise IOError('config file not found: %s' % config_file)
    test(config_file)
    # x1 and x2 are published as module globals by test().
    y = tf.stack([x1, x2], 0)
    # NOTE: this is the call that fails with "You must feed a value for
    # placeholder tensor": y depends on the input placeholders, no feed_dict
    # is supplied, and a brand-new session (without the restored weights)
    # is used.
    z = tf.Session().run(y)
The output is a tensor (y) that I want to convert to a NumPy array using tf.Session().run(), but I get this error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [5,19,180,160,4]
[[Node: Placeholder = Placeholderdtype=DT_FLOAT, shape=[5,19,180,160,4], _device="/job:localhost/replica:0/task:0/device:GPU:0"]]
Note, this answer is based on a deep look in the crystal ball, predicting the code, which seems to be classified -- at least not written in the question itself.
Have a look at the error message:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor
This is exactly, what is wrong with your code. Trimming down, your code is essentially just (there are a lot of issues):
# Minimal reproduction of the problem: y depends on the placeholder x1.
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3])
y = tf.layers.dense(x1, 2)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Fails: no feed_dict is given for x1, and a second, fresh session is used.
print(tf.Session().run(y))
The output tensor y cannot be evaluated without knowing the value of x1, since it depends on this value.
1. Fix: use proper naming
# Same reproduction, with explicit names on the placeholder and the layer.
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Still fails (x1 is not fed), but the error now names 'my_input'.
print(tf.Session().run(y))
Now the error-message becomes much clearer
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'my_input' with dtype float and shape [?,3]
2. Fix: provide a feed_dict
To let TensorFlow know, which value the computation of y should be based on, you need to feed it into the graph:
# The placeholder is now fed, but the run still uses a second session.
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Variables were initialized in `sess`, not in this new tf.Session().
np_result = tf.Session().run(y, feed_dict={x1: [[42, 43, 44]]})
Now, this reveals the second issue with your code. You have 2 sessions:
sess = tf.InteractiveSession() (session_a)
tf.Session() in tf.Session().run() (session_b)
Now, session_a gets all the initialized variables, since your code contains
sess.run(tf.global_variables_initializer())
But, during tf.Session().run(...) another session is created, leaving a new error message:
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value ...
3. Fix: use just one session
# Working version: one session both initializes the variables and runs y.
import tensorflow as tf
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# sess.run returns the fetched tensor's value for the fed input.
np_result = sess.run(y, feed_dict={x1: [[42, 43, 44]]})
And to provide, the best possible solution:
import tensorflow as tf

# construct graph somewhere
x1 = tf.placeholder(tf.float32, [None, 3], name='my_input')
y = tf.layers.dense(x1, 2, name='fc1')

# The context manager closes the session when the block exits.
with tf.Session() as sess:
    # init variables / or load them
    sess.run(tf.global_variables_initializer())
    # make sure that no operations will be added to the graph
    sess.graph.finalize()
    # fetch result as numpy array
    np_result = sess.run(y, feed_dict={x1: [[42, 43, 44]]})
The code you either wrote yourself or copied from somewhere is the best demonstration of "How to not write in tensorflow."
last remark:
TensorFlow forces you to create a clean structure. This is important, and following this structure should become a habit. After a while, you will immediately spot the parts that smell like bad code.
If you use an entire network, then just replace tf.layers.dense with my_network_definition and
def my_network_definition(x1):
    """Template for a user-defined network: build it from ``x1`` and return its output."""
    # Replace the ellipsis with the actual layers of the network.
    output = ...
    return output
In pytorch, you can write in the arbitrary style like you provided in the question. Not saying, you should do that. But it is possible. So then, try to follow the structure TensorFlow expects from you.
Dear pytorch users, I am looking forward to your feedback.
Related
I am building an LSTM net using the Dataset API.
The input tensor (named x in code) has different shapes for the train and the val sets and the iterator is defined without specifying an output shape.
The problem is that when tf.nn.dynamic_rnn graph_op is defined the shape of x is unknown and the following error is raised:
ValueError: as_list() is not defined on an unknown TensorShape.
Using tf.nn.dynamic_rnn without the Dataset API works as expected.
How can this error be fixed?
TF version: 1.4
import tensorflow as tf
import numpy as np
"""
1d: Number of examples per epoch
2d: Time steps size
3d: Batch size e.g. number of independent time series
4d: Number of points that are given as input in the lstm each time step
Batch size is usually smaller in val set because we use most of data for training.
Time steps size is bigger in val set because we want to speed up inference.
"""
x_train = np.random.rand(100, 8, 12, 2).astype(np.float32)
x_val = np.random.rand(8, 100, 4, 2).astype(np.float32)
use_dataset_api = True

with tf.device('/gpu:0'):
    # NOTE(review): the device scope is entered on the current default
    # graph; resetting the default graph inside it presumably means the ops
    # below are created on a new graph without this device pin -- confirm.
    tf.reset_default_graph()

    if not use_dataset_api:
        # Placeholder-based variant: the static shape [None, None, 2] is
        # known when the RNN is built.
        batch_size_pl = tf.placeholder(shape=[], dtype=tf.int32)
        x_pl = tf.placeholder(shape=[None, None, 2], dtype=tf.float32)
        cell = tf.contrib.rnn.LSTMCell(num_units=11)
        init_state = cell.zero_state(batch_size=batch_size_pl, dtype=tf.float32)
        rnn_outputs, current_state = tf.nn.dynamic_rnn(
            cell, x_pl, initial_state=init_state,
            time_major=True, dtype=tf.float32)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Use first example of train set
        rnn_outputs_, current_state_ = sess.run(
            [rnn_outputs, current_state],
            feed_dict={batch_size_pl: 12, x_pl: x_train[0]})
        # Use first example of val set
        rnn_outputs_, current_state_ = sess.run(
            [rnn_outputs, current_state],
            feed_dict={batch_size_pl: 4, x_pl: x_val[0]})
    else:
        # Dataset API variant: one re-initializable iterator serves both sets.
        batch_size_pl = tf.placeholder(shape=[], dtype=tf.int32)
        train_set = tf.data.Dataset.from_tensor_slices((x_train))
        val_set = tf.data.Dataset.from_tensor_slices((x_val))
        # The iterator is created from types only, so x has an unknown shape.
        iterator = tf.data.Iterator.from_structure(train_set.output_types)  # , train_set.output_shapes)
        train_init_op = iterator.make_initializer(train_set)
        val_init_op = iterator.make_initializer(val_set)
        x = iterator.get_next()
        cell = tf.contrib.rnn.LSTMCell(num_units=11)
        init_state = cell.zero_state(batch_size=batch_size_pl, dtype=tf.float32)
        # Raises error for tensor x: as_list() is not defined on an unknown TensorShape.
        rnn_outputs, current_state = tf.nn.dynamic_rnn(
            cell, x, initial_state=init_state,
            time_major=True, dtype=tf.float32)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Use first example of train set
        sess.run(train_init_op)
        rnn_outputs_, current_state_ = sess.run(
            [rnn_outputs, current_state],
            feed_dict={batch_size_pl: 12})
        # Use first example of val set
        sess.run(val_init_op)
        rnn_outputs_, current_state_ = sess.run(
            [rnn_outputs, current_state],
            feed_dict={batch_size_pl: 4})
The solution is to change the following line:
iterator = tf.data.Iterator.from_structure(train_set.output_types)
with:
iterator = tf.data.Iterator.from_structure(train_set.output_types, [None, None, 2])
I am predicting financial time series with different time periods using TensorFlow. In order to divide the input data, I made sub-samples and used a for loop.
However, I got a ValueError like this:
ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
Without subsample this code works well.
Below is my code.
import tensorflow as tf
import numpy as np
import matplotlib
import os
import matplotlib.pyplot as plt
class lstm:
    """Single-layer LSTM regressor trained on sliding windows of a series."""

    def __init__(self, x, y):
        """Set the hyper-parameters; the input width is taken from ``x.shape[1]``.

        ``y`` is accepted for symmetry with ``model`` but is not used here.
        """
        # train Parameters
        self.seq_length = 50
        self.data_dim = x.shape[1]
        self.hidden_dim = self.data_dim * 2
        self.output_dim = 1
        self.learning_rate = 0.0001
        self.iterations = 5  # originally 500

    def model(self, x, y):
        """Build the network, train it, and return predictions.

        Windows of ``seq_length`` consecutive rows of ``x`` are paired with
        the ``y`` row that follows each window. The first ~79.77% of the
        windows are used for training and the rest for testing.

        Returns ``(train_predict, test_predict)``.

        NOTE: every call adds a new copy of the network to the default
        graph, so a second call in the same process raises the
        "Variable rnn/basic_lstm_cell/weights already exists" ValueError
        quoted in the question.
        """
        # build a dataset
        dataX = []
        dataY = []
        for i in range(0, len(y) - self.seq_length):
            _x = x[i:i + self.seq_length]
            _y = y[i + self.seq_length]
            dataX.append(_x)
            dataY.append(_y)
        train_size = int(len(dataY) * 0.7977)
        test_size = len(dataY) - train_size
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        print(train_size, test_size)

        # input place holders
        X = tf.placeholder(tf.float32, [None, self.seq_length, self.data_dim])
        Y = tf.placeholder(tf.float32, [None, 1])

        # build a LSTM network
        cell = tf.contrib.rnn.BasicLSTMCell(
            num_units=self.hidden_dim, state_is_tuple=True, activation=tf.tanh)
        outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        # We use the last cell's output
        self.Y_pred = tf.contrib.layers.fully_connected(
            outputs[:, -1], self.output_dim, activation_fn=None)

        # cost/loss
        loss = tf.reduce_sum(tf.square(self.Y_pred - Y))  # sum of the squares
        # optimizer
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train = optimizer.minimize(loss)

        # RMSE (defined here but not evaluated in this method)
        targets = tf.placeholder(tf.float32, [None, 1])
        predictions = tf.placeholder(tf.float32, [None, 1])
        rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))

        # training
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            # Training step
            for i in range(self.iterations):
                _, step_loss = sess.run([train, loss], feed_dict={X: trainX, Y: trainY})
            # prediction
            train_predict = sess.run(self.Y_pred, feed_dict={X: trainX})
            test_predict = sess.run(self.Y_pred, feed_dict={X: testX})
        return train_predict, test_predict
# variables definition
tsx = []
tsy = []
tsr = []
trp = []
tep = []
x = np.loadtxt('data.csv', delimiter=',')  # data for analysis
y = x[:, [-1]]
z = np.loadtxt('rb.csv', delimiter=',')  # data for time series
z1 = z[:, 0]  # start cell
z2 = z[:, 1]  # end cell

# Each sub-sample i is also published as module globals x_i, y_i, a_i,
# trp_i and tep_i, in addition to being appended to the lists above.
for i in range(1):  # need to change to len(z)
    globals()['x_%s' % i] = x[int(z1[i]):int(z2[i]), :]  # definition of x
    tsx.append(globals()["x_%s" % i])
    # Targets are shifted one row later than the inputs.
    globals()['y_%s' % i] = y[int(z1[i]) + 1:int(z2[i]) + 1, :]  # definition of y
    tsy.append(globals()["y_%s" % i])
    globals()['a_%s' % i] = lstm(tsx[i], tsy[i])  # definition of class
    # NOTE: from the second iteration on, this call is where the
    # "Variable ... already exists" ValueError from the question is raised.
    globals()['trp_%s' % i], globals()['tep_%s' % i] = globals()["a_%s" % i].model(tsx[i], tsy[i])
    trp.append(globals()["trp_%s" % i])
    tep.append(globals()["tep_%s" % i])
Every time the model method is called, you are building the computational graph of your LSTM again. The second time it is called, TensorFlow discovers that you have already created variables with the same names. If the reuse flag of the scope in which the variables are created is set to False, a ValueError is raised.
To solve this problem you have to set the reuse flag to True by calling tf.get_variable_scope().reuse_variables() at the end of your loop.
Note that you can't add this in the beginning of your loop, because then you are trying to reuse variables that have not yet been created.
You find more info in the tensorflow docs here
You define some variables in the "model" function.
Try this when you want to call "model" function multiple times:
# First call creates the variables inside the "model_fn" scope.
with tf.variable_scope("model_fn") as scope:
    train_predict, test_predict = model(input1)
# Re-entering the same scope with reuse=True makes the second call look up
# the existing variables instead of trying to create them again.
with tf.variable_scope(scope, reuse=True):
    train_predict, test_predict = model(input2)
My neural network code ends up outputting vectors with NaNs most of the time. The code is given below:
from __future__ import division, print_function
from six.moves import xrange
import time
import os
from glob import glob
from zipfile import ZipFile, ZIP_DEFLATED
import numpy as np
import tensorflow as tf
## Defining variables which have to be provided by user
## Defining the number of units in the RNN. This is also the size of the word
## and document embeddings
num_units = 100
##The number of data elements in a batch
batch_size = 1
##The folder where the npz files with the numpy arrays are stored.
npz_files_folder = "npz_files"
## Name of the file to which we want the model to be saved
model_file = "rnn_trial"
## Number of labels sampled from the noise for NCE
## (not referenced anywhere else in this script as shown)
num_sampled = 50
## The dropout probability for the NN
dropout = 0.2
## The learning rate for the optimizer
lr = 0.1
## The number of epochs
epochs = 10
## Reading in the list of npz files with vectors for each document
doc_files = sorted(glob(os.path.join(npz_files_folder, "*.npz")))
## One class per document: the training label of a document is its index
## in doc_files.
num_classes = num_docs = len(doc_files)
## The tensor for storing a batch of sentences where each sentence is a
## sequence of word embeddings. This is an input to the NN
sentences = tf.placeholder(tf.float32, [batch_size, None, num_units],
name='sentences')
## The tensor for storing a batch of documents where each document is a
## sequence of sentence embeddings. This is an input to the NN
documents = tf.placeholder(tf.float32, [batch_size, None, num_units])
## The tensor for storing the labels for each batch of documents
labels = tf.placeholder(tf.float32, [batch_size])
## Here we define the LSTM used in the first layer
sent_lstm = tf.contrib.rnn.BasicLSTMCell(num_units)
sent_lstm = tf.contrib.rnn.DropoutWrapper(sent_lstm,
output_keep_prob=1.0-dropout)
## We define the initial_state of the LSTM in first layer here
initial_state_sent_lstm = sent_lstm.zero_state(batch_size, tf.float32)
## Here we get the outputs and states from the first layer
outputs_lstm, states_lstm = tf.nn.dynamic_rnn(sent_lstm,
inputs=sentences, initial_state=initial_state_sent_lstm)
## Here we define the forward GRU used in the second layer
doc_gru_fw = tf.contrib.rnn.GRUCell(num_units//2)
initial_state_doc_gru_fw = doc_gru_fw.zero_state(batch_size, tf.float32)
## Here we define the reverse GRU used in second layer.
## Its size is chosen so that forward + backward units add up to num_units.
doc_gru_bw = tf.contrib.rnn.GRUCell(num_units-num_units//2)
initial_state_doc_gru_bw = doc_gru_bw.zero_state(batch_size, tf.float32)
## Here we get the outputs and states from the second layer
outputs_gru, states_gru = tf.nn.bidirectional_dynamic_rnn(cell_fw=doc_gru_fw,
cell_bw=doc_gru_bw, initial_state_fw=initial_state_doc_gru_fw,
initial_state_bw=initial_state_doc_gru_bw,
inputs=documents)
# outputs_gru, states_gru = tf.nn.bidirectional_dynamic_rnn(cell_fw=doc_gru_fw,
# cell_bw=doc_gru_bw,
# inputs=documents, dtype=tf.float32)
## The final document embeddings
## (forward and backward outputs concatenated, then averaged over axis 1)
final_output = tf.reduce_mean(tf.concat(outputs_gru, 2), axis=1)
sigmoid_W = tf.Variable(
tf.truncated_normal([num_units, 1],
stddev=1.0/np.sqrt(num_units)))
sigmoid_b = tf.Variable(tf.zeros([1], dtype=tf.float32))
logits = tf.matmul(final_output, sigmoid_W) + sigmoid_b
## The sigmoid output is scaled to the range of document indices and
## regressed against the label with a squared error.
y_ = (num_docs - 1) * tf.sigmoid(tf.reshape(logits, [-1]))
loss = tf.reduce_sum(tf.square(y_ - labels))
## Defining the training step
train = tf.train.AdamOptimizer(lr).minimize(loss)
## Initializing the session
sess = tf.Session()
## Initializing the variables
sess.run(tf.global_variables_initializer())
def _embed_document(npz_file):
    """Turn one document archive into the input for the document-level GRU.

    Each non-empty sentence array in ``npz_file`` is run through the
    sentence LSTM and the output of its last time step is kept. Returns an
    array of shape (1, n_sentences, num_units); n_sentences is 0 when the
    archive holds no non-empty sentence.
    """
    rows = []
    for sent_file in sorted(npz_file.files):
        sent_input = np.reshape(npz_file[sent_file], (1, -1, num_units))
        if 0 in sent_input.shape:
            continue
        output_1 = sess.run(outputs_lstm, feed_dict={sentences: sent_input})
        sent_embed = output_1[:, -1:]
        rows.extend(list(sent_embed))
    # Start from an empty (0, num_units) array, as the original did, so the
    # result shape and dtype are unchanged; concatenate once instead of
    # once per sentence.
    empty = np.reshape(np.array([]), (0, num_units))
    return np.array([np.concatenate([empty] + rows, 0)])


t = time.time()
for n in xrange(epochs):
    result = False
    for j, doc in enumerate(doc_files):
        # if j==100:
        # break
        try:
            npz_file = np.load(doc, allow_pickle=False)
        except ValueError:
            continue
        # Close the archive's file handle as soon as it has been read.
        with npz_file:
            temp_doc = _embed_document(npz_file)
        if temp_doc.shape[1] == 0:
            # NOTE(review): a document with no usable sentence would make
            # final_output the mean over an empty axis; this is a possible
            # source of the NaNs, so such documents are skipped -- confirm.
            continue
        train_label = np.array([j])
        ## Training the model
        _, doc_vector = sess.run([train, final_output], feed_dict={
            documents: temp_doc, labels: train_label})
        if np.isnan(np.sum(doc_vector)):
            result = True
    # True when at least one document vector contained NaN in this epoch.
    print(result)
    print("Finished with epoch ", n)
    print()

doc_vecs_file_name = model_file + "_doc_vecs.zip"
with ZipFile(doc_vecs_file_name, 'w', ZIP_DEFLATED, True) as myzip:
    for doc in doc_files:
        # if doc_files.index(doc)==100:
        # break
        try:
            npz_file = np.load(doc, allow_pickle=False)
        except ValueError:
            continue
        with npz_file:
            temp_doc = _embed_document(npz_file)
        ## Computing the document vector (no training step here)
        doc_vec = sess.run(final_output, feed_dict={documents: temp_doc})
        # Write the vector to a temporary CSV named after the document,
        # add it to the archive, then delete the temporary file.
        temp_file = doc.split(os.sep)[-1][:-4] + ".csv"
        np.savetxt(temp_file, doc_vec, delimiter=',')
        myzip.write(temp_file)
        os.remove(temp_file)

saver = tf.train.Saver()
saver.save(sess, model_file)
print("Time taken = ", (time.time() - t))
If needed, I can upload a sample data set which you can use to try running the code yourself. With that sample data set, the training occasionally completes without any NaNs creeping in, but most of the time NaNs pop up during training.
I am using TensorFlow version 1.1.0 along with Python 2.7.13 from the Anaconda distribution.
Environment info
Operating System: Windows 7 64-bit
Tensorflow installed from pre-built pip (no CUDA): 1.0.1
Python 3.5.2 64-bit
Problem
I have problems with restoring my net (RNN character base language model). Below is a simplified version with the same problem.
When I run it the first time, I get, for example, this.
...
step 160: loss = 1.956 (perplexity = 7.069016620211226)
step 180: loss = 1.837 (perplexity = 6.274748642468816)
step 200: loss = 1.825 (perplexity = 6.202084762557817)
But on the second run, after restoring parameters, I get this.
step 220: loss = 2.346 (perplexity = 10.446611983898903)
step 240: loss = 2.346 (perplexity = 10.446709120339545)
...
All the tf variables seem to be correctly restored, including the state, which will be fed to RNN.
Data position is also restored (from 'step').
I also made a similar program for MNIST recognition model, and this one works fine: the losses before and after the restoring are continuous.
Are there any other parameters or states that should be saved and restored?
import argparse
import os
import tensorflow as tf
import numpy as np
import math
B = 20 # batch size
H = 200 # size of hidden layer of neurons
T = 25 # number of time steps to unroll the RNN for
data_file = 'ptb.train.txt' # any plain text file will do
checkpoint_dir = "tmp"
#----------------
# prepare data
#----------------
# Read the corpus through a context manager so the file handle is closed.
with open(data_file, 'r') as f:
    data = f.read()
# NOTE: the order of list(set(...)) is not stable between interpreter runs,
# so the character <-> index mapping changes on every restart. This is the
# defect identified in the answer further down, which replaces it with
# sorted(set(data)).
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has {0} characters, {1} unique.'.format(data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
input_index_raw = np.array([char_to_ix[ch] for ch in data])
# Truncate to a whole number of T-step rows.
input_index_raw = input_index_raw[0:len(input_index_raw) // T * T]
# Targets are the inputs shifted by one character (wrapping at the end).
input_index_raw_shift = np.append(input_index_raw[1:], input_index_raw[0])
input_all = input_index_raw.reshape([-1, T])
target_all = input_index_raw_shift.reshape([-1, T])
num_packed_data = len(input_all)
#----------------
# build model
#----------------
class Model(object):
    """Character-level RNN language model whose state is carried between batches.

    Relies on the module-level constants ``B`` (batch size), ``H`` (hidden
    size), ``T`` (unroll length) and ``vocab_size``.
    """

    def __init__(self):
        """Build the graph: embedding -> BasicRNNCell -> softmax -> SGD."""
        self.input_ph = tf.placeholder(tf.int32, [None, T], name="input_ph")
        self.target_ph = tf.placeholder(tf.int32, [None, T], name="target_ph")
        embedding = tf.get_variable(
            "embedding", [vocab_size, H],
            initializer=tf.random_normal_initializer(), dtype=tf.float32)
        # input_ph is B x T.
        # input_embedded is B x T x H.
        input_embedded = tf.nn.embedding_lookup(embedding, self.input_ph)
        cell = tf.contrib.rnn.BasicRNNCell(H)
        self.state_ph = tf.placeholder(
            tf.float32, (None, cell.state_size), name="state_ph")
        # Make state variable so that it will be saved by the saver.
        self.state = tf.get_variable(
            "state", (B, cell.state_size),
            initializer=tf.zeros_initializer(), trainable=False,
            dtype=tf.float32)
        # Construct initial_state according to whether restoring or not.
        self.isRestore = tf.placeholder(tf.bool, shape=(), name="isRestore")
        zero_state = cell.zero_state(B, dtype=tf.float32)
        self.initial_state = tf.cond(
            self.isRestore, lambda: self.state, lambda: zero_state)
        # input_embedded : B x T x H
        # output: B x T x H
        # state : B x cell.state_size
        output, state_ = tf.nn.dynamic_rnn(
            cell, input_embedded, initial_state=self.state_ph)
        # Fetching final_state also stores the new state in the variable.
        self.final_state = tf.assign(self.state, state_)
        # reshape to (B * T) x H.
        output_flat = tf.reshape(output, [-1, H])
        # Convert hidden layer's output to vector of logits for each vocabulary.
        softmax_w = tf.get_variable("softmax_w", [H, vocab_size], dtype=tf.float32)
        softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
        logits = tf.matmul(output_flat, softmax_w) + softmax_b
        # cross_entropy is a vector of length B * T
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.target_ph, [-1]), logits=logits)
        self.loss = tf.reduce_mean(cross_entropy)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        self.global_step = tf.get_variable(
            "global_step", (), initializer=tf.zeros_initializer(),
            trainable=False, dtype=tf.int32)
        # NOTE(review): the optimizer is given the per-element cross_entropy
        # vector rather than self.loss; kept as in the original -- confirm
        # that this is intended.
        self.training_op = optimizer.minimize(
            cross_entropy, global_step=self.global_step)

    def train_batch(self, sess, input_batch, target_batch, initial_state):
        """Run one training step and return ``(final_state, loss)``.

        ``initial_state`` is fed as the RNN's starting state; the returned
        final state is meant to be passed into the next call.
        """
        final_state_, _, final_loss = sess.run(
            [self.final_state, self.training_op, self.loss],
            feed_dict={self.input_ph: input_batch,
                       self.target_ph: target_batch,
                       self.state_ph: initial_state})
        return final_state_, final_loss
# main
with tf.Session() as sess:
    if not tf.gfile.Exists(checkpoint_dir):
        tf.gfile.MakeDirs(checkpoint_dir)
    # Row offset between consecutive batch members (see idx below).
    batch_stride = num_packed_data // B
    # make model
    model = Model()
    saver = tf.train.Saver()
    # always initialize
    init = tf.global_variables_initializer()
    init.run()
    # restore if necessary
    isRestore = False
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt:
        isRestore = True
        last_model = ckpt.model_checkpoint_path
        print("Loading " + last_model)
        saver.restore(sess, last_model)
    # set initial step
    step = tf.train.global_step(sess, model.global_step) + 1
    print("start step = {0}".format(step))
    # fetch initial state
    state = sess.run(model.initial_state, feed_dict={model.isRestore: isRestore})
    print("Initial state: {0}".format(state))
    while True:
        # prepare batch data
        idx = [(step + x * batch_stride) % num_packed_data for x in range(0, B)]
        input_batch = input_all[idx]
        target_batch = target_all[idx]
        state, last_loss = model.train_batch(sess, input_batch, target_batch, state)
        if step % 20 == 0:
            print('step {0}: loss = {1:.3f} (perplexity = {2})'.format(step, last_loss, math.exp(last_loss)))
        if step % 200 == 0:
            # Save a checkpoint and stop, so each run covers 200 steps.
            saved_file = saver.save(sess, os.path.join(checkpoint_dir, "model.ckpt"), global_step=step)
            print("Saved to " + saved_file)
            print("Last state: {0}".format(model.state.eval()))
            break
        step = step + 1
The problem is solved. It had nothing to do with RNN nor TensorFlow.
I changed
chars = list(set(data))
to
chars = sorted(set(data))
and now it works.
This is because python uses a random hash function to build the set, and every time python restarted, 'chars' had a different ordering.
I recently started to learn TensorFlow and am trying to write a simple RNN using the scan function.
What I'm trying to do is make the RNN predict a sine function.
It takes 1-dimensional input and produces 1-dimensional output, in batches, as follows.
import tensorflow as tf
from tensorflow.examples.tutorials import mnist
import numpy as np
import matplotlib.pyplot as plt
import os
import time
# FLAGS (options)
tf.flags.DEFINE_string("data_dir", "", "")
#tf.flags.DEFINE_boolean("read_attn", True, "enable attention for reader")
#tf.flags.DEFINE_boolean("write_attn",True, "enable attention for writer")
opt = tf.flags.FLAGS
#Parameters
time_step = 10
num_rnn_h = 16
batch_size = 2
max_epoch=10000
learning_rate=1e-3 # learning rate for optimizer
eps=1e-8 # epsilon for numerical stability
#temporary sinusoid data
x_tr = np.zeros([batch_size,time_step])
y_tr = np.zeros([batch_size,time_step])
ptrn = 0.7*np.sin(np.arange(time_step+1)/(2*np.pi))
# Both batch rows hold the same pattern; targets are inputs shifted by one.
x_tr[0] = ptrn[0:time_step]
y_tr[0] = ptrn[1:time_step+1]
x_tr[1] = ptrn[0:time_step]
y_tr[1] = ptrn[1:time_step+1]
#Build model
x = tf.placeholder(tf.float32,shape=[batch_size,time_step,1], name= 'input')
y = tf.placeholder(tf.float32,shape=[None,time_step,1], name= 'target')
cell = tf.nn.rnn_cell.BasicRNNCell(num_rnn_h)
#cell = tf.nn.rnn_cell.LSTMCell(num_h, state_is_tuple=True)
with tf.variable_scope('output'):
    W_o = tf.get_variable('W_o', shape=[num_rnn_h, 1])
    b_o = tf.get_variable('b_o', shape=[1], initializer=tf.constant_initializer(0.0))
init_state = cell.zero_state(batch_size, tf.float32)
#make graph
#rnn_outputs, final_states = tf.scan(cell, xx1, initializer= tf.zeros([num_rnn_h]))
# x is transposed to time-major so that scan iterates over time steps.
# NOTE(review): cell(xi, a) returns an (output, state) pair while the scan
# initializer is a single state tensor; this mismatch looks like the source
# of the "Shapes (2, 16) and (2, 2, 16)" error quoted below -- confirm.
scan_outputs = tf.scan(lambda a, xi: cell(xi, a), tf.transpose(x, perm=[1,0,2]), initializer= init_state)
rnn_outputs, rnn_states = tf.unpack(tf.transpose(scan_outputs,perm=[1,2,0,3]))
print(rnn_outputs, rnn_states)
with tf.variable_scope('predictions'):
    weighted_sum = tf.reshape(tf.matmul(tf.reshape(rnn_outputs, [-1, num_rnn_h]), W_o), [batch_size, time_step, 1])
    predictions = tf.add(weighted_sum, b_o, name='predictions')
with tf.variable_scope('loss'):
    loss = tf.reduce_mean((y - predictions) ** 2, name='loss')
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
But it gives an error at the last line (the optimizer), like:
ValueError: Shapes (2, 16) and (2, 2, 16) are not compatible
If someone knows the reason, please tell me how to fix it.
I assume your error is not on the last line (the optimizer) but rather on some operation you are doing earlier. Perhaps in the reduce_mean with this y - prediction? I will not go over your code in details but I will tell you that this error comes when you do an operation between two tensors which require the same shape (usually math operations).