word2vec_basic not working (Tensorflow)

I am new to word embeddings and TensorFlow. I am working on a project where I need to apply word2vec to health data.
I used the code from the TensorFlow website (word2vec_basic.py). I modified this code a little so that it reads my data instead of "text8.zip", and it runs normally until the last step:
num_steps = 100001

with tf.Session(graph=graph) as session:
  # We must initialize all variables before we use them.
  tf.initialize_all_variables().run()
  print('Initialized')
  average_loss = 0
  for step in range(num_steps):
    batch_data, batch_labels = generate_batch(
        batch_size, num_skips, skip_window)
    feed_dict = {train_dataset : batch_data, train_labels : batch_labels}
    _, l = session.run([optimizer, loss], feed_dict=feed_dict)
    average_loss += l
    if step % 2000 == 0:
      if step > 0:
        average_loss = average_loss / 2000
      # The average loss is an estimate of the loss over the last 2000 batches.
      print('Average loss at step %d: %f' % (step, average_loss))
      average_loss = 0
    # note that this is expensive (~20% slowdown if computed every 500 steps)
    if step % 10000 == 0:
      sim = similarity.eval()
      for i in range(valid_size):
        valid_word = reverse_dictionary[valid_examples[i]]
        top_k = 8  # number of nearest neighbors
        nearest = (-sim[i, :]).argsort()[1:top_k+1]
        log = 'Nearest to %s:' % valid_word
        for k in range(top_k):
          close_word = reverse_dictionary[nearest[k]]
          log = '%s %s,' % (log, close_word)
        print(log)
  final_embeddings = normalized_embeddings.eval()
This part of the code is exactly the same as in the example, so I don't think it is wrong. The error it gives is:
KeyError                                  Traceback (most recent call last)
<ipython-input-20-fc4c5c915fc6> in <module>()
     34         for k in xrange(top_k):
     35           print(nearest[k])
---> 36           close_word = reverse_dictionary[nearest[k]]
     37           log_str = "%s %s," % (log_str, close_word)
     38           print(log_str)

KeyError: 2868
I changed the size of the input data but it still gives the same error.
I would really appreciate it if someone could give me some advice on how to fix this problem.

If the vocabulary size is smaller than the default maximum (50000), you should adjust that number.
At the end of step 2, set vocabulary_size to the actual dictionary size:
data, count, dictionary, reverse_dictionary = build_dataset(words)
del words # Hint to reduce memory.
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
#add this line to modify
vocabulary_size = len(dictionary)
print('Dictionary size', len(dictionary))
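Alternatively (not part of the original answer), you can make the neighbour lookup itself tolerant of out-of-vocabulary indices, so a mismatch between vocabulary_size and the real dictionary can never raise a KeyError. Using the names from the loop in the question:
# Defensive lookup: fall back to 'UNK' when an index is not in the dictionary.
for k in range(top_k):
    close_word = reverse_dictionary.get(nearest[k], 'UNK')
    log = '%s %s,' % (log, close_word)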

Related

Transfer learning by using vgg in pytorch

I am using vgg16 for image classification. I want to test my transferred model with the following code:
classes = ['A', 'B', 'C']
len(classes)   #3
len(test_data) #171
batch_size = 10

# Testing
test_loss = 0.0
class_correct = list(0. for i in range(len(classes)))
class_total = list(0. for i in range(len(classes)))

vgg16.eval()
for data, target in test_loader:
    output = vgg16(data)
    loss = criterion(output, target)
    test_loss += loss.item()*data.size(0)
    _, pred = torch.max(output, 1)
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy())
    for i in range(batch_size):
        label = target.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))
for i in range(len(classes)):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))
I receive the following error:
     15     for i in range(batch_size):
     16         label = target.data[i]
---> 17         class_correct[label] += correct[i].item()
     18         class_total[label] += 1
     19

IndexError: too many indices for array
I do not know why I am getting this error or how I can solve it. I would be grateful if you could help me.
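One likely cause (an assumption on my part, not confirmed in the post): with 171 test images and batch_size=10, the last batch from test_loader contains a single sample, so np.squeeze collapses correct to a 0-d array and range(batch_size) also runs past the end of that batch. A minimal sketch of the inner loop that avoids both, keeping the rest of the code as posted:
for data, target in test_loader:
    output = vgg16(data)
    loss = criterion(output, target)
    test_loss += loss.item() * data.size(0)
    _, pred = torch.max(output, 1)
    correct = pred.eq(target).cpu().numpy()  # 1-D array of length equal to the actual batch size
    for i in range(target.size(0)):          # actual batch size, not the fixed batch_size
        label = target[i].item()
        class_correct[label] += int(correct[i])
        class_total[label] += 1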

AssertionError: batch_size must be divisible by the number of TPU cores in use (1 vs 8) when using the predict function

Some details for context:
Working on Google Colab using TPU.
Model is fitting successfully without any issues
Running into issues while attempting to use the predict function
Here is the code I'm using to train:
tpu_model.fit(x, y,
              batch_size=128,
              epochs=60)
Here is the code I'm using to predict:
def generate_output():
    generated = ''
    #sentence = text[start_index: start_index + Tx]
    #sentence = '0'*Tx
    usr_input = input("Write the beginning of your poem, the Shakespeare machine will complete it. Your input is: ")
    # zero pad the sentence to Tx characters.
    sentence = ('{0:0>' + str(maxlen) + '}').format(usr_input).lower()
    generated += usr_input
    sys.stdout.write("\n\nHere is your poem: \n\n")
    sys.stdout.write(usr_input)
    for i in range(400):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            if char != '0':
                x_pred[0, t, char_indices[char]] = 1.

-->     preds = tpu_model.predict(x_pred, batch_size = 128 ,workers = 8,verbose=0)[0]
        next_index = sample(preds, temperature = 1.0)
        next_char = indices_char[next_index]
        generated += next_char
        sentence = sentence[1:] + next_char
        sys.stdout.write(next_char)
        sys.stdout.flush()
        if next_char == '\n':
            continue
And here is the error (I added an arrow above so you know the location of the error):
AssertionError: batch_size must be divisible by the number of TPU cores in use (1 vs 8)
This makes no sense to me, as the batch size I used while training is divisible by 8 AND the batch size I've passed to my predict function is divisible by 8.
I'm not sure what the issue is and how to resolve it. Any help would be much appreciated.
From the error:
AssertionError: batch_size must be divisible by the number of TPU cores in use (1 vs 8)
It looks like you are using a batch_size of 1, which can be inferred from the first dimension of your input data:
x_pred = np.zeros((1, maxlen, len(chars)))
I think you might want to change it to:
x_pred = np.zeros((8, maxlen, len(chars)))
so that the batch dimension becomes 8, which matches the number of TPU cores in use.
Or you can also keep the current batch_size of 1 but use 1 TPU core.
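A minimal sketch of that padding idea (my own illustration, assuming the maxlen, chars, char_indices and sentence variables from the question): the single sample is repeated across a batch of 8 so each TPU core receives one row, and only the first prediction is kept.
x_pred = np.zeros((8, maxlen, len(chars)))
for t, char in enumerate(sentence):
    if char != '0':
        x_pred[:, t, char_indices[char]] = 1.  # same one-hot pattern on all 8 rows
preds = tpu_model.predict(x_pred, batch_size=8, verbose=0)[0]  # keep only row 0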

indices = 2 is not in [0, 1)

I'm working on a seq2sql project and I successfully built a model, but when training I get an error. I'm not using any Keras embedding layer.
M=13 #Question Length
d=40 #Dimention of the LSTM
C=12 #number of table Columns
batch_size=9

inputs1=Input(shape=(M,100),name='question_token')
Hq=Bidirectional(LSTM(d,return_sequences=True),name='QuestionENC')(inputs1) #this is HQ shape is (num_samples,13,80)

inputs2=Input(shape=(C,3,100),name='col_token')
col_lstm_layer=Bidirectional(LSTM(d,return_sequences=False),name='ColENC')

def hidd(te):
    t=tf.Variable(initial_value=1,dtype=tf.int32)
    for i in range(batch_size):
        t=tf.assign(t,i)
        Z = tf.nn.embedding_lookup(te, t)
        print(col_lstm_layer(Z))
        h=tf.reshape(col_lstm_layer(Z),[1,C,d*2])
        if i==0:
            # cols_last_hidden=tf.Variable(initial_value=h)
            cols_last_hidden=tf.stack(h)#this is because it gives an error if we use tf.Variable here
        else:
            cols_last_hidden=tf.concat([cols_last_hidden,h],0)#shape of this one is (num_samples,num_col,80) 80 is last encoding of each column
    return cols_last_hidden

cols_last_hidden=Lambda(hidd)(inputs2)
Hq=Dense(d*2,name='QuestionLastEncode')(Hq)

I=tf.Variable(initial_value=1,dtype=tf.int32)
J=tf.Variable(initial_value=1,dtype=tf.int32)
K=1

def get_col_att(tensors):
    global K,all_col_attention
    if K:
        t=tf.Variable(initial_value=1,dtype=tf.int32)
        for i in range(batch_size):
            t=tf.assign(t,i)
            x = tf.nn.embedding_lookup(tensors[0], t)
            # print("tensors[1]:",tensors[1])
            y = tf.nn.embedding_lookup(tensors[1], t)
            # print("x shape",x.shape,"y shape",y.shape)
            y=tf.transpose(y)
            # print("x shape",x.shape,"y",y.shape)
            Ecol=tf.reshape(tf.transpose(tf.tensordot(x,y,axes=1)),[1,C,M])
            if i==0:
                # all_col_attention=tf.Variable(initial_value=Ecol,name=""+i)
                all_col_attention=tf.stack(Ecol)
            else:
                all_col_attention=tf.concat([all_col_attention,Ecol],0)
        K=0
    print("all_col_attention",all_col_attention)
    return all_col_attention

total_alpha_sel_lambda=Lambda(get_col_att,name="Alpha")([Hq,cols_last_hidden])
total_alpha_sel=Dense(13,activation="softmax")(total_alpha_sel_lambda)
# print("Hq",Hq," total_alpha_sel_lambda shape",total_alpha_sel_lambda," total_alpha_sel shape",total_alpha_sel.shape)

def get_EQcol(tensors):
    global K
    if K:
        t=tf.Variable(initial_value=1,dtype=tf.int32)
        global all_Eqcol
        for i in range(batch_size):
            t=tf.assign(t,i)
            x = tf.nn.embedding_lookup(tensors[0], t)
            y = tf.nn.embedding_lookup(tensors[1], t)
            Eqcol=tf.reshape(tf.tensordot(x,y,axes=1),[1,C,d*2])
            if i==0:
                # all_Eqcol=tf.Variable(initial_value=Eqcol,name=""+i)
                all_Eqcol=tf.stack(Eqcol)
            else:
                all_Eqcol=tf.concat([all_Eqcol,Eqcol],0)
        K=0
    print("all_Eqcol",all_Eqcol)
    return all_Eqcol

K=1
EQcol=Lambda(get_EQcol,name='EQcol')([total_alpha_sel,Hq])#total_alpha_sel(12x13) Hq(13xd*2)
EQcol=Dropout(.2)(EQcol)

L1=Dense(d*2,name='L1')(cols_last_hidden)
L2=Dense(d*2,name='L2')(EQcol)
L1_plus_L2=Add()([L1,L2])
pre=Flatten()(L1_plus_L2)
Psel=Dense(12,activation="softmax")(pre)

model=Model(inputs=[inputs1,inputs2],outputs=Psel)
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
model.summary()

earlyStopping=EarlyStopping(monitor='val_loss', patience=7, verbose=0, mode='auto')
history=model.fit([Equestion,Col_Embeddings],y_train,epochs=50,validation_split=.1,shuffle=False,callbacks=[earlyStopping],batch_size=batch_size)
The shapes of Equestion, Col_Embeddings, and y_train are (10, 12, 3, 100), (10, 13, 100), and (10, 12).
I searched for this error, but in every case I found, an embedding layer had been used incorrectly. Here I get the error even though I'm not using one.
indices = 2 is not in [0, 1)
[[{{node lambda_3/embedding_lookup_2}} = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:#col_token_2"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_col_token_2_0_1, lambda_3/Assign_2, lambda_3/embedding_lookup_2/axis)]]
The problem here was that the batch size is defined at the graph level. I used batch_size=9 for the graph, and training does get batches of 9 (validation_split=.1 on the full set of 10 leaves 9 training samples), but only one sample is left for validation, because 10*.1 is one.
A batch of size 1 cannot be passed to a graph that needs a batch size of 9; that is why this error comes up.
As for the solution, I set batch_size=1 and then it works fine; I also got good accuracy using batch_size=1.
Hope this will help someone.
Cheers.
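To make the arithmetic concrete (my own note, not part of the original answer): with 10 samples and validation_split=.1, Keras holds out exactly one sample for validation, and that single-sample batch cannot be fed to Lambda layers that loop over a hard-coded batch_size of 9. Setting the global batch_size to 1 before the model is built, as described above, makes every batch match what the graph expects:
batch_size = 1  # set before the Lambda layers are built, since they capture it

history = model.fit([Equestion, Col_Embeddings], y_train, epochs=50, validation_split=.1,
                    shuffle=False, callbacks=[earlyStopping], batch_size=batch_size)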
For me this error was due to badly shaped input data. Double-check the input data you feed to the model; what is expected depends on your model's inputs.

Linear Regression with Neural Networks in Tensorflow and normalization

I've been following this tutorial:
https://blog.altoros.com/using-linear-regression-in-tensorflow.html
I'm aware there are better ways to do linear regression, but I'm using this as a base to do multi-variate regression and multi-variate non-linear regression, to try to understand TensorFlow.
Without normalizing my data at all, I get 'nan' with GradientDescentOptimizer. I'm curious about why this is. Why is normalization so important that the model won't run at all? And what about subtracting mean and dividing by standard deviation suddenly makes it work so well?
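For what it's worth (my own note, not from the original post), the nan is easy to provoke at these magnitudes; a rough back-of-the-envelope check with one raw sample:
# size x ~ 2000, price y = 5*x + 30 ~ 10030, starting near W = 0, b = 0,
# with cost (W*x + b - y)^2 / (2 * 47):
#   dCost/dW = x * (W*x + b - y) / 47  ~  2000 * (-10030) / 47  ~  -4e5
#   W <- W - 0.05 * (-4e5)             ~  +2e4
# The new prediction W*x is ~4e7, so the next gradient is even larger and the
# iterates blow up to inf and then nan. After z-score normalization x and y
# are O(1), so the same learning rate gives O(1) steps and training is stable.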
After normalizing data, I'd like to recover the original value.
Each set of data seems to be normalized separately with its own stddev and mean parameters: the training data X, training data Y, test data X, and test data Y.
However, when I run the network on new data, I'm assuming that when I predict new values, I have to normalize the input again. In that case, how do I make sense of the predicted Y? Am I supposed to use the training data's standard deviation and mean to unnormalize, or the new data's standard deviation and mean? I am confused about what the model is actually fitting to when I give it normalized training data, and how to interpret W and b. I originally wanted to fit to Y = mx + b, and I want to know what m and b really are.
Because I trained on training data, I assumed that I would need to store the training_data's pre-normalization standard deviation and mean and unnormalize any results from the network using this value. But in fact, when I use the new data's standard deviation and mean to unnormalize I get more reasonable values. I don't think it's worth posting that code because I just have a fundamental misunderstanding of what I need to do, but this is the basic code I'm using anyway.
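For reference (again my own note, not from the post): the normalize() in the code below is a plain z-score, so undoing it for a prediction is just multiplying by a std and adding a mean; which (std, mean) pair is the right one is exactly what the question asks. The asker's stated assumption, i.e. using the training-set price statistics, would look like this hypothetical helper:
def unnormalize(y_norm, std, mean):
    # invert (y - mean) / std
    return y_norm * std + mean

orig_Y_predicted_i = unnormalize(Y_predicted[i], price_data_n_std, price_data_n_mean)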
import tensorflow as tf
import numpy
import matplotlib.pyplot as plt

# Train a data set
# X: size data
size_data = [ 2104, 1600, 2400, 1416, 3000, 1985, 1534, 1427,
              1380, 1494, 1940, 2000, 1890, 4478, 1268, 2300,
              1320, 1236, 2609, 3031, 1767, 1888, 1604, 1962,
              3890, 1100, 1458, 2526, 2200, 2637, 1839, 1000,
              2040, 3137, 1811, 1437, 1239, 2132, 4215, 2162,
              1664, 2238, 2567, 1200, 852, 1852, 1203 ]
# Y: price data (set to 5x + 30)
price_data = [5*c + 30 for c in size_data]

size_data = numpy.asarray(size_data)
price_data = numpy.asarray(price_data)

# Test a data set
size_data_test = [ 1600, 1494, 1236, 1100, 3137, 2238 ]
price_data_test = [5*c + 30 for c in size_data_test]
size_data_test = numpy.asarray(size_data_test)
price_data_test = numpy.asarray(price_data_test)

def normalize(array):
    std = array.std()
    mean = array.mean()
    return (array - mean) / std, std, mean

# Normalize a data set
size_data_n, size_data_n_std, size_data_n_mean = normalize(size_data)
price_data_n, price_data_n_std, price_data_n_mean = normalize(price_data)
size_data_test_n, size_data_test_n_std, size_data_test_n_mean = normalize(size_data_test)
price_data_test_n, price_data_test_n_std, price_data_test_n_mean = normalize(price_data_test)

# Display a plot
#plt.plot(size_data, price_data, 'ro', label='Samples data')
#plt.legend()
#plt.draw()

samples_number = price_data_n.size

# TF graph input
X = tf.placeholder("float")
Y = tf.placeholder("float")

# Create a model
# Set model weights
W = tf.Variable(numpy.random.randn(), name="weight")
b = tf.Variable(numpy.random.randn(), name="bias")

# Set parameters
learning_rate = 0.05
training_iteration = 200

# Construct a linear model
model = tf.add(tf.mul(X, W), b)

# Minimize squared errors
cost_function = tf.reduce_sum(tf.pow(model - Y, 2))/(2 * samples_number) #L2 loss
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function) #Gradient descent
#optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost_function)

# Initialize variables
init = tf.initialize_all_variables()

# Launch a graph
with tf.Session() as sess:
    sess.run(init)

    display_step = 20
    # Fit all training data
    for iteration in range(training_iteration):
        for (x, y) in zip(size_data_n, price_data_n):
            sess.run(optimizer, feed_dict={X: x, Y: y})
        # Display logs per iteration step
        if iteration % display_step == 0:
            print("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(sess.run(cost_function, feed_dict={X:size_data_n, Y:price_data_n})),
                  "W=", sess.run(W), "b=", sess.run(b))

    tuning_cost = sess.run(cost_function, feed_dict={X: size_data_n, Y: price_data_n})
    print("Tuning completed:", "cost=", "{:.9f}".format(tuning_cost), "W=", sess.run(W), "b=", sess.run(b))

    # Validate a tuning model
    testing_cost = sess.run(cost_function, feed_dict={X: size_data_test_n, Y: price_data_test_n})
    print("Testing data cost:", testing_cost)

    Y_predicted = sess.run(model, feed_dict={X: size_data_test_n, Y: price_data_test_n})

    print("%-20s%-20s%-20s%-20s" % ("Test X", "Actual", "Target", "Error(%)"))
    print('Normalized')
    for i in range(len(size_data_test_n)):
        err = 100.0 * abs(Y_predicted[i] - price_data_test_n[i]) / abs(price_data_test_n[i])
        print("%-20f%-20f%-20f%-20f" % (size_data_test_n[i], Y_predicted[i], price_data_test_n[i], err))

    print('Unnormalized')
    for i in range(len(size_data_test_n)):
        orig_size_data_test_i = size_data_test_n[i] * size_data_test_n_std + size_data_test_n_mean
        orig_price_data_test_i = price_data_test_n[i] * price_data_test_n_std + price_data_test_n_mean
        # ??? which one is correct for getting unnormalized predicted Y?
        #orig_Y_predicted_i = Y_predicted[i] * price_data_n_std + price_data_n_mean
        orig_Y_predicted_i = Y_predicted[i] * price_data_test_n_std + price_data_test_n_mean
        orig_err = 100.0 * abs(orig_Y_predicted_i - orig_price_data_test_i) / abs(orig_price_data_test_i)
        print("%-20f%-20f%-20f%-20f" % (orig_size_data_test_i, orig_Y_predicted_i, orig_price_data_test_i, orig_err))

    # Display a plot
    plt.figure()
    plt.plot(size_data, price_data, 'ro', label='Samples')
    plt.plot(size_data_test, price_data_test, 'go', label='Testing samples')
    plt.plot(size_data_test, (sess.run(W) * size_data_test_n + sess.run(b))*price_data_n_std + price_data_n_mean, label='Fitted test line')
    plt.legend()
    plt.show()

Comparison of GradientDescent algorithm in tensorflow with the implementation of Michael Nielsen

First I will give an overview of my problem. I have two setups:
1) A net which is based on tensorflow
2) A net which is based on code from Michael Nielsen's Book http://neuralnetworksanddeeplearning.com/index.html
Both nets are completely equivalent. They both have:
3 hidden layers with 30 neurons each
2 input neurons, one output neuron
All activations are sigmoid
Stochastic gradient descent as the learning algorithm, with eta=3.0
quadratic cost function : cost_function = tf.scalar_mul(1.0/(N_training_set*2.0),tf.reduce_sum(tf.squared_difference(y,y_)))
batch_size of 10
weight initialization: The weights which connect the lth and l+1th layer are initialized with sigma=1/sqrt(N_l), where N_l is the number of neurons in the lth layer.
My problem is that the TensorFlow results are very bad (a factor of 10 worse than the results I obtain with the Nielsen code).
So before I post my complete code: does anybody know of a bug in TensorFlow's stochastic gradient descent implementation? (Or does anybody have a reference for how the learning rate of stochastic gradient descent is defined in TensorFlow? I cannot find anything in the API.)
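For the parenthetical question, some general background (mine, not from the post): tf.train.GradientDescentOptimizer applies the plain update w <- w - learning_rate * dLoss/dw to whatever loss tensor it is given on each train_step, whereas Nielsen's update_mini_batch divides the summed per-sample gradients by the mini-batch size. A side-by-side sketch:
# TensorFlow, one train_step on a feed of m samples:
#   w <- w - eta * d(cost_function)/dw           (cost_function exactly as defined below)
# Nielsen, network.py update_mini_batch:
#   w <- w - (eta / m) * sum_over_batch dC_x/dw
# With cost_function scaled by 1/(2*N_training_set) while each step only sees
# batch_size=10 samples, the two schemes take very differently sized steps for
# the same eta=3.0; worth ruling out before suspecting a bug in TensorFlow.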
Here is my code for the tensorflow net:
regression.py
import readData
import matplotlib.pyplot as plt
import numpy as np
from random import randint
import random
from root_numpy import fill_hist
from ROOT import TCanvas, TH2F, TText, TF1 ,TH1D
import ROOT
import tensorflow as tf
import math
# # # # # # ##
#Read in data#
# #
function_outputs=True # apply an invertible function to the y's and train with the modified outputs y_mod! Up to now this function is just a normalization.
function_inputs=True #
full_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","full_set",function_inputs,function_outputs)
N_full_set=full_set.get_N()
N_validation_set=10000
N_training_set=N_full_set-(N_validation_set)
full=range(0,N_full_set)
random.shuffle(full)
training_subset=full[:N_training_set]#indices for training set
validation_subset=full[N_training_set:N_training_set+N_validation_set]#indices for validation set
training_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","training_set",
function_inputs,function_outputs,full_set=full_set,subset=training_subset)
validation_set = readData.read_data_set("./TH2D_A00_TB10.root","LHCChi2_CMSSM_nObs1061_A00_TB10","validation_set",
function_inputs,function_outputs,full_set=full_set,subset=validation_subset )
#overwiew of full data set, training_data set and validation_data set. The modified members( normalized in this case) can be accessed with the x_mod() and y_mod() member functions
#the normalized data (input and output) will be used to train the net
print "full_data_set:"
print "x (inputs)"
print full_set.get_x()
print "y (outputs)"
print full_set.get_y()
print "x_mod"
print full_set.get_x_mod()
print "y_mod"
print full_set.get_y_mod()
print "------------------"
print "training_data_set:"
print "x (inputs)"
print training_set.get_x()
print "y (outputs)"
print training_set.get_y()
print "x_mod"
print training_set.get_x_mod()
print "y_mod"
print training_set.get_y_mod()
print "------------------"
print "evaluation_data_set:"
print "x (inputs)"
print validation_set.get_x()
print "y (outputs)"
print validation_set.get_y()
print "x_mod"
print validation_set.get_x_mod()
print "y_mod"
print validation_set.get_y_mod()
print "------------------"
# # # # # # # # # # # ##
#setting up the network#
# #
N_epochs = 20
learning_rate = 3.0
batch_size = 10
N1 = 2 #equals N_inputs
N2 = 30
N3 = 30
N4 = 30
N5 = 1
N_in=N1
N_out=N5
#one calculates everything directly for all elements in one batch
"""example: N_in=2,N_out=3, mini_batch_size=5, activation function=linear. In der output matrix gibt es 5Zeilen,jede fuer ein mini batch. Jede Zeile hat 3 Spalten fuer ein output neuron jeweils
W2
[[-0.31917086 -0.03908769 0.5792625 ]
[ 1.34563279 0.03904691 0.39674851]]
b2
[ 0.40960133 -0.5495823 -0.97048181]
x_in
[[ 23.2 12.2 ]
[ 0. 1.1 ]
[ 2.3 3.3 ]
[ 23.22222 24.44444]
[ 333. 444. ]]
y=x_in*W2+b2
[[ 9.42155647 -0.98004436 17.30874062]
[ 1.88979745 -0.50663072 -0.53405845]
[ 4.1160965 -0.51062918 1.67109203]
[ 25.8909874 -0.50280523 22.17957497]
[ 491.5866394 3.77104688 368.08026123]]
hier wird klar, dass b2 auf jede Zeile der Matrix x_in*w2 draufaddiert wird.
W2 ist die transponierte der atrix, die im Buch definiert ist.
"""
x = tf.placeholder(tf.float32,[None,N1])#don't take the shape=(batch_size,N1) argument, because we need this for different batch sizes
W2 = tf.Variable(tf.random_normal([N1, N2],mean=0.0,stddev=1.0/math.sqrt(N1*1.0)))# Initialize the weights for one neuron with 1/sqrt(Number of weights which enter the neuron/ Number of neurons in layer before)
b2 = tf.Variable(tf.random_normal([N2]))
a2 = tf.sigmoid(tf.matmul(x, W2) + b2) #x=a1
W3 = tf.Variable(tf.random_normal([N2, N3],mean=0.0,stddev=1.0/math.sqrt(N2*1.0)))
b3 = tf.Variable(tf.random_normal([N3]))
a3 = tf.sigmoid(tf.matmul(a2, W3) + b3)
W4 = tf.Variable(tf.random_normal([N3, N4],mean=0.0,stddev=1.0/math.sqrt(N3*1.0)))
b4 = tf.Variable(tf.random_normal([N4]))
a4 = tf.sigmoid(tf.matmul(a3, W4) + b4)
W5 = tf.Variable(tf.random_normal([N4, N5],mean=0.0,stddev=1.0/math.sqrt(N4*1.0)))
b5 = tf.Variable(tf.random_normal([N5]))
y = tf.sigmoid(tf.matmul(a4, W5) + b5)
y_ = tf.placeholder(tf.float32,[None,N_out]) # ,shape=(None,N_out)
# # # # # # # # # # # # # #
#initializing and training#
# #
cost_function = tf.scalar_mul(1.0/(N_training_set*2.0),tf.reduce_sum(tf.squared_difference(y,y_)))
error_to_desired_output= y-y_
abs_error_to_desired_output= tf.abs(y-y_)
sum_abs_error_to_desired_output= tf.reduce_sum(tf.abs(y-y_))
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
init = tf.initialize_all_variables()
#launch the graph
sess = tf.Session()
sess.run(init)
N_training_batch=training_set.get_N()/batch_size # integer division rounds down
out_mod_validation=[0]*N_epochs # output of net, when inputting x_mod of validation data. Will be saved after each epoch.
error_mod_validation_data= [0]*N_epochs #absolute error on mod validation data after each epoch
diff_mod_validation=[0]*N_epochs # error vector of validation data after each epoch. i.e. y-y_
cost_training_data=[0]*N_epochs
for i in range(0,N_epochs):
for j in range(0,N_training_batch):
batch_xs, batch_ys, epochs_completed = training_set.next_batch(batch_size)#always gives the modified x's and y's. If one does not want to modify them, the function has to be set to the identity
sess.run(train_step, feed_dict={x: batch_xs,
y_: batch_ys})
cost_training_data[i]=sess.run(cost_function, feed_dict={
x: training_set.get_x_mod(), y_: training_set.get_y_mod()})
out_mod_validation[i]= sess.run(y, feed_dict={
x: validation_set.get_x_mod()})# output of net when inputting x_mod of validation data after each training epoch
diff_mod_validation[i]=sess.run(error_to_desired_output, feed_dict={
x: validation_set.get_x_mod(),y_: validation_set.get_y_mod()})
error_mod_validation_data[i]=sess.run(sum_abs_error_to_desired_output, feed_dict={
x: validation_set.get_x_mod(),y_: validation_set.get_y_mod()})
print "epochs completed: "+str(i)
#now calculate everything for the unmodified/unnormalized outputs
out_validation=[0]*N_epochs # output of net, when inputting x_mod of validation data and making the normalization of the output backwards, saved after each epoch
error_validation_data=[0.0]*N_epochs
diff_validation=[0.0]*N_epochs
#make the transformation on the outputs backwards
for i in range(0,N_epochs):
out_validation[i]=np.ndarray(shape=(validation_set.get_N(),1))
for j in range(0,len(out_mod_validation[i])):
out_validation[i][j]=out_mod_validation[i][j]#do this, because otherwise we will produce only a reference
readData.apply_inverse_function_to_outputs(out_mod_validation[i],out_validation[i],full_set.get_y_max())# second argument will be changed!
diff_validation[i]=np.subtract(out_validation[i],validation_set.get_y())
error_validation_data[i]=np.sum(np.absolute(np.subtract(out_validation[i],validation_set.get_y())))
#print at 10 examples how good the output matches the desired output
for i in range(0,10):
print "desired output"
print validation_set.get_y()[i][0]
print "actual output after last training epoch"
print out_validation[-1][i][0]
print "-------"
print "total error on validation_data set after last training"
print error_validation_data[-1]
# # # # ##
#printing#
# #
plt.figure(1)
plt.title("Costfunction of (modified) Training-data")
plt.xlabel("epochs")
plt.ylabel("cost function")
x_range=[x+1 for x in range(0,N_epochs)]
plt.plot(x_range,cost_training_data)
plt.savefig("cost_on_training_data.png")
plt.figure(2)
plt.title("f data")
plt.xlabel("epochs")
plt.ylabel("total error on validation data")
x_range=[x+1 for x in range(0,N_epochs)]
plt.plot(x_range,error_validation_data)
plt.savefig("error_on_val_data.png")
error_on_validation_data_after_training = diff_validation[-1].reshape((1,validation_set.get_N()))
hist=TH1D('hist',"Errors on val data after last training epoch",200,-10000,10000)
fill_hist(hist,error_on_validation_data_after_training[0])
canvas=TCanvas();
hist.GetXaxis().SetTitle("desired Chi^2- outputted Chi^2");
hist.Draw()
canvas.SaveAs('error_on_val_data_hist.png')
readData.py
import numpy as np
import root_numpy
from ROOT import TFile, TH2D, TCanvas
import itertools
def apply_function_to_inputs(x,x_mod,x_max):# Python passes everything by reference
#normalize the inputs
for i in range(0,len(x)):
for j in range(0,len(x[i])):
#print "x["+str(i)+"]["+str(j)+"]="+str(x[i][j])
x_mod[i][j]=x[i][j]/x_max[j]
#print "x_mod["+str(i)+"]["+str(j)+"]="+str(x_mod[i][j])
def apply_inverse_function_to_inputs(x,x_mod,x_max):# Python passes everything by reference
#re normalize the inputs
for i in range(0,len(x)):
for j in range(0,len(x[i])):
x_mod[i][j]=x[i][j]*x_max[j]
def apply_function_to_outputs(y,y_mod,y_max):# Python passes everything by reference
#normalize the outputs
for i in range(0,len(y)):
for j in range(0,len(y[i])):
y_mod[i][j]=y[i][j]/y_max[j]
def apply_inverse_function_to_outputs(y,y_mod,y_max):# Python passes everything by reference
#re-normalize the outputs
for i in range(0,len(y)):
for j in range(0,len(y[i])):
y_mod[i][j]=y[i][j]*y_max[j]
class Dataset(object):
def __init__(self,path,hist_name,kind_of_set,function_inputs,function_outputs,full_set,subset):
self._kind_of_set=kind_of_set
"""example
self._x np.ndarray(shape=(N_points,2))
[[ 10. 95.]
[ 10. 100.]
[ 10. 105.]
...,
[ 2490. 1185.]
[ 2490. 1190.]
[ 2490. 1195.]]
self._y np.ndarray(shape=(N_points,1))
[[ 0.00000000e+00]
[ 0.00000000e+00]
[ 0.00000000e+00]
...,
[ 6.34848448e-06]
[ 6.34845946e-06]
[ 6.34848448e-06]]
"""
rfile = TFile(path)
histogram = rfile.Get(hist_name)
#now prepare data for training:
if kind_of_set=="full_set":
N_points=histogram.GetXaxis().GetNbins() * histogram.GetYaxis().GetNbins() #number of points in full_set
self._N=N_points
self._y=np.ndarray(shape=(N_points,1))
self._x=np.ndarray(shape=(N_points,2))
self._y_mod=np.ndarray(shape=(N_points,1)) #function applied to outputs, for example normalized, or a function is applied
self._x_mod=np.ndarray(shape=(N_points,2)) #function applied to inputs
self._y_max=np.ndarray(shape=(1))
self._y_max[0]=0.0
self._x_max=np.ndarray(shape=(2))
self._x_max=np.ndarray(shape=(2))
self._x_max[0]=0.0
self._x_max[1]=0.0
i=0
for x_bin in range(0, histogram.GetXaxis().GetNbins()):
for y_bin in range(0, histogram.GetYaxis().GetNbins()):
self._x[i][0]=histogram.GetXaxis().GetBinCenter(x_bin)
self._x[i][1]=histogram.GetYaxis().GetBinCenter(y_bin)
self._y[i][0]=histogram.GetBinContent(x_bin,y_bin)
for j in range(0,len(self._x[i])):# only in the full_set case the maximum values are calculated
if self._x[i][j]>self._x_max[j]:
self._x_max[j]=self._x[i][j]
for j in range(0,len(self._y[i])):
if self._y[i][j]>self._y_max[j]:
self._y_max[j]=self._y[i][j]
i=i+1
#apply function to inputs and outputs, the function can also be the identity
apply_function_to_inputs(self._x,self._x_mod,self._x_max)
apply_function_to_outputs(self._y,self._y_mod,self._y_max)
elif kind_of_set=="training_set" or kind_of_set=="validation_set" or kind_of_set=="test_set":
self._N = len(subset)#Number of elements of the data set
self._y=np.ndarray(shape=(self._N,1))
self._x=np.ndarray(shape=(self._N,2))
self._y_mod=np.ndarray(shape=(self._N,1))
self._x_mod=np.ndarray(shape=(self._N,2))
self._y_max=full_set.get_y_max()
self._x_max=full_set.get_x_max()
for i in range(0,self._N):
self._x[i][0]=full_set.get_x()[subset[i]][0]
self._x[i][1]=full_set.get_x()[subset[i]][1]
self._y[i][0]=full_set.get_y()[subset[i]][0]
self._x_mod[i][0]=full_set.get_x_mod()[subset[i]][0]
self._x_mod[i][1]=full_set.get_x_mod()[subset[i]][1]
self._y_mod[i][0]=full_set.get_y_mod()[subset[i]][0]
if len(self._x)==0:# If the set has 0 entries the list is empty
self._N_input=-1
else:
self._N_input = len(self._x[0])
if len(self._y)==0:# If the set has 0 entries the list is empty
self._N_output=-1
else:
self._N_output = len(self._y[0])
self._index_in_epoch = 0 #if one has trained 2 mini batches in the epoch already then this is 2*batch_size
self._epochs_completed = 0
def get_N_input_nodes(self):
return self._N_input
def get_N_output_nodes(self):
return self._N_output
def get_N(self):
return self._N
def get_x(self):
return self._x
def get_y(self):
return self._y
def get_x_max(self):
return self._x_max
def get_y_max(self):
return self._y_max
def get_x_mod(self):
return self._x_mod
def get_y_mod(self):
return self._y_mod
def next_batch(self, batch_size, fake_x=False):
start = self._index_in_epoch
self._index_in_epoch += batch_size
if self._index_in_epoch >= self._N:
# Finished epoch
self._epochs_completed += 1
# Shuffle the data
perm = np.arange(self._N)
np.random.shuffle(perm)
self._x = self._x[perm]#shuffle both, actually one would only need to shuffle x_mod and y_mod, but for consistency we shuffle both!
self._y = self._y[perm]
self._x_mod = self._x_mod[perm]
self._y_mod = self._y_mod[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size
assert batch_size <= self._N # an exception is thrown if batch_size > self._N
end = self._index_in_epoch
return self._x_mod[start:end], self._y_mod[start:end], self._epochs_completed
def read_data_set(path,hist_name,kind_of_set,function_inputs,function_outputs,full_set=None,subset=None):
return Dataset(path,hist_name,kind_of_set,function_inputs,function_outputs,full_set,subset)
I have uploaded the corresponding data input file to
https://github.com/kanban1992/GradientDescent_Comparison