How to keep calculated values in a Tensorflow graph (on the GPU)? - tensorflow

How can we make sure that a calculated value will not be copied back to CPU/python memory, but is still available for calculations in the next step?
The following code obviously doesn't do it:
import tensorflow as tf
# Graph: two variables and their sum.
a = tf.Variable(tf.constant(1.), name="a")
b = tf.Variable(tf.constant(2.), name="b")
result = a + b
# NOTE: this only binds a second Python name to the same tensor; no value is
# kept anywhere, so evaluating `stored` evaluates a + b again.
stored = result
with tf.Session() as s:
    val = s.run([result, stored], {a: 1., b: 2.})
    print(val) # 3
    val = s.run([result], {a: 4., b: 5.})
    print(val) # 9
    # No feed_dict here, so a and b are read as (uninitialized) variables.
    print(stored.eval()) # 3 NOPE:
Error : Attempting to use uninitialized value _recv_b_0

The answer is to keep the value in a tf.Variable, writing to it with the assign operation:
working code:
import tensorflow as tf
with tf.Session() as s:
    a = tf.Variable(tf.constant(1.), name="a")
    b = tf.Variable(tf.constant(2.), name="b")
    result = a + b
    # A dedicated variable holds the sum inside the graph between run() calls.
    stored = tf.Variable(tf.constant(0.), name="stored_sum")
    assign_op = stored.assign(result)
    # Fetching assign_op together with result writes the fresh sum into `stored`.
    val, _ = s.run([result, assign_op], {a: 1., b: 2.})
    print(val) # 3
    # Only `result` is fetched here, so `stored` is not reassigned.
    val = s.run(result, {a: 4., b: 5.})
    # A single (non-list) fetch returns the value itself, so there is nothing to index.
    print(val) # 9
    print(stored.eval()) # ok, still 3

Related

Passing pandas DataFrame data to a function, and it is not outputting the results

In my code, I am trying to extract data from a CSV file to use in the function, but it doesn't output anything and gives no error. My code works, because I tried it with just numpy arrays as inputs. Not sure why it doesn't work with pandas.
import numpy as np
import pandas as pd
import os
# work relative to the folder that holds this script, wherever it is launched from
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)
# finding best fit line for y=mx+b by iteration
def gradient_descent(x,y):
    """Fit y = m*x + b by gradient descent on the mean squared error.

    Starts from m = b = 1 and takes at most 10000 steps with a learning
    rate of 0.05. After every step the updated slope, intercept, the cost
    measured before the step and the iteration index are printed. Nothing
    is returned.

    x and y must have the same length and support element-wise arithmetic
    (e.g. numpy arrays or pandas Series).

    The loop ends as soon as the relative improvement
    (last_mse - mse)/mse falls below 0.001. The previous cost starts at
    10000, so a cost that is not clearly lower than the previous one --
    including a very first cost of roughly 10000 or more -- ends the loop
    before anything is printed for that step.
    """
    m_iter = b_iter = 1 #starting point
    iteration = 10000
    n = len(x)
    learning_rate = 0.05
    last_mse = 10000
    #take baby steps to reach global minima
    for i in range(iteration):
        y_predicted = m_iter*x + b_iter
        #mse = 1/n*sum([value**2 for value in (y-y_predicted)]) # cost function to minimize
        mse = 1/n*sum((y-y_predicted)**2) # cost function to minimize
        # stop once the cost no longer drops by at least 0.1% per step
        if (last_mse - mse)/mse < 0.001:
            break
        # recall MSE formula is 1/n*sum((yi-y_predicted)^2), where y_predicted = m*x+b
        # using partial deriv of MSE formula, d/dm and d/db
        dm = -(2/n)*sum(x*(y-y_predicted))
        db = -(2/n)*sum((y-y_predicted))
        # use current predicted value to get the next value for prediction
        # by using learning rate
        m_iter = m_iter - learning_rate*dm
        b_iter = b_iter - learning_rate*db
        print('m is {}, b is {}, cost is {}, iteration {}'.format(m_iter,b_iter,mse,i))
        last_mse = mse
#x = np.array([1,2,3,4,5])
#y = np.array([5,7,8,10,13])
#gradient_descent(x,y)
# load the data set and fit Price as a linear function of Area
df = pd.read_csv('Linear_Data.csv')
x, y = df['Area'], df['Price']
gradient_descent(x, y)
"My code works, because I tried it with just numpy arrays as inputs. Not sure why it doesn't work with pandas."
Well no, your code also works with pandas dataframes:
# five small sample points held in a DataFrame instead of numpy arrays
df = pd.DataFrame({'Area': [1,2,3,4,5], 'Price': [5,7,8,10,13]})
x, y = df['Area'], df['Price']
gradient_descent(x, y)
Above will give you the same output as with numpy arrays.
Try to check what's in Linear_Data.csv and/or add some print statements in the gradient_descent function just to check your assumptions. I would suggest to first of all add a print statement before the condition with the break statement:
# (fragment from inside the loop of gradient_descent)
# Debug aid: show the previous and current cost right before the early-stop test.
print(last_mse, mse)
# The test is also true whenever mse is larger than last_mse (negative ratio).
if (last_mse - mse)/mse < 0.001:
break

Finding loss mask of variable length in keras tensorflow

Trying to build loss function which captures the below functionality, which mask the output values once 'end of sequence' is encountered.
Given a tensor of shape [BatchSize,MaxSequenceLength,OutputNodes]
Consider the below example
batch size = 3
Max Sequence Length=4
OutputNodes = 3
predicted = [[[0.1,0.3,0.2],[0.4,0.6,0.8],[0.5,0.2,0.3],[0.0,0.0,0.99]],
[[0.1,0.3,0.2],[0.4,0.9,0.8],[0.5,0.2,0.9],[0.4,0.6,0.8]],
[[0.1,0.3,0.2],[0.4,0.9,0.8],[0.5,0.2,0.1],[0.4,0.6,0.1]]]
I am dedicating the last output node to symbolise the 'end of sequence(EOS)' here node=2 . Nodes are labelled as (0, 1 and 2)
Based on the predicted value, I have to return a mask which tries to find the first occurrence of EOS.
In the above example,
first row has following sequence (argmax) => 1,2,0,2
Second row has following sequence => 1,1,2,2
Third row has following sequence => 1,1,0,1
So my mask should be
[[1,0,0,0],
[1,1,0,0],
[1,1,1,1]]
The mask will ensure, the values post the EOS is ignored or not considered in calculating the loss.
Below is the code snippet I tried
# Index of the highest-scoring output node at every time step.
sequence_cluster_asign = keras.backend.argmax(sequence_values,axis=-1)
loss_mask = []
# Build one mask row per entry unstacked from the first axis.
for seq in K.tf.unstack(sequence_cluster_asign):
    ##appendEOS- To make sure tf.where is not empty
    seq = tf.concat([seq,endOfSequenceTensor],axis=0)
    # Position of the first element equal to endOfSequence.
    endOfSequenceLocation = K.tf.where(K.tf.equal(seq,endOfSequence))[0][0]
    loss_mask.append(tf.sequence_mask(endOfSequenceLocation,max_decoder_seq_length,dtype=tf.float32))
final_mask = K.stack(loss_mask)
Error encountered : ValueError: Cannot infer num from shape (?,?)
If you want to get the mask described in your question, you can use the following method.
import tensorflow as tf
import keras
from keras import backend as K
# Scores with an open batch size, 4 time steps and 3 output nodes.
sequence_values = K.placeholder(shape=(None, 4, 3))
sequence_cluster_asign = keras.backend.argmax(sequence_values,axis=-1)
# keras version
# 1 where the winning node index is below the last node index (the EOS node), else 0.
result = K.cast(K.less(sequence_cluster_asign,sequence_values.get_shape().as_list()[-1]-1),dtype='int32')
# The running product along the last axis zeroes every step from the first 0 onward.
result = K.cumprod(result,axis=-1)
# tensorflow version
# result = tf.cast(tf.less(sequence_cluster_asign,sequence_values.get_shape().as_list()[-1]-1),dtype=tf.int32)
# result = tf.cumprod(result,axis=-1)
predicted = [[[0.1,0.3,0.2],[0.4,0.6,0.8],[0.5,0.2,0.3],[0.0,0.0,0.99]],
             [[0.1,0.3,0.2],[0.4,0.9,0.8],[0.5,0.2,0.9],[0.4,0.6,0.8]],
             [[0.1,0.3,0.2],[0.4,0.9,0.8],[0.5,0.2,0.1],[0.4,0.6,0.1]]]
with tf.Session() as sess:
    print(result.eval(feed_dict={sequence_values:predicted}))
[[1 0 0 0]
[1 1 0 0]
[1 1 1 1]]

Are tensorflow random-variables only created once per sess.run?

If I have something like this:
# a is a random op; b and c are both built on top of it
a = tf.random_uniform((1,), dtype=tf.float32)
b = 1 + a
c = 2 + a
Will a be the same or different when calculating b and c?
Every time a sess.run() is executed, different results are generated, as can be seen in the official documentation of tensorflow.
For example, given the following code:
import tensorflow as tf
a = tf.random_uniform((1,), dtype=tf.float32)
b = 1 + a
c = 2 + a
init = tf.global_variables_initializer()
# The context manager closes the session once the demo is done.
with tf.Session() as sess:
    # Four separate run() calls: a is evaluated anew in each of them.
    print(sess.run(a))
    print(sess.run(b))
    print(sess.run(c))
    print(sess.run(a))
Each call produces a different value of a, so the printed value of b is 1 + a(newly generated),
where a(newly generated) is different from the a printed on the previous line.
Output:
[ 0.13900638] # value of a
[ 1.87361598] # value of b = 1 + 0.87361598(!= a)
[ 2.81547356] # value of c = 2 + 0.81547356(!= a)
[ 0.00705874] # value of a(!= previous value of a)
As answered by @heena bawa
For every sess.run() the values will be re initialised.
To solve for that problem, we initialise the session and call run once. If multiple results are expected then they are passed in a list as such:
import tensorflow as tf
a = tf.random_uniform((1,), dtype=tf.float32)
b = 1 + a
c = 2 + a
init = tf.global_variables_initializer()
with tf.Session() as sess:
    # One run() call fetches all three, so they are computed from the same a.
    print(sess.run([c, b, a]))
output:
[array([2.0236197], dtype=float32), array([1.0236198], dtype=float32), array([0.02361977], dtype=float32)]
# c is 2.023..
# b is 1.023..
# a is 0.023..

Compute value of variable for multiple input values

I have a tensorflow graph which is trained. After training, I want to sample one variable for multiple intermediate values. Simplified:
# simplified graph: a -> b -> c
a = tf.placeholder(tf.float32, [1])
b = a + 10
c = b * 10
Now I want to query c for values of b. Currently, I am using an outer loop
b_values = [0, 1, 2, 3, 4, 5]
samples = []
# one sess.run call per value, feeding the intermediate tensor b directly
for b_value in b_values:
    samples += [sess.run(c,
                         feed_dict={b: [b_value]})]
This loop takes quite a bit of time, I think it is because b_values contains 5000 values in my case. Is there a way of running sess.run only once, and passing all b_values at once? I cannot really modify the graph a->b->c, but I could add something to it if that helps.
You could do it as follows:
import tensorflow as tf
import numpy as np
import time
# The leading None lets any number of rows be fed in one call.
a = tf.placeholder(tf.float32, [None,1])
b = a + 10
c = b * 10
sess = tf.Session()
b_values = np.random.randint(500,size=(5000,1))
samples = []
# Variant 1: one sess.run call per row.
t = time.time()
for b_value in b_values:
    samples += [sess.run(c,feed_dict={b: [b_value]})]
print(time.time()-t)
#print samples
# Variant 2: feed the whole (5000, 1) array in a single call.
t=time.time()
samples = sess.run(c,feed_dict={b:b_values})
print(time.time()-t)
#print samples
Output: (time in seconds)
0.874449968338
0.000532150268555
Hope this helps !

Using Tensorflow for workload distribution

My code looks like:
import numpy as np
import tensorflow as tf
# N: length of each state/variance vector; num_ckfs: number of filters run as one batch
N = 16
num_ckfs = 5
init_variances = tf.placeholder(tf.float64, shape=[ num_ckfs, N],name='inital_variances')
init_states = tf.placeholder(tf.float64, shape=[num_ckfs, N], name='init_states')
#some more code
predicted_state = prior_state_expanded + kalman_gain * diff_expanded
error_covariance = sum_cov_cholesky + tf.batch_matmul(kg , kalman_gain, adj_x=True)
projected_output = tf.batch_matmul(predicted_state,input_vectors_extra, adj_y=True)
session = tf.Session()
# read data from input file
init_var = [10 for i in range(N)]
init_var_ckfs = [init_var for i in range(num_ckfs)]
init_state = [0 for i in range(N)]
init_state_ckfs = [init_state for i in range(num_ckfs)]
for timestep in range(10):
    out= session.run([projected_output, predicted_state, error_covariance], {init_variances:init_var_ckfs, init_states:init_state_ckfs })
    # feed this step's predicted state and covariance diagonal back in as the next inputs
    init_state_ckfs = np.array([i.tolist()[0] for i in out[1]])
    init_var_ckfs = np.array([i.diagonal().tolist() for i in out[2]])
This code is for running a Cubature Kalman Filter(CKF) in a batched mode. For example:
num_ckfs = 5
means that this code will run 5 CKFs in parallel. Now, what I would like to do is to distribute the workload to multiple nodes depending upon the value of num_ckfs. For example, if I pass num_ckfs as an argument to the code, and it is set to 20,000, then I would distribute the workload to 4 nodes running 5000 each.
I would like to do this using the distributed version of Tensorflow. Can someone please give me some hints on how this could be achieved? Ideally, I should have to execute the code on a single node and it should then get distributed to as many nodes as defined in
tf.train.ClusterSpec