I am trying to estimate the forward pass and the backward gradient of the function below:
def func(img_batch, X1, X2):
    L = 1
    A1 = X1 * L**2
    A2 = X2 * L**2
    AA1 = A1 * A1
    AA2 = A2 * A2
    A1A2 = A1 * A2
    v = tf.nn.conv2d(img_batch, A1A2, strides=[1, 1, 1, 1], padding='SAME')
    v = v + AA1 + AA2
    return v
When I add this function to the network, the gradient will by default be computed through every operation in the function.
How can I run this function in the forward pass while ignoring the gradients of its individual operations, and instead provide my own gradient estimate and add it to the main gradient of the model?
You can use py_func to bypass the gradients of the ops inside the function, and use gradient_override_map to provide a customized gradient. Here is an example:
import tensorflow as tf
def myfunc(X1, X2):
    L = 1
    A1 = X1 * L**2
    A2 = X2 * L**2
    AA1 = A1 * A1
    AA2 = A2 * A2
    A1A2 = A1 * A2
    ...
    v = AA1 + AA2 + A1A2
    return v
@tf.RegisterGradient("GradMyfunc")
def grad_myfunc(op, grad):
    X1 = op.inputs[0]
    X2 = op.inputs[1]
    return [grad * X2, grad * X1]
X1 = tf.Variable(tf.constant(1.1, dtype=tf.float64))
X2 = tf.Variable(tf.constant(2.2, dtype=tf.float64))
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": "GradMyfunc"}):
    y = tf.py_func(myfunc, [X1, X2], [tf.float64])
with tf.Session() as sess:
    grad = tf.gradients(y, [X1, X2])
    sess.run(tf.global_variables_initializer())
    print(sess.run(y))
    print(sess.run(grad))
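One caveat worth noting: the body passed to tf.py_func executes as plain Python on NumPy arrays, so a TensorFlow op like the tf.nn.conv2d in the original func would need a NumPy/SciPy replacement inside it. Also, the registered gradient must return one tensor per input. Below is a sketch of such a gradient for the original three-argument func; the returned expressions are illustrative stand-ins for whatever estimate you want to inject, not the true gradients:

@tf.RegisterGradient("GradFunc")
def grad_func(op, grad):
    # op.inputs holds the tensors that were passed to tf.py_func, in order.
    img_batch, X1, X2 = op.inputs[0], op.inputs[1], op.inputs[2]
    # One (estimated) gradient per input; reduce_mean keeps the shapes
    # consistent regardless of the shape grad arrives with.
    return [tf.zeros_like(img_batch),
            tf.reduce_mean(grad) * tf.ones_like(X1),
            tf.reduce_mean(grad) * tf.ones_like(X2)]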
I am learning RNNs through https://medium.com/@erikhallstrm/hello-world-rnn-83cd7105b767. I changed the loss function to mean squared error and found that it does not converge: the output is stuck at 0.5. I suspect the mistake is in
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
but I don't know why. I am not familiar with the data types involved, so this may be a silly question. In case I haven't made myself clear, the full code is below:
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 1
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
def generateData():
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0
    x = x.reshape((batch_size, -1))  # The first index changes slowest; subseries as rows
    y = y.reshape((batch_size, -1))
    return (x, y)
tf.reset_default_graph()
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)
# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1)
labels_series = tf.unstack(batchY_placeholder, axis=1)
# Forward pass
current_state = init_state
states_series = []
for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state], axis=1)  # Increasing number of columns
    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
#Loss function HERE
midlosses = [tf.squeeze(logits)-tf.squeeze(labels) for logits, labels in zip(logits_series,labels_series)]
losses = tf.square(midlosses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []
    for epoch_idx in range(num_epochs):
        x, y = generateData()
        _current_state = np.zeros((batch_size, state_size))
        print("New data, epoch", epoch_idx)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length
            batchX = x[:, start_idx:end_idx]
            batchY = y[:, start_idx:end_idx]
            _total_loss, _train_step, _current_state, _logits_series, _midlosses = sess.run(
                [total_loss, train_step, current_state, logits_series, midlosses],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            if batch_idx % 100 == 0:
                print("Step", batch_idx, "Loss", _total_loss)
You just need to replace
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
with
logits_series = [tf.squeeze(tf.matmul(state, W2) + b2) for state in states_series] #Broadcasted addition
and the problem is solved.
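As an aside (an assumption about why the squeeze matters, not something stated in the original post): the classic failure mode with losses like this is that subtracting a [batch] tensor from a [batch, 1] tensor silently broadcasts to [batch, batch], so the loss no longer measures per-example error. A minimal sketch of that shape behavior:

import tensorflow as tf

logits = tf.ones([5, 1])               # e.g. tf.matmul(state, W2) + b2
labels = tf.ones([5])                  # e.g. one element of labels_series
diff_bad = logits - labels             # broadcasts to shape [5, 5]
diff_ok = tf.squeeze(logits) - labels  # shape [5], as intended
print(diff_bad.shape, diff_ok.shape)   # (5, 5) (5,)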
I would like to feed a placeholder defined in a function. The following is a simplified example.
#!/usr/bin/python
import tensorflow as tf
def CreateInference():
    x2 = tf.placeholder(tf.float32, (None))
    w2 = tf.get_variable('w2', initializer=1.0)
    b2 = tf.get_variable('b2', initializer=2.0)
    y2 = w2 * x2 + b2
y2 = CreateInference()
writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # print(sess.run(y2, feed_dict={x2:2.0}))
writer.close()
The graph is correctly created, as shown in the following TensorBoard graph.
The problem is that feed_dict={x2:2.0} doesn't work, since x2 is a local variable of the function CreateInference. Could anyone please tell me how to access and feed a value for the placeholder x2 in the above example?
Why not do the obvious and return references to the objects:
#!/usr/bin/python
import tensorflow as tf
def CreateInference():
    x2 = tf.placeholder(tf.float32, (None))
    w2 = tf.get_variable('w2', initializer=1.0)
    b2 = tf.get_variable('b2', initializer=2.0)
    y2 = w2 * x2 + b2
    return x2, y2
x2, y2 = CreateInference()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y2, feed_dict={x2:2.0}))
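An alternative, if you would rather not change the function's signature, is to give the tensors explicit names and fetch them back from the graph; a sketch of that approach (the names 'x2' and 'y2' below are choices made here, not part of the original code):

import tensorflow as tf

def CreateInference():
    x2 = tf.placeholder(tf.float32, (None), name='x2')
    w2 = tf.get_variable('w2', initializer=1.0)
    b2 = tf.get_variable('b2', initializer=2.0)
    y2 = tf.identity(w2 * x2 + b2, name='y2')  # name the output tensor

CreateInference()
g = tf.get_default_graph()
x2 = g.get_tensor_by_name('x2:0')  # ':0' selects the op's first output
y2 = g.get_tensor_by_name('y2:0')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y2, feed_dict={x2: 2.0}))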
I want to understand how l2 regularization is implemented here. In l2 regularization we add the square of the weights to the loss function, but in this code we are also adding the bias terms. Why is that?
x = tf.placeholder(tf.float32, [None, nPixels])
W1 = tf.Variable(tf.random_normal([nPixels, nNodes1], stddev=0.01))
b1 = tf.Variable(tf.zeros([nNodes1]))
y1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)
W2 = tf.Variable(tf.random_normal([nNodes1, nLabels], stddev=0.01))
b2 = tf.Variable(tf.zeros([nLabels]))
y = tf.matmul(y1, W2) + b2
y_ = tf.placeholder(tf.float32, [None, nLabels])
l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(b1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(b2)
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
regularized_cross_entropy = cross_entropy + beta * l2_loss
The bias being penalized here is not part of the standard l2 regularization formula; it is the bias we add in a neural network so that a neuron's output is not forced to zero. Including it in the regularizer is a choice the author made; it is commonly omitted, since bias terms rarely contribute to overfitting.
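If you want the more common formulation that penalizes only the weights, leave the biases out of the sum; a sketch using the variables above:

# L2 penalty over the weight matrices only; b1 and b2 are not regularized.
l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)
regularized_cross_entropy = cross_entropy + beta * l2_loss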
I have been trying for a while to implement sampled softmax because I have half a million output classes.
I have tried to follow the official documentation exactly, but I always get an error. This is my code:
def forward_propagation_sampled(X, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)
    return Z3, W3, b3
This is the cost computation function:
def compute_cost(Z3, W3, b3, Y, mode):
    Z3.set_shape([1144, 1])
    if mode == "train":
        loss = tf.nn.sampled_softmax_loss(
            weights=tf.transpose(W3),
            biases=tf.Variable(b3),
            labels=tf.reshape(tf.argmax(Y, 1), [-1, 1]),  # since Y is one-hot encoded
            inputs=tf.Variable(initial_value=Z3, dtype=tf.float32, expected_shape=[1144, 1]),
            num_sampled=2000,
            num_classes=1144,
            partition_strategy="div"
        )
    elif mode == "eval":
        logits = tf.matmul(inputs, tf.transpose(weights))
        logits = tf.nn.bias_add(logits, biases)
        labels_one_hot = tf.one_hot(labels, n_classes)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_one_hot, logits=logits)
    cost = tf.reduce_mean(loss)
    return cost
For the purpose of just testing this out, I am using 1144 output classes, which would otherwise scale to 500,000. There are 3144 training examples.
I get this error:
Shape must be rank 1 but is rank 2 for 'sampled_softmax_loss/Slice_1' (op: 'Slice') with input shapes: [3144,1], [1], [1].
I am unable to debug this or make any sense out of it. Any help would be really appreciated.
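For reference, the documented shapes for tf.nn.sampled_softmax_loss are: weights [num_classes, dim], biases [num_classes], labels [batch_size, num_true], and inputs [batch_size, dim], i.e. one example per row rather than per column. A minimal sketch with made-up sizes that satisfies those shapes:

import tensorflow as tf

batch_size, dim, num_classes = 64, 128, 1144
weights = tf.Variable(tf.random_normal([num_classes, dim]))
biases = tf.Variable(tf.zeros([num_classes]))
inputs = tf.random_normal([batch_size, dim])  # one row per training example
labels = tf.random_uniform([batch_size, 1], maxval=num_classes, dtype=tf.int64)
loss = tf.nn.sampled_softmax_loss(weights=weights, biases=biases,
                                  labels=labels, inputs=inputs,
                                  num_sampled=200, num_classes=num_classes)
cost = tf.reduce_mean(loss)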
import tensorflow as tf
import numpy as np
#Constant Declaration
LEARNING_RATE = 0.05
LEARNING_TIME = 10000
FILE_NAME = 'xor'
# Input Data Declaration
xy = np.loadtxt(FILE_NAME+'_data_set.txt',unpack=True,dtype='float32',delimiter=',')
x_data = np.transpose(xy[0:-1])
y_data = np.transpose(xy[-1])
print x_data
# Declaration Part
X = tf.placeholder(dtype = tf.float32,name="X-input")
Y = tf.placeholder(dtype = tf.float32,name="Y-input")
W1 = tf.Variable(tf.random_uniform([2,2],-1.0,1.0), name="Weight_1")
W2 = tf.Variable(tf.random_uniform([2,1],-1.0,1.0), name="Weight_2")
b1 = tf.Variable(tf.zeros([2]), name = 'Bias1')
b2 = tf.Variable(tf.zeros([1]), name = 'Bias2')
# Formula Part
with tf.name_scope("Layer1") as scope:
    L1 = tf.sigmoid(tf.matmul(X, W1) + b1)
with tf.name_scope("Layer2") as scope:
    hypothesis = tf.sigmoid(tf.matmul(L1, W2) + b2)
with tf.name_scope("Cost") as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
    cost_summ = tf.scalar_summary("cost", cost)
# Minimizing Part
a = tf.Variable(LEARNING_RATE)
with tf.name_scope("train") as scope:
    optimizer = tf.train.GradientDescentOptimizer(a)
    train = optimizer.minimize(cost)
# Initializing Part
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter('./logs/'+FILE_NAME,sess.graph_def)
# Running Part
for step in range(LEARNING_TIME):
    sess.run(train, feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        print step, sess.run(cost, feed_dict={X: x_data, Y: y_data}), sess.run(W2)
        summary = sess.run(merged, feed_dict={X: x_data, Y: y_data})
        writer.add_summary(summary, step)
#Test Part
correction = tf.equal(tf.floor(hypothesis + 0.5),Y)
accuracy = tf.reduce_mean(tf.cast(correction,'float'))
print sess.run([hypothesis,tf.floor(hypothesis + 0.5),correction,accuracy], feed_dict = {X:x_data,Y:y_data})
Above is my TensorFlow code for solving XOR logic, but the problem is that the accuracy is just 50 percent and the cost converges to 0.69321.
I have seen lots of XOR implementations in TensorFlow, and I can't find what is wrong with mine.
Below are images showing how my code works.
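One thing worth checking (an assumption, since the data file is not shown): y_data = np.transpose(xy[-1]) is 1-D while hypothesis has shape [N, 1], so Y * tf.log(hypothesis) broadcasts to an [N, N] matrix and the cost stops measuring per-example error. A quick sketch of the reshape that rules this out:

# Make the labels a column vector so they line up with hypothesis ([N, 1]).
y_data = y_data.reshape(-1, 1)
# Giving the placeholder an explicit shape makes such mismatches fail loudly:
Y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="Y-input")

Note that 0.69321 is ln(2), the cost of always predicting 0.5, which is consistent with the loss being averaged over a mis-broadcast matrix.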