Convolutional Neural Network in Tensorflow with Own Data for Prediction - tensorflow

I am a beginner with CNNs and TensorFlow. I am trying to implement a convolutional neural network in TensorFlow that makes predictions from my own data, but I am running into some problems. I adapted the "Deep MNIST for Experts" tutorial for this. That tutorial performs classification, whereas I am trying to do regression. Another problem is that this code gives me accuracy=1 at every step.
What is the cause of the error? How can I convert this code for regression?
Data set:
import tensorflow as tf
import pandas as pandas
from sklearn import cross_validation
from sklearn import preprocessing
from sklearn import metrics
sess = tf.InteractiveSession()
data = pandas.read_csv("tuna.csv")
print("number of samples", number_of_samples)
print("elements_of_one_sample", elements_of_one_sample)
train_x, test_x, train_y, test_y = cross_validation.train_test_split(X, Y, test_size=0.1, random_state=42)
print("train_x.shape=", train_x.shape)
print("train_y.shape=", train_y.shape)
print("test_x.shape=", test_x.shape)
print("test_y.shape=", test_y.shape)
epoch = 0 # counter for number of rounds training network
last_cost = 0 # keep track of last cost to measure difference
max_epochs = 2000 # total number of training sessions
tolerance = 1e-6 # we stop when diff in costs less than that
batch_size = 50 # we batch the data in groups of this size
num_samples = train_y.shape[0] # number of samples in training set
num_batches = int( num_samples / batch_size ) # compute number of batches, given
print("############################## num_samples", num_samples)
print("############################## num_batches", num_batches)
x = tf.placeholder(tf.float32, shape=[None, 16])
y_ = tf.placeholder(tf.float32, shape=[None, 1])
# xW + b
W = tf.Variable(tf.zeros([16,1]))
b = tf.Variable(tf.zeros([1]))
# y = softmax(xW + b)
y = tf.nn.softmax(tf.matmul(x,W) + b)
# loss is cross entropy
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
for n in range( num_batches ):
batch_x = train_x[ n*batch_size : (n+1)*batch_size ]
batch_y = train_y[ n*batch_size : (n+1)*batch_size ] feed_dict={x: batch_x, y_: batch_y} )
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: test_x, y_: test_y}))
# To create this model, we're going to need to create a lot of weights and biases.
# One should generally initialize weights with a small amount of noise for symmetry
# breaking, and to prevent 0 gradients
def weight_variable(shape):
    """Return a tf.Variable of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# Since we're using ReLU neurons, it is also good practice to initialize them
# with a slightly positive initial bias to avoid "dead neurons." Instead of doing
# this repeatedly while we build the model, let's create two handy functions
# to do it for us.
def bias_variable(shape):
    """Return a tf.Variable of ``shape`` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window moved in steps of 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
W_conv1 = weight_variable([2, 2, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,4,4,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([2, 2, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([1 * 1 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 1*1*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 1])
b_fc2 = bias_variable([1])
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# loss
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# accuracy
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# train
for i in range(20000):
if i%100 == 0:
batch_x = train_x[ n*batch_size : (n+1)*batch_size ]
batch_y = train_y[ n*batch_size : (n+1)*batch_size ]
train_accuracy = accuracy.eval(feed_dict={x:batch_x, y_: batch_y, keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy)){x: batch_x, y_: batch_y, keep_prob: 0.5})
# result
print("test accuracy %g"%accuracy.eval(feed_dict={
x: test_x, y_: test_y, keep_prob: 1.0}))
number of samples 1250
elements_of_one_sample 16
train_x.shape= (1125, 16)
train_y.shape= (1125, 1)
test_x.shape= (125, 16)
test_y.shape= (125, 1)
############################## num_samples 1125
############################## num_batches 22
step 0, training accuracy 1
step 100, training accuracy 1
step 200, training accuracy 1
step 300, training accuracy 1
step 400, training accuracy 1
step 19500, training accuracy 1
step 19600, training accuracy 1
step 19700, training accuracy 1
step 19800, training accuracy 1
step 19900, training accuracy 1
test accuracy 1
I am quite new to neural nets and machine learning so pardon me for any mistakes, thanks in advance.

You've got a loss function of cross entropy, which is a loss function specifically designed for classification. If you want to do regression, you need to start with a loss function that penalizes prediction error (L2 error is a great place to start).
For prediction, the rightmost layer of the network needs to have linear units (no activation function). The number of neurons in the rightmost layer should correspond to the number of values you're predicting (if it's a simple regression problem where you're predicting a single value of y given a vector of inputs x, then you just need a single neuron in the right-most layer). Right now, you've got a softmax layer on the back end of the network, which is also specifically used for classification tasks.
Basically - you need to swap your softmax for a linear neuron and change your loss function to something like L2 error (aka mean-squared error).


tensorflow - nn =>accuracy print error

I am building a neural network with TensorFlow.
Multiple inputs => linear regression.
I am not following an official TensorFlow example exactly;
I just want to get this example working as a check.
(My input data is fruit, water and vegetable,
and the output value is a real number (a concentration).)
So I think this example is similar to my problem.
If you have a better example, please share it. Thank you.
When this code prints the accuracy, it raises an error.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import learn
from sklearn.model_selection import train_test_split
boston = learn.datasets.load_dataset('boston')
x, y =,
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.6, random_state=42)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9
# Network Parameters
n_hidden_1 = 32 # 1st layer number of features
n_hidden_2 = 200 # 2nd layer number of features
n_hidden_3 = 200
n_hidden_4 = 256
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
x = tf.placeholder("float", [None,13])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Stack four ReLU hidden layers on ``x`` and finish with a linear output layer.

    ``weights`` is looked up under the keys 'h1'..'h4' and 'out'; ``biases``
    under 'b1'..'b4' and 'out'. The un-activated output tensor is returned.
    """
    hidden_keys = (('h1', 'b1'), ('h2', 'b2'), ('h3', 'b3'), ('h4', 'b4'))
    activation = x
    for weight_key, bias_key in hidden_keys:
        # Affine transform followed by a ReLU, one hidden layer per key pair.
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]), biases[bias_key])
        activation = tf.nn.relu(pre_activation)
    # The output layer stays linear: no activation is applied to it.
    return tf.matmul(activation, weights['out']) + biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1)),
'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], 0, 0.1))
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1)),
'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(tf.transpose(pred)-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# # Initializing the variables
# init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p =[optimizer, cost, pred], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
print ("[*]----------------------------")
for i in range(3):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
You compute the accuracy outside of the session.
Move it inside the `with tf.Session() as sess:` block.

TensorFlow: Why do parameters not update when GradientDescentOptimizer train step is run?

When I run the following code, it prints a constant loss at every training step; I also tried printing the parameters, which also do not change.
I can't seem to figure out why train_step, which uses a GradientDescentOptimizer, doesn't change the weights in W_fc1, b_fc1, W_fc2, and b_fc2.
I'm a beginner at machine learning, so I might be missing something obvious.
(An answer to a similar question was that weights should not be initialized to zero, but the weights here are initialized with a truncated normal, so that can't be the problem.)
import tensorflow as tf
import numpy as np
import csv
import random
# Load the wine data: column 0 of every row is the class label, the remaining
# columns are the features.
# NOTE(review): binary mode suits Python 2's csv module; under Python 3
# csv.reader needs a text-mode file (open(..., newline='')) - confirm the
# target interpreter before changing it.
with open('wine_data.csv', 'rb') as csvfile:
    input_arr = list(csv.reader(csvfile, delimiter=','))
for row in input_arr:
    row[0] = int(row[0]) - 1  # 0 index for one hot
    row[1:] = [float(value) for value in row[1:]]
# Floor division keeps the split point an integer on Python 2 and Python 3;
# plain "/" produces a float under Python 3, which cannot be used as a slice index.
split_index = 2 * len(input_arr) // 3
training_data = np.array(input_arr[:split_index])  # train on first two thirds of data
testing_data = np.array(input_arr[split_index:])  # test on last third of data
x_train = training_data[:, 1:]
y_train = training_data[:, 0]
x_test = testing_data[:, 1:]
y_test = testing_data[:, 0]
def weight_variable(shape):
    """Create a weight Variable seeded with truncated-normal noise (stddev 0.1)."""
    noise = tf.truncated_normal(shape, stddev=0.1)
    weights = tf.Variable(noise)
    return weights
def bias_variable(shape):
    """Create a bias Variable whose entries all start at the constant 0.1."""
    start_value = tf.constant(0.1, shape=shape)
    biases = tf.Variable(start_value)
    return biases
x = tf.placeholder(tf.float32, shape=[None, 13], name='x')
y_ = tf.placeholder(tf.float32, shape=[None], name='y_')
y_one_hot = tf.one_hot(tf.cast(y_, tf.int32), 3) # actual y values
W_fc1 = weight_variable([13, 128])
b_fc1 = bias_variable([128])
fc1 = tf.matmul(x, W_fc1)+b_fc1
W_fc2 = weight_variable([128, 3])
b_fc2 = bias_variable([3])
y = tf.nn.softmax(tf.matmul(fc1, W_fc2)+b_fc2)
cross_entropy = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=y_one_hot, logits=y))
train_step = tf.train.GradientDescentOptimizer(1e-17).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_one_hot,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess = tf.InteractiveSession()
for _ in range(1000):{x: x_train, y_: y_train})
if _%10 == 0:
loss = cross_entropy.eval(feed_dict={x: x_train, y_: y_train})
print('step', _, 'loss', loss)
Thanks in advance.
From the official tensorflow documentation:
WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results.
Remove the softmax on y before feeding it into tf.nn.softmax_cross_entropy_with_logits
Also set your learning rate to something higher (like 3e-4)

Tensorflow weight initialization

Regarding the MNIST tutorial on the TensorFlow website, I ran an experiment (gist) to see what the effect of different weight initializations would be on learning. I noticed that, against what I read in the popular [Xavier, Glorot 2010] paper, learning is just fine regardless of weight initialization.
The different curves represent different values for w for initializing the weights of the convolutional and fully connected layers. Note that all values for w work fine, even though 0.3 and 1.0 end up at lower performance and some values train faster - in particular, 0.03 and 0.1 are fastest. Nevertheless, the plot shows a rather large range of w which works, suggesting 'robustness' w.r.t. weight initialization.
def weight_variable(shape, w=0.1):
    """Return a Variable of ``shape`` sampled from a truncated normal with stddev ``w``."""
    return tf.Variable(tf.truncated_normal(shape, stddev=w))
def bias_variable(shape, w=0.1):
    """Return a Variable of ``shape`` with every entry set to the constant ``w``."""
    return tf.Variable(tf.constant(w, shape=shape))
Question: Why does this network not suffer from the vanishing or exploding gradient problem?
I would suggest you read the gist for implementation details, but here's the code for reference. It took approximately an hour on my Nvidia 960m, although I imagine it could also run on a CPU within reasonable time.
import time
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
from tensorflow.python.client import device_lib
import numpy
import matplotlib.pyplot as pyplot
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Weight initialization
def weight_variable(shape, w=0.1):
    """Build a weight Variable; ``w`` is the stddev of the truncated-normal initialiser."""
    starting_weights = tf.truncated_normal(shape, stddev=w)
    variable = tf.Variable(starting_weights)
    return variable
def bias_variable(shape, w=0.1):
    """Build a bias Variable; ``w`` is the constant every entry starts from."""
    starting_biases = tf.constant(w, shape=shape)
    variable = tf.Variable(starting_biases)
    return variable
# Network architecture
def conv2d(x, W):
    """Apply the filter ``W`` to ``x`` as a stride-1 convolution with 'SAME' padding."""
    return tf.nn.conv2d(
        x,
        W,
        strides=[1, 1, 1, 1],
        padding='SAME',
    )
def max_pool_2x2(x):
    """Apply max pooling to ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')
def build_network_for_weight_initialization(w):
    """Build a CNN for the MNIST problem.

    Layers, in order:
      - 32 5x5 kernels convolutional layer with bias and ReLU activations
      - 2x2 maxpooling
      - 64 5x5 kernels convolutional layer with bias and ReLU activations
      - 2x2 maxpooling
      - Fully connected layer with 1024 nodes + bias and ReLU activations
      - dropout
      - Fully connected output layer for classification (of 10 classes)

    ``w`` is forwarded to every ``weight_variable`` and ``bias_variable``
    call. The output layer is returned as raw logits; no softmax is applied
    inside this function.

    Returns the x and y placeholders for the train data, the output
    of the network and the dropout placeholder as a tuple of 4 elements.
    """
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    # Turn each flat 784-element row back into a 28x28 single-channel image.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    W_conv1 = weight_variable([5, 5, 1, 32], w)
    b_conv1 = bias_variable([32], w)
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([5, 5, 32, 64], w)
    b_conv2 = bias_variable([64], w)
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Flatten the pooled feature maps to 7*7*64 values per example for the dense layer.
    W_fc1 = weight_variable([7 * 7 * 64, 1024], w)
    b_fc1 = bias_variable([1024], w)
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # keep_prob is a placeholder so the caller can feed a different value per run.
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 10], w)
    b_fc2 = bias_variable([10], w)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return (x, y_, y_conv, keep_prob)
# Experiment
def evaluate_for_weight_init(w):
""" Returns an accuracy learning curve for a network trained on
10000 batches of 50 samples. The learning curve has one item
every 100 batches."""
with tf.Session() as sess:
x, y_, y_conv, keep_prob = build_network_for_weight_initialization(w)
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
lr = []
for _ in range(100):
for i in range(100):
batch = mnist.train.next_batch(50){x: batch[0], y_: batch[1], keep_prob: 0.5})
assert mnist.test.images.shape[0] == 10000
# This way the accuracy-evaluation fits in my 2GB laptop GPU.
a = sum(
x: mnist.test.images[2000*i:2000*(i+1)],
y_: mnist.test.labels[2000*i:2000*(i+1)],
keep_prob: 1.0})
for i in range(5)) / 5
return lr
ws = [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]
accuracies = [
[evaluate_for_weight_init(w) for w in ws]
for _ in range(3)
# Plotting results
pyplot.ylim(0.9, 1)
pyplot.xlabel('batch (x 100)')
pyplot.ylabel('test accuracy')
Weight initialization strategies can be an important and often overlooked step in improving your model, and since this is now the top result on Google I thought it could warrant a more detailed answer.
In general, the total product of each layer's activation function gradient, number of incoming/outgoing connections (fan_in/fan_out), and variance of weights should be equal to one. This way, as you backpropagate through the network the variance between input and output gradients will stay consistent, and you won't suffer from exploding or vanishing gradients. Even though ReLU is more resistant to exploding/vanishing gradients, you might still have problems.
tf.truncated_normal used by OP does a random initialization which encourages weights to be updated "differently", but does not take the above optimization strategy into account. On smaller networks this might not be a problem, but if you want deeper networks, or faster training times, then you are best trying a weight initialization strategy based on recent research.
For weights preceding a ReLU function you could use the default settings of:
for tanh/sigmoid activated layers "xavier" might be more appropriate:
More details on both these functions and associated papers can be found at:
Beyond weight initialization strategies, further optimization could explore batch normalization:
Logistic functions are more prone to vanishing gradients because their gradients are all < 1, so the more of them you multiply together during back-propagation, the smaller your gradient becomes (and quite quickly). ReLU, by contrast, has a gradient of 1 on its positive part, so it does not have this problem.
Also, your network is not nearly deep enough to suffer from that.

'numpy.ndarray' object has no attribute 'train'

How can I solve this? This is my first time using TensorFlow. I tried to copy the "Train and Evaluate the Model" section from the TensorFlow tutorial, but it does not seem to work. Can someone help me solve my problem? Thanks!
import tensorflow as tf
sess = tf.InteractiveSession()
import numpy as np
from numpy import genfromtxt
def weight_variable(shape):
    """Return a Variable of ``shape`` filled with truncated-normal noise of stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a Variable of ``shape`` whose entries all start at 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with ``W`` using 'VALID' padding and a stride of 3*3 in the third dimension."""
    # Only the third stride entry differs from 1; it is kept as 3 * 3 to
    # mirror the 3*3 factor used in the placeholder and filter shapes.
    strides = [1, 1, 3 * 3, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding='VALID')
data = genfromtxt('circle_deeplearn_data_small.txt',delimiter=',')
out = genfromtxt('circle_deeplearn_output_small.txt',delimiter=',')
x = tf.placeholder(tf.float32, shape =[None, 3*3*15]) # size of x
y_ = tf.placeholder(tf.float32, shape =[None, 1]) # size of output
W_conv1 = weight_variable([1,3*3,1,15])
b_conv1 = bias_variable([15])
x_image = tf.reshape(x,[-1,1,3*3*15,1])
h_conv1 = tf.nn.relu(conv2d(x_image,W_conv1) + b_conv1)
W_fc1 = weight_variable([1 * 1 * 15 , 1])
b_fc1 = bias_variable([1])
h_conv1_flat = tf.reshape(h_conv1 , [-1,1 * 1 * 15])
h_fc1 = tf.nn.relu(tf.matmul(h_conv1_flat , W_fc1) + b_fc1)
y_conv = h_fc1
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
for i in range(20000):
batch = data.train.next_batch(50)
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy)){x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g"%accuracy.eval(feed_dict={x: data, y_: out, keep_prob: 1.0}))
This is result:
AttributeError: 'numpy.ndarray' object has no attribute 'train'
Here `data` is just a numpy array. You may need to write your own training-data iterator.
It is not quite clear what you are trying to do. The problem occurs because data is a numpy array generated in this line
data = genfromtxt('circle_deeplearn_data_small.txt',delimiter=',')
The error occurs when you try to use the method train of data, which does not exist, in the following line
batch = data.train.next_batch(50)
Instead you need to feed data to tensorflow.
I have faced the same problem. Actually, it's not really a problem: I simply didn't know the structure of the data, which is why I ran into it. The datasets that come with the TensorFlow library are packaged together and already split into train, test, and validation sets. That's why calling dataset.train.next_batch() works for them. Your own dataset is not packaged in the same way, which is why it doesn't work. You have to set up your dataset yourself, including the batching and the loop over batches.
You may try to use numpy.reshape to turn your data from 2 dimension into 3 dimension.
For example, if you had 20 samples and 100 features, i.e. a (20,100) data matrix, and used a minibatch size of 5, then you could reshape using np.reshape(data,[10,5,-1]) to get a (10,5,40) matrix.
*The "-1" means that you let numpy work out that dimension for you; the total number of elements in the array is 20 * 100 = 2,000.
Thus, in this example: 10*5*40 = 2,000.

Optimize input image with class prior

I'm trying to implement, in TensorFlow, the first part of the Google blog entry
"Inceptionism: Going Deeper into Neural Networks". So far I have found several resources that either explain it in natural language, focus on other parts, or give code snippets for other frameworks. I understand the idea of optimizing a random input image with respect to a class prior, and also the maths behind it given in this paper, section 2, but I'm not able to implement it myself using TensorFlow.
From this SO question and the helpful comment by etarion, I now know that you can give a list of variables to the optimizer, while all other variables are left untouched. However, giving the optimizer a random image as a variable leads to
File "", line 101, in main
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/", line 198, in minimize
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/", line 309, in apply_gradients
ValueError: No gradients provided for any variable: ((None,<tensorflow.python.ops.variables.Variable object at 0x7feac1870410>),)
For testing purpose I used a stripped down MNIST example. I tried to keep it as short as possible while still being readable and executable:
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 128
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, keep_prob)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size), feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc =[cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc =, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# ====== this is where the reconstruction begins =====
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros(10)
one_hot[4] = 1;
# the next line causes the error
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(-cost, var_list=[rnd_img])
for i in xrange(1000):, feed_dict={x: rnd_img, y: one_hot, dropout: 1.0})
if __name__ == "__main__":
The helper functions I used:
def create_net(x, weights, biases, dropout):
    """Assemble the conv/pool x2 -> dense -> dropout -> linear output network.

    ``weights`` is read under 'wc1', 'wc2', 'wd1' and 'out'; ``biases`` under
    'bc1', 'bc2', 'bd1' and 'out'. ``dropout`` is handed to tf.nn.dropout as
    its second argument. Returns the un-activated output tensor.
    """
    # Reinterpret every input row as a 28x28 single-channel image.
    images = tf.reshape(x, shape=[-1, 28, 28, 1])
    pooled1 = maxpool2d(conv2d_relu(images, weights['wc1'], biases['bc1']), 2)
    pooled2 = maxpool2d(conv2d_relu(pooled1, weights['wc2'], biases['bc2']), 2)
    dense = fullyconnected_relu(pooled2, weights['wd1'], biases['bd1'])
    dense = tf.nn.dropout(dense, dropout)
    logits = tf.add(tf.matmul(dense, weights['out']), biases['out'])
    return logits
def conv2d_relu(x, W, b, stride=1):
    """Convolve ``x`` with ``W`` ('SAME' padding), add the bias ``b`` and apply a ReLU."""
    step = [1, stride, stride, 1]
    biased = tf.nn.bias_add(tf.nn.conv2d(x, W, strides=step, padding='SAME'), b)
    return tf.nn.relu(biased)
def maxpool2d(x, k=2, stride=2, padding='VALID'):
    """Max-pool ``x`` with a ``k`` x ``k`` window moved in steps of ``stride``."""
    window = [1, k, k, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding)
def fullyconnected_relu(x, W, b):
    """Flatten ``x`` to the input width of ``W``, compute ``x.W + b`` and apply a ReLU.

    Returns the activated tensor.
    """
    # The flattened width is taken from W's first dimension so the matmul lines up.
    fc = tf.reshape(x, [-1, W.get_shape().as_list()[0]])
    fc = tf.add(tf.matmul(fc, W), b)
    fc = tf.nn.relu(fc)
    # Return the result explicitly: without it the function yields None and
    # create_net would pass None on to tf.nn.dropout.
    return fc
I've found some sources saying that this error occurs when there is no path within the computation graph between the output and the variables to be optimized, but I don't see why this should be the case here.
My questions are:
Why isn't the optimizer able to apply any gradients?
Is this the right way to go in order to implement the visualization of a class?
Thanks in advance.
Here is the complete code again, after incorporation of the accepted answer (for anyone who is interested). Anyway, the results are still not as expected, as the script basically produces random images after 100000 rounds of reconstruction. Ideas are welcome.
import tensorflow as tf
import numpy as np
def conv2d_relu(x, W, b, stride=1):
    """Run a 'SAME'-padded convolution of ``x`` with ``W``, add ``b``, then apply a ReLU."""
    convolved = tf.nn.conv2d(
        x, W, strides=[1, stride, stride, 1], padding='SAME')
    with_bias = tf.nn.bias_add(convolved, b)
    activated = tf.nn.relu(with_bias)
    return activated
def maxpool2d(x, k=2, stride=2, padding='VALID'):
    """Apply max pooling to ``x``: window ``k`` x ``k``, step ``stride``, given ``padding``."""
    return tf.nn.max_pool(
        x,
        ksize=[1, k, k, 1],
        strides=[1, stride, stride, 1],
        padding=padding)
def fullyconnected_relu(x, W, b):
    """Flatten ``x`` to ``W``'s input width, apply ``x.W + b`` and return its ReLU."""
    input_width = W.get_shape().as_list()[0]
    flattened = tf.reshape(x, [-1, input_width])
    affine = tf.add(tf.matmul(flattened, W), b)
    return tf.nn.relu(affine)
def create_net(x, weights, biases, dropout):
    """Build the two-stage conv/pool network with a dense layer, dropout and linear output.

    Looks up 'wc1', 'wc2', 'wd1', 'out' in ``weights`` and 'bc1', 'bc2',
    'bd1', 'out' in ``biases``; ``dropout`` is passed to tf.nn.dropout as
    its second argument. The final tensor has no activation applied.
    """
    # Every input row is viewed as a 28x28 image with a single channel.
    net = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two identical convolution + 2x2 pooling stages.
    for weight_key, bias_key in (('wc1', 'bc1'), ('wc2', 'bc2')):
        net = conv2d_relu(net, weights[weight_key], biases[bias_key])
        net = maxpool2d(net, 2)

    net = fullyconnected_relu(net, weights['wd1'], biases['bd1'])
    net = tf.nn.dropout(net, dropout)
    return tf.add(tf.matmul(net, weights['out']), biases['out'])
def save_image(img_data, name):
img = img_data.reshape(28,28)
mi = np.min(img)
ma = np.max(img)
img = (img-mi)/(ma-mi), img)
def main():
# parameters
learning_rate = 0.001
train_batches = 1000
batch_size = 100
display_step = 50
# net parameters
n_input = 784 #28x28
n_classes = 10
keep_prob = 0.75
weights = {
'wc1': tf.Variable(tf.truncated_normal([5, 5, 1, 32])),
'wc2': tf.Variable(tf.truncated_normal([5, 5, 32, 64])),
'wd1': tf.Variable(tf.truncated_normal([7*7*64, 1024])),
'out': tf.Variable(tf.truncated_normal([1024, n_classes]))
biases = {
'bc1': tf.Variable(tf.constant(0.1, shape=[32])),
'bc2': tf.Variable(tf.constant(0.1, shape=[64])),
'bd1': tf.Variable(tf.constant(0.1, shape=[1024])),
'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
# tf inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32)
# create net
net = create_net(x, weights, biases, dropout)
# define loss
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net, y))
# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# evaluation
pred_correct = tf.equal(tf.argmax(net, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(pred_correct, tf.float32))
print "loading mnist data"
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
sess = tf.Session()
for i in xrange(train_batches):
batch_x, batch_y = mnist.train.next_batch(batch_size), feed_dict={x: batch_x, y: batch_y, dropout: keep_prob})
if i % display_step == 0:
loss, acc =[cost, accuracy], feed_dict={x: batch_x, y: batch_y, dropout: 1.0})
print "batch: %i, loss: %.5f, accuracy: %.5f" % (i, loss, acc)
acc =, feed_dict={x: mnist.test.images, y: mnist.test.labels, dropout: 1.0})
print "test accuracy: %.5f" % (acc)
# reconstruction part
rnd_img = tf.Variable(tf.random_normal([1, n_input]))
one_hot = np.zeros((1, 10))
one_hot[0,1] = 1;
net2 = create_net(rnd_img, weights, biases, dropout)
cost2 = -tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(net2, y))
optimizer2 = tf.train.AdamOptimizer(learning_rate).minimize(cost2, var_list=[rnd_img])
init_var_list = []
for var in tf.all_variables():
if(not tf.is_variable_initialized(var).eval(session=sess)):
save_image(rnd_img.eval(sess), "bevor.tiff")
for i in xrange(100000):
_, loss =[optimizer2, cost2], feed_dict={y: one_hot, dropout: 1.0})
if(i%10000 == 0):
cur_img = rnd_img.eval(session=sess)
print "loss:", loss, "mi:", np.min(cur_img), "ma:", np.max(cur_img)
save_image(rnd_img.eval(sess), "after.tiff")
if __name__ == "__main__":
Some explanation: After rebuilding the graph with the new input variable and optimizer, I had to initialize the new variables, i.e. the rnd_img and some helper variables used by the Adam optimizer, hence the loop over all_variables() and checking for initialization status. If somebody knows a more elegant way, let me know. Or maybe that's the reason why I don't get any results?
The rnd_img needs to be part of the graph that you optimize. In your case, you just create a variable and tell the optimizer to optimize it, but the variable is not connected to the loss in the graph. What you can do, for example, is make another call to create_net with rnd_img instead of x (but using the same weights!), create the cost for that, and then create a minimization op for that cost. Then, for the optimization, you only feed in y.