'DataFrame' object has no attribute 'train' - tensorflow

Please help me find what I'm missing. Why do I always get this error?
'DataFrame' object has no attribute 'train'
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

dataset = pd.read_csv("all.csv")
x = dataset.iloc[:, 1:51].values
y = dataset.iloc[:, 51].values

time_steps = 5
num_units = 128
n_input = 50
learning_rate = 0.001
n_classes = 2
batch_size = 5

# weights and biases of appropriate shape to accomplish above task
out_weights = tf.Variable(tf.random_normal([num_units, n_classes]))
out_bias = tf.Variable(tf.random_normal([n_classes]))

# defining placeholders
# input image placeholder
x = tf.placeholder("float", [None, time_steps, n_input])
# input label placeholder
y = tf.placeholder("float", [None, n_classes])

# processing the input tensor from [batch_size, n_steps, n_input] to
# "time_steps" number of [batch_size, n_input] tensors
input = tf.unstack(x, time_steps, 1)

# defining the network
lstm_layer = rnn.BasicLSTMCell(num_units, forget_bias=1)
outputs, _ = rnn.static_rnn(lstm_layer, input, dtype="float32")

# converting last output of dimension [batch_size, num_units] to
# [batch_size, n_classes] by out_weight multiplication
prediction = tf.matmul(outputs[-1], out_weights) + out_bias

# loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
# optimization
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# model evaluation
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# initialize variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    iter = 1
    while iter < 800:
        batch_x, batch_y = dataset.train.next_batch(batch_size=batch_size)
        batch_x = batch_x.reshape((batch_size, time_steps, n_input))
        sess.run(opt, feed_dict={x: batch_x, y: batch_y})
        if iter % 10 == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            los = sess.run(loss, feed_dict={x: batch_x, y: batch_y})
            print("For iter ", iter)
            print("Accuracy ", acc)
            print("Loss ", los)
            print("__________________")
        iter = iter + 1

As the error states, your pandas DataFrame object has no attribute or method called train (and hence no train.next_batch).
You probably followed a tutorial that used TensorFlow's helper methods to load the MNIST dataset, where mnist.train.next_batch is provided. But pd.read_csv returns a plain DataFrame, not the "DataSet" class you are expecting, so you have to build the batches yourself from the arrays you already extracted with .iloc.
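A minimal batching sketch (the helper next_batch and the array names here are illustrative, not a pandas or TensorFlow API; note the script also reuses x and y for the placeholders, so the numpy arrays need distinct names, and the labels still have to be one-hot encoded to match the [None, n_classes] placeholder):
def next_batch(features, labels, batch_size, step):
    # slice successive batches out of the arrays extracted with .iloc;
    # assumes len(features) is a multiple of batch_size
    start = (step * batch_size) % len(features)
    end = start + batch_size
    return features[start:end], labels[start:end]

# inside the training loop, instead of dataset.train.next_batch(...):
# batch_x, batch_y = next_batch(feature_array, label_array, batch_size, iter)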

Related

numpy method for tensors in TensorFlow 2.x and eager execution

Using TensorFlow 2.4.1 on Colab, running the code below:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
import numpy as np

(train_data, train_labels), (test_data, test_labels) = cifar100.load_data(label_mode='fine')
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))

for (train, label) in train_dataset.take(1):
    print(label)
    print(label.numpy()[0])
# tf.Tensor([19], shape=(1,), dtype=int64)
# 19
This is all fine, but when trying the same thing inside the filter method of a tf.data.Dataset (code below), the numpy method does not work:
def filter_classes(dataset, classes):
    def match_class(data, label):
        print(label)
        print(label.numpy()[0])
        return label.numpy()[0] in classes
    return dataset.filter(match_class)

cifar_classes = [0, 29, 99]
train_dataset = filter_classes(train_dataset, cifar_classes)
# Tensor("args_1:0", shape=(1,), dtype=int64)
# AttributeError: 'Tensor' object has no attribute 'numpy'
From reading some of the related questions, the error seems to be due to the latter tensor not being eagerly executed.
Does the tensor showing up as "args_1:0" rather than as a numpy array signify that it has not been evaluated?
With the filter method, is it by design that the tensors within the dataset objects are not evaluated eagerly?
Thanks.
Functions passed to tf.data.Dataset transformations do not run in eager mode, for performance reasons: they are traced once into a graph, which is why the label prints as the symbolic Tensor("args_1:0", ...). You can't use the .numpy method in a function used by a tf.data.Dataset.
In your case, you can use a combination of tf.math.equal and tf.math.reduce_any to filter your dataset and keep only the desired classes. Since the dataset yields (data, label) pairs, the predicate takes two arguments:
ds_filtered = train_dataset.filter(lambda data, label: tf.math.reduce_any(tf.math.equal(label, cifar_classes)))
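A quick way to check the filtered pipeline (a minimal sketch; taking 3 elements is arbitrary):
for data, label in ds_filtered.take(3):
    # outside the traced filter function we are back in eager mode,
    # so .numpy() works again
    print(label.numpy())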

How can I print output (tensor values, shapes) in gpflow?

I am trying to develop a new model within GPflow. In order to debug it, I need to know the shapes and values of tensors during execution of the graph.
I tried the code below, based on printing tensor values in tensorflow, but nothing is printed to the console.
import numpy as np
import sys
import tensorflow as tf
import gpflow
from gpflow.mean_functions import MeanFunction
from gpflow.decors import params_as_tensors

class Log(MeanFunction):
    """
    :math:`y_i = \log(x_i)`
    """
    def __init__(self):
        MeanFunction.__init__(self)

    @params_as_tensors
    def __call__(self, X):
        # I want to figure out the shape of X here
        tf.print(tf.shape(X), output_stream=sys.stdout)
        # Returns the natural logarithm of the input
        return tf.log(X)

# Test gpflow implementation
sess = tf.InteractiveSession()
with sess.as_default(), sess.graph.as_default():
    X = np.random.uniform(size=[100, 1])
    y = np.random.uniform(size=[100, 1])
    m = gpflow.models.GPR(X=X, Y=y, mean_function=Log(), kern=gpflow.kernels.RBF(input_dim=1))
You're on the right track. According to the TensorFlow docs [1], when in graph mode you need to wrap tf.print() in a tf.control_dependencies() context manager to make sure it is actually run. GPflow currently works in graph mode; GPflow 2.0, which is in development, will allow usage in eager mode.
@params_as_tensors
def __call__(self, X):
    # I want to figure out the shape of X here
    print_op = tf.print(tf.shape(X), output_stream=sys.stdout)
    with tf.control_dependencies([print_op]):
        # Returns the natural logarithm of the input
        log_calc = tf.log(X)
    return log_calc
[1] https://www.tensorflow.org/api_docs/python/tf/print
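Note that nothing prints until something actually runs the graph. With GPflow 1.x (an assumption here, consistent with the gpflow.decors import above), evaluating the model triggers execution and fires the print op:
# any graph evaluation routed through the mean function fires the print op
print(m.compute_log_likelihood())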

TensorFlow returns the same value when using numpy random function

I am using TensorFlow with a numpy random function, but the output is always the same value. How can I generate different values? You may recommend using the native tf random functions, but I need to use the numpy random function.
import tensorflow as tf
import random

def get_rand():
    return random.randint(0, 5)

a = get_rand()
tfprint = tf.Print(a, [a])

for i in range(10):
    print(get_rand())

with tf.Session() as sess:
    for i in range(10):
        sess.run(tfprint)
With tf.py_func, you can turn the numpy function into a TensorFlow op that is re-executed on every sess.run. In your original code, get_rand() runs once in Python while the graph is being built, so tf.Print wraps a constant and every sess.run prints that same value.
import tensorflow as tf
import random

def get_rand():
    return random.randint(0, 5)

a = tf.py_func(get_rand, [], tf.int64)
tfprint = tf.Print(a, [a])

with tf.Session() as sess:
    for i in range(10):
        sess.run(tfprint)
Alternatively, you can feed the data in using a placeholder and the feed_dict argument:
import tensorflow as tf
import random

def get_rand():
    return random.randint(0, 5)

a = tf.placeholder(tf.int32)
tfprint = tf.Print(a, [a])

with tf.Session() as sess:
    for i in range(10):
        sess.run(tfprint, feed_dict={a: get_rand()})
You may read more about placeholders here.

Tensorflow data import

I just started to use TensorFlow, but I am failing to import my data properly for use with the DNNClassifier. I have two files in HDF5 format that I import with pandas. The feature vector has dimension 100, and there are 5 classes that the features can belong to. If I use, for example, the following code:
import pandas as pd
import numpy as np
import tensorflow as tf

# Data
train = pd.read_hdf("train.h5", "train")
test = pd.read_hdf("test.h5", "test")

Y = train.iloc[0:, 0]
X = train.iloc[0:, 1:]
X_t = test.iloc[0:, 0:]

Y = np.array(Y.values).astype('int')
X = np.array(X.values).astype('double')
X_t = np.array(X_t.values).astype('double')

# Train
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=100)]
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20],
                                            n_classes=5,
                                            model_dir="/tmp/model")

# Define the training inputs
def get_train_inputs():
    x = tf.constant(X)
    y = tf.constant(Y)
    return x, y

# Fit
classifier.fit(input_fn=get_train_inputs, steps=1000)
predictions = list(classifier.predict(input_fn=get_train_inputs))
print(predictions)
I get the error: InvalidArgumentError (see above for traceback): Shape in shape_and_slice spec [100,10] does not match the shape stored in checkpoint: [1,10]
[[Node: save/RestoreV2_2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/RestoreV2_2/tensor_names, save/RestoreV2_2/shape_and_slices)]]
I don't understand why this happens. How should I transform my data to work with this classifier?
My solution: change model_dir="/tmp/model" to a fresh directory, e.g.
model_dir="/tmp/model-1"
It need not be model-1; any valid new name such as model_dir="/tmp/model-a" works. The estimator tries to restore the checkpoint already sitting in /tmp/model, which was written by an earlier run with a different network shape, hence the [100,10] vs [1,10] mismatch; pointing model_dir at an empty directory makes it train from scratch.
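Equivalently (a sketch of the same idea, assuming nothing else needs that directory), you can delete the stale checkpoint directory before constructing the classifier:
import os
import shutil

# remove the old checkpoint so the restored variable shapes
# cannot clash with the newly built graph
if os.path.exists("/tmp/model"):
    shutil.rmtree("/tmp/model")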

Wrong dtype for a feed to the placeholder x-input TensorFlow

I want to implement a simple logistic regression on MNIST with the TF I just installed, and monitor the progress of the minibatch SGD with TensorBoard.
I first did it without TensorBoard: it ran and got 0.9166 accuracy on the test set.
However, when I added TensorBoard to see what was going on, it wouldn't even run anymore. I got
the placeholders must be fed with dtype float
but all my arrays are np arrays with dtype float!
If you can point out the problem(s) in my code, that would be amazing:
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 14 13:06:44 2016
@author: me
"""
#from tensorflow.examples.tutorials.mnist import input_data
#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
import tensorflow as tf
import os
import random
import numpy as np
from array import array
import struct
import matplotlib.pyplot as plt
import time

# I first placed the decompressed -ubyte files from mnist on the path indicated
os.chdir('/home/me/Bureau/Step1/')
with open("train-labels.idx1-ubyte") as file:
    magic, size = struct.unpack(">II", file.read(8))
    train_labels_data = np.asarray(array("B", file.read()))
with open("t10k-labels.idx1-ubyte") as file:
    magic, size = struct.unpack(">II", file.read(8))
    test_labels_data = np.asarray(array("B", file.read()))
with open("train-images.idx3-ubyte") as file:
    magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
    train_images_data = np.reshape(np.asarray(array("B", file.read())), (size, rows, cols))
with open("t10k-images.idx3-ubyte") as file:
    magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
    test_images_data = np.reshape(np.asarray(array("B", file.read())), (size, rows, cols))

for i in range(10):
    plt.imshow(train_images_data[i, :])
    plt.show()
    print(train_labels_data[i])

train_images = np.reshape(train_images_data, (60000, 28*28)).astype(np.float32)*1/255
test_images = np.reshape(test_images_data, (10000, 28*28)).astype(np.float32)*1/255
train_labels = np.zeros((60000, 10), dtype=np.float32)
test_labels = np.zeros((10000, 10), dtype=np.float32)
for i in range(60000):
    a = train_labels_data[i]
    train_labels[i, a] = 1.
for j in range(10000):
    b = test_labels_data[j]
    test_labels[j, b] = 1.

sess = tf.Session()
x = tf.placeholder(tf.float32, [None, 784], name="x-input")
W = tf.Variable(tf.zeros([784, 10]), name="weights")
b = tf.Variable(tf.zeros([10]), name="bias")
with tf.name_scope("Wx_b") as scope:
    y = tf.nn.softmax(tf.matmul(x, W) + b)
w_hist = tf.histogram_summary("weights", W)
b_hist = tf.histogram_summary("bias", b)
y_hist = tf.histogram_summary("y", y)
y_ = tf.placeholder(tf.float32, [None, 10], name="y-input")
with tf.name_scope("xent") as scope:
    cross_entropy = -tf.reduce_sum(y_*tf.log(y))
    ce_summ = tf.scalar_summary("cross_entropy", cross_entropy)
with tf.name_scope("train") as scope:
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
with tf.name_scope("test") as scope:
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy_summary = tf.scalar_summary("accuracy", accuracy)
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter("/tmp/mnist_logs", sess.graph_def)
init = tf.initialize_all_variables()
sess.run(init)
for i in range(1000):
    if i % 10 == 0:
        feed = {x: test_images, y_: test_labels}
        result = sess.run([merged, accuracy], feed_dict=feed)
        summary_str = result[0]
        acc = result[1]
        writer.add_summary(summary_str, i)
        print("Accuracy at step %s: %s" % (i, acc))
    else:
        index = np.random.randint(60000-1, size=100)
        batch_xs, batch_ys = train_images[index, :], train_labels[index]
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print(sess.run(accuracy, feed_dict={x: train_images, y_: train_labels}))
The line where it happens is the feed to merged; however, since I feed it exactly the same way as I fed train_step, I am at a loss...
It turns out you cannot run the same script over and over in the same interactive session: every run adds its summary ops to the same default graph, so merge_all_summaries() also picks up summaries from earlier runs whose placeholders are never fed. When I opened a fresh Spyder console and launched the program, it worked!!!
Mind=blown
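A programmatic alternative (my assumption; the fix above was simply restarting the IDE) is to clear the default graph at the top of the script, so that re-runs do not accumulate stale summary ops:
# start every run from an empty graph; merge_all_summaries() then only
# sees the summaries defined in this run
tf.reset_default_graph()
sess = tf.Session()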