Image adjustments with Conv2d - numpy

I am working on a project related to CNN using TensorFlow.
I imported image using (20 such images)
for filename in glob.glob('input_data/*.jpg'):
input_images.append(cv2.imread(filename,0))
image_size_input = len(input_images[0])
The images were of size (250,250) because of grayscale.
But for conv2D, it requires a 4D input tensor to feed. My input tensor looks like
x = tf.placeholder(tf.float32,shape=[None,image_size_output,image_size_output,1], name='x')
So i was not able to convert the above 2d image into the given shape(4D). How to deal with the "None" field.
I tried this:
input_images_padded = []
for image in input_images:
temp = np.zeros((1,image_size_output,image_size_output,1))
for i in range(image_size_input):
for j in range(image_size_input):
temp[0,i,j,0] = image[i,j]
input_images_padded.append(temp)
I got the following error:
File "/opt/intel/intelpython3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 975, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (20, 1, 250, 250, 1) for Tensor 'x_11:0', which has shape '(?, 250, 250, 1)'
Here's the entire code(for reference):
import tensorflow as tf
from PIL import Image
import glob
import cv2
import os
import numpy as np
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
input_images = []
output_images = []
for filename in glob.glob('input_data/*.jpg'):
input_images.append(cv2.imread(filename,0))
for filename in glob.glob('output_data/*.jpg'):
output_images.append(cv2.imread(filename,0))
image_size_input = len(input_images[0])
image_size_output = len(output_images[0])
'''
now adding padding to the input images to convert from 125x125 to 250x2050 sized images
'''
input_images_padded = []
for image in input_images:
temp = np.zeros((1,image_size_output,image_size_output,1))
for i in range(image_size_input):
for j in range(image_size_input):
temp[0,i,j,0] = image[i,j]
input_images_padded.append(temp)
output_images_padded = []
for image in output_images:
temp = np.zeros((1,image_size_output,image_size_output,1))
for i in range(image_size_input):
for j in range(image_size_input):
temp[0,i,j,0] = image[i,j]
output_images_padded.append(temp)
sess = tf.Session()
'''
Creating tensor for the input
'''
x = tf.placeholder(tf.float32,shape= [None,image_size_output,image_size_output,1], name='x')
'''
Creating tensor for the output
'''
y = tf.placeholder(tf.float32,shape= [None,image_size_output,image_size_output,1], name='y')
def create_weights(shape):
return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def create_biases(size):
return tf.Variable(tf.constant(0.05, shape=[size]))
def create_convolutional_layer(input, bias_count, filter_height, filter_width, num_input_channels, num_out_channels, activation_function):
weights = create_weights(shape=[filter_height, filter_width, num_input_channels, num_out_channels])
biases = create_biases(bias_count)
layer = tf.nn.conv2d(input=input,
filter=weights,
strides=[1, 1, 1, 1],
padding='SAME')
layer += biases
layer = tf.nn.max_pool(value=layer,
ksize=[1, 2, 2, 1],
strides=[1, 1, 1, 1],
padding='SAME')
if activation_function=="relu":
layer = tf.nn.relu(layer)
return layer
'''
Conv. Layer 1: Patch extraction
64 filters of size 1 x 9 x 9
Activation function: ReLU
Output: 64 feature maps
Parameters to optimize:
1 x 9 x 9 x 64 = 5184 weights and 64 biases
'''
layer1 = create_convolutional_layer(input=x,
bias_count=64,
filter_height=9,
filter_width=9,
num_input_channels=1,
num_out_channels=64,
activation_function="relu")
'''
Conv. Layer 2: Non-linear mapping
32 filters of size 64 x 1 x 1
Activation function: ReLU
Output: 32 feature maps
Parameters to optimize: 64 x 1 x 1 x 32 = 2048 weights and 32 biases
'''
layer2 = create_convolutional_layer(input=layer1,
bias_count=32,
filter_height=1,
filter_width=1,
num_input_channels=64,
num_out_channels=32,
activation_function="relu")
'''Conv. Layer 3: Reconstruction
1 filter of size 32 x 5 x 5
Activation function: Identity
Output: HR image
Parameters to optimize: 32 x 5 x 5 x 1 = 800 weights and 1 bias'''
layer3 = create_convolutional_layer(input=layer2,
bias_count=1,
filter_height=5,
filter_width=5,
num_input_channels=32,
num_out_channels=1,
activation_function="identity")
'''print(layer1.get_shape().as_list())
print(layer2.get_shape().as_list())
print(layer3.get_shape().as_list())'''
'''
applying gradient descent algorithm
'''
#loss_function
loss = tf.reduce_sum(tf.square(layer3-y))
#optimiser
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
for i in range(len(input_images)):
sess.run(train,{x: input_images_padded, y:output_images_padded})
curr_loss = sess.run([loss], {x: x_train, y: y_train})
print("loss: %s"%(curr_loss))

I think your image_padded is not right. I don't have tf-code writing experience (though have read some code). But try this:
// imgs is your input-image-sequences
// padded is to feed
cnt = len(imgs)
H,W = imgs[0].shape[:2]
padded = np.zeros((cnt, H, W, 1))
for i in range(cnt):
padded[i, :,:,0] = img[i]

One option would be to ignore giving the shape when you create the placeholder so that it accepts a tensor of any shape that you feed during sess.run()
From the docs:
shape: The shape of the tensor to be fed (optional). If the shape is not
specified, you can feed a tensor of any shape.
Alternatively, you can specify 20, which is your batch size. Note that the first dimension in the tensor always corresponds to batch_size

Check the next lines. It works for me :
train_set = np.zeros((input_images.shape[0], input_images.shape[1], input_images.shape[2],1))
for image in range(input_images.shape[0]):
train_set[image,:,:,0] = input_images[image,:,:]

Related

Weighted Pixel Wise Categorical Cross Entropy for Semantic Segmentation

I have recently started learning about Semantic Segmentation. I am trying to train a UNet for the same. My input is RGB 128x128x3 images. My masks are made up of 4 classes 0, 1, 2, 3 and are One-Hot Encoded with dimension 128x128x4.
def weighted_cce(y_true, y_pred):
weights = []
t_inf = tf.convert_to_tensor(1e9, dtype = 'float32')
t_zero = tf.convert_to_tensor(0, dtype = 'int64')
for i in range(0, 4):
l = tf.argmax(y_true, axis = -1) == i
n = tf.cast(tf.math.count_nonzero(l), 'float32') + K.epsilon()
weights.append(n)
weights = [batch_size/j for j in weights]
y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
# clip to prevent NaN's and Inf's
y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
# calc
loss = y_true * K.log(y_pred) * weights
loss = -K.sum(loss, -1)
return loss
This is the loss function that I am using but it classifies every pixel as 2. What am I doing wrong?
You should have weights based on you entire data (unless your batch size is reasonably big so you have sort of stable weights).
If some class is underrepresented, with a small batch size, it will have near infinity weights.
If your target data is numpy array:
shp = y_train.shape
totalPixels = shp[0] * shp[1] * shp[2]
weights = np.sum(y_train, axis=(0, 1, 2)) #final shape (4,)
weights = totalPixels/weights
If your data is in a Sequence generator:
totalPixels = 0
counts = np.zeros((4,))
for i in range(len(generator)):
x, y = generator[i]
shp = y.shape
totalPixels += shp[0] * shp[1] * shp[2]
counts = counts + np.sum(y, axis=(0,1,2))
weights = totalPixels / counts
If your data is in a yield generator (you must know how many batches you have in an epoch):
for i in range(batches_per_epoch):
x, y = next(generator)
#the rest is equal to the Sequence example above
Attempt 1
I don't know if newer versions of Keras are able to handle this, but you can try the simplest approach first: simply call fit or fit_generator with the class_weight argument:
model.fit(...., class_weight = {0: weights[0], 1: weights[1], 2: weights[2], 3: weights[3]})
Attempt 2
Make a healthier loss function:
weights = weights.reshape((1,1,1,4))
kWeights = K.constant(weights)
def weighted_cce(y_true, y_pred):
yWeights = kWeights * y_pred #shape (batch, 128, 128, 4)
yWeights = K.sum(yWeights, axis=-1) #shape (batch, 128, 128)
loss = K.categorical_crossentropy(y_true, y_pred) #shape (batch, 128, 128)
wLoss = yWeights * loss
return K.sum(wLoss, axis=(1,2))

In Pytorch, how to test simple image with my loaded model?

I made a alphabet classification CNN model using Pytorch, and then use that model to test it with a single image that I've never seen before. I extracted a bounding box in my handwriting image with opencv, but I don't know how to apply it to the model.
bounded my_image
this is custom dataset
class CustomDatasetFromCSV(Dataset):
def __init__(self, csv_path, height, width, transforms=None):
"""
Args:
csv_path (string): path to csv file
height (int): image height
width (int): image width
transform: pytorch transforms for transforms and tensor conversion
"""
self.data = pd.read_csv(csv_path)
self.labels = np.asarray(self.data.iloc[:, 0])
self.height = height
self.width = width
self.transforms = transforms
def __getitem__(self, index):
single_image_label = self.labels[index]
# Read each 784 pixels and reshape the 1D array ([784]) to 2D array ([28,28])
img_as_np = np.asarray(self.data.iloc[index][1:]).reshape(28,28).astype('uint8')
# Convert image from numpy array to PIL image, mode 'L' is for grayscale
img_as_img = Image.fromarray(img_as_np)
img_as_img = img_as_img.convert('L')
# Transform image to tensor
if self.transforms is not None:
img_as_tensor = self.transforms(img_as_img)
# Return image and the label
return (img_as_tensor, single_image_label)
def __len__(self):
return len(self.data.index)
transformations = transforms.Compose([
transforms.ToTensor()
])
alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
28, 28, transformations)
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
if True:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)
train_loader = torch.utils.data.DataLoader(dataset = alphabet_from_csv,
batch_size = batch_size,
sampler = train_dataset)
test_loader = torch.utils.data.DataLoader(dataset = alphabet_from_csv,
batch_size = batch_size,
sampler = test_dataset)
this is my model
class ConvNet3(nn.Module):
def __init__(self, num_classes=26):
super().__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(28),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(56),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc = nn.Sequential(
nn.Dropout(p = 0.5),
nn.Linear(56 * 7 * 7, 512),
nn.BatchNorm1d(512),
nn.ReLU(),
nn.Dropout(p = 0.5),
nn.Linear(512, 26),
)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
return out
model = ConvNet3(num_classes).to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
# train phase
model.train()
# create a progress bar
batch_loss_list = []
progress = ProgressMonitor(length=len(train_dataset))
for batch, target in train_loader:
# Move the training data to the GPU
batch, target = batch.to(device), target.to(device)
# forward propagation
output = model( batch )
# calculate the loss
loss = loss_func( output, target )
# clear previous gradient computation
optimizer.zero_grad()
# backpropagate to compute gradients
loss.backward()
# update model weights
optimizer.step()
# update progress bar
batch_loss_list.append(loss.item())
progress.update(batch.shape[0], sum(batch_loss_list)/len(batch_loss_list) )
def test():
# test phase
model.eval()
correct = 0
# We don't need gradients for test, so wrap in
# no_grad to save memory
with torch.no_grad():
for batch, target in test_loader:
# Move the training batch to the GPU
batch, target = batch.to(device), target.to(device)
# forward propagation
output = model( batch )
# get prediction
output = torch.argmax(output, 1)
# accumulate correct number
correct += (output == target).sum().item()
# Calculate test accuracy
acc = 100 * float(correct) / len(test_dataset)
print( 'Test accuracy: {}/{} ({:.2f}%)'.format( correct, len(test_dataset), acc ) )
for epoch in range(num_epochs):
print("{}'s try".format(int(epoch)+1))
train()
test()
print("-----------------------------------------------------------------------------")
this is my image to bound
import cv2
import matplotlib.image as mpimg
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[1]
rects=[]
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h < 20: continue
red = (0, 0, 255)
cv2.rectangle(im, (x, y), (x+w, y+h), red, 2)
rects.append((x,y,w,h))
cv2.imwrite('my_handwritten_bounding.png', im)
img_result = []
img_for_class = im.copy()
margin_pixel = 60
for rect in rects:
#[y:y+h, x:x+w]
img_result.append(
img_for_class[rect[1]-margin_pixel : rect[1]+rect[3]+margin_pixel,
rect[0]-margin_pixel : rect[0]+rect[2]+margin_pixel])
# Draw the rectangles
cv2.rectangle(im, (rect[0], rect[1]),
(rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2)
count = 0
nrows = 4
ncols = 7
plt.figure(figsize=(12,8))
for n in img_result:
count += 1
plt.subplot(nrows, ncols, count)
plt.imshow(cv2.resize(n,(28,28)), cmap='Greys', interpolation='nearest')
plt.tight_layout()
plt.show()
You have already written the function test to test your net. The only thing you should do — create batch with one image with same preprocessing as images in your dataset.
def test_one_image(I, model):
'''
I - 28x28 uint8 numpy array
'''
# test phase
model.eval()
# convert image to torch tensor and add batch dim
batch = torch.tensor(I / 255).unsqueeze(0)
# We don't need gradients for test, so wrap in
# no_grad to save memory
with torch.no_grad():
batch = batch.to(device)
# forward propagation
output = model( batch )
# get prediction
output = torch.argmax(output, 1)
return output

Tensorflow: Processing one class at time

I am trying to train a dataset of 10,000+ images using Tensorflow GPU (GTX 1060 Max-Q 6GB). Because the size of the image in my dataset is huge (512 x 424 pixels) I get a MemoryError.
Traceback (most recent call last):
File "train.py", line 33, in <module>
data = dataset.read_train_sets(train_path, img_size, classes, validation_size=validation_size)
File "/home/nabeel/tf-realsense-gesture/dataset.py", line 103, in read_train_sets
images, labels, img_names, cls = shuffle(images, labels, img_names, cls)
File "/home/nabeel/anaconda3/envs/tensorflow/lib/python2.7/site-packages/sklearn/utils/__init__.py", line 403, in shuffle
return resample(*arrays, **options)
File "/home/nabeel/anaconda3/envs/tensorflow/lib/python2.7/site-packages/sklearn/utils/__init__.py", line 327, in resample
resampled_arrays = [safe_indexing(a, indices) for a in arrays]
File "/home/nabeel/anaconda3/envs/tensorflow/lib/python2.7/site-packages/sklearn/utils/__init__.py", line 216, in safe_indexing
return X.take(indices, axis=0)
MemoryError
The problem with my code is that I am training all seven classes at the same time which is why I get a memory error. I want to process single class at a time.
I have tried to implement a while/for loop inside but every time a loop finishes, the .meta file is overwritten and only works on one class. Is there any way to train multiple classes at the time or one by one?
train.py
batch_size = 1
# 7 classess for recognitions
#classes = ['up']
classes = ['up','down','left','right','forward','backward','none']
#classes = ['up','down','left','right','forward','backward','none']
num_classes = len(classes)
# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 200
num_channels = 3
train_path='training_data'
# load all the training and validation images and labels into memory
data = dataset.read_train_sets(train_path, img_size, classes, validation_size=validation_size)
print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation-set:\t{}".format(len(data.valid.labels)))
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[batch_size,img_size,img_size,num_channels], name='x')
# labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
#Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 64
filter_size_conv4 = 3
num_filters_conv4 = 128
filter_size_conv5 = 3
num_filters_conv5 = 256
filter_size_conv6 = 3
num_filters_conv6 = 512
filter_size_conv7 = 3
num_filters_conv7= 1024
fc_layer_size = 2048
def create_weights(shape):
return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def create_biases(size):
return tf.Variable(tf.constant(0.05, shape=[size]))
def create_convolutional_layer(input,num_input_channels,conv_filter_size,num_filters):
# define the weights that will be trained
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
# create biases
biases = create_biases(num_filters)
# Creat convolutional layer
layer = tf.nn.conv2d(input=input,filter=weights,strides=[1, 1, 1, 1],padding='SAME')
layer += biases
# max-pooling
layer = tf.nn.max_pool(value=layer,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME')
# Relu is the activation function
layer = tf.nn.relu(layer)
return layer
def create_flatten_layer(layer):
layer_shape = layer.get_shape()
num_features = layer_shape[1:4].num_elements()
# Flatten the layer so reshape to num_features
layer = tf.reshape(layer, [-1, num_features])
return layer
def create_fc_layer(input,
num_inputs,
num_outputs,
use_relu=True):
# define trainable weights and biases.
weights = create_weights(shape=[num_inputs, num_outputs])
biases = create_biases(num_outputs)
# Fully connected layer
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
layer_conv1 = create_convolutional_layer(input=x,num_input_channels=num_channels,conv_filter_size=filter_size_conv1,
num_filters=num_filters_conv1)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
num_input_channels=num_filters_conv1,
conv_filter_size=filter_size_conv2,
num_filters=num_filters_conv2)
layer_conv3= create_convolutional_layer(input=layer_conv2,
num_input_channels=num_filters_conv2,
conv_filter_size=filter_size_conv3,
num_filters=num_filters_conv3)
layer_conv4= create_convolutional_layer(input=layer_conv3,
num_input_channels=num_filters_conv3,
conv_filter_size=filter_size_conv4,
num_filters=num_filters_conv4)
layer_conv5= create_convolutional_layer(input=layer_conv4,
num_input_channels=num_filters_conv4,
conv_filter_size=filter_size_conv5,
num_filters=num_filters_conv5)
layer_conv6= create_convolutional_layer(input=layer_conv5,
num_input_channels=num_filters_conv5,
conv_filter_size=filter_size_conv6,
num_filters=num_filters_conv6)
layer_conv7= create_convolutional_layer(input=layer_conv6,
num_input_channels=num_filters_conv6,
conv_filter_size=filter_size_conv7,
num_filters=num_filters_conv7)
layer_flat = create_flatten_layer(layer_conv7)
layer_fc1 = create_fc_layer(input=layer_flat,num_inputs=layer_flat.get_shape()[1:4].num_elements(),num_outputs=fc_layer_size,
use_relu=True)
layer_fc2 = create_fc_layer(input=layer_fc1, num_inputs=fc_layer_size,num_outputs=num_classes, use_relu=False)
y_pred = tf.nn.softmax(layer_fc2,name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session.run(tf.global_variables_initializer())
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
acc = session.run(accuracy, feed_dict=feed_dict_train)
val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
print(msg.format(epoch + 1, acc, val_acc, val_loss))
total_iterations = 0
saver = tf.train.Saver()
def train(num_iteration):
global total_iterations
for i in range(total_iterations,total_iterations + num_iteration):
x_batch, y_true_batch, _, cls_batch = data.train.next_batch(batch_size)
x_valid_batch, y_valid_batch, _, valid_cls_batch = data.valid.next_batch(batch_size)
feed_dict_tr = {x: x_batch,y_true: y_true_batch}
feed_dict_val = {x: x_valid_batch,y_true: y_valid_batch}
session.run(optimizer, feed_dict=feed_dict_tr)
if i % int(data.train.num_examples/batch_size) == 0:
val_loss = session.run(cost, feed_dict=feed_dict_val)
epoch = int(i / int(data.train.num_examples/batch_size))
show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
saver.save(session, '/home/nabeel/tf-realsense-gesture/')
total_iterations += num_iteration
train(num_iteration=6000)
Since you are Facing Out Of Memory Issue in CNNs, you can try the below steps:
Increase the Strides of the Convolutional Layer i.e., instead of using Sh = 1 and Sw = 1, you can use either Sh = 2 and Sw = 2. This will reduce the Dimensionality of the Image and hence will reduce the RAM Consumption. Code for the same is shown below:
layer = tf.nn.conv2d(input=input,filter=weights,strides=[1, 2, 2, 1],padding='SAME')
Verify if you really require 7 Convolutional Layers. You can try with Less Number of Convolutional Layers (4 or 5 or 6) and can check the performance. Because each Convolutional Layer with some Number of Filters will increase the Memory usage.
Replace tf.float32 with tf.float16 and if it works without any error.
Using an Inception Module instead of Convolutional Layer.

Multiplying a rank 3 tensor with a rank 2 tensor in Tensorflow

Given a rank 3 tensor:
sentence_max_length = 5
batch_size = 3
n_hidden = 10
n_classes = 2
x = tf.constant(np.reshape(np.arange(150),(batch_size,sentence_max_length, n_hidden)), dtype = tf.float32)
And a rank 2 tensor:
W = tf.constant(np.reshape(np.arange(20), (n_hidden, n_classes)), dtype = tf.float32)
And a rank 1 bias tensor:
b = tf.constant(np.reshape(np.arange(5), (n_classes), dtype = tf.float32))
I was wondering how one would the last two axis of x by W such that the resulting vector Z would be of shape (batch_size, max_length, n_classes) though batch_size would not be known during graph creation I've just given it a value here for demonstration purposes
So to clarify:
Z[0] = tf.matmul(x[0,:,:], W) + b
So that the W and b are shared across all the batches. The reason for this is that I am trying to use the output of tf.dynamic_rnn whereby the output is of shape (batch_size, sentence_max_length, n_hidden) and build another layer ontop of output which has shared weights W and b.
One approach could be ...
import tensorflow as tf
import numpy as np
from tensorflow.python.layers.core import Dense
sentence_max_length = 5
batch_size = 3
n_hidden = 10
n_classes = 2
x = tf.constant(np.reshape(np.arange(150),(batch_size,sentence_max_length, n_hidden)), dtype = tf.float32)
linear_layer = Dense(n_classes, use_bias=True) #required projection value
z = linear_layer(x)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
res = sess.run(z)
res.shape
(3, 5, 2)
Internally, Dense layer creates trainable W & b variables. And, it uses standard_ops.tensordot operation to transform the last dimension to the projected value. For further details, refer to the source code here.

Predict next number in a pattern

I am trying to write a simple program using TensorFlow to predict the next number in a sequence.
I am not experienced in TensorFlow so instead of starting from scratch I started with this guide: http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
However, in contrast to the implementation in the link above I do not want to treat the problem as a classification problem - where I only have n possible outcomes - but instead just calculate a single value for a sequence.
I tried modifying the code to fit my problem:
import numpy as np
import random
from random import shuffle
import tensorflow as tf
NUM_EXAMPLES = 10000
train_input = ['{0:020b}'.format(i) for i in range(2**20)]
shuffle(train_input)
train_input = [map(int,i) for i in train_input]
ti = []
for i in train_input:
temp_list = []
for j in i:
temp_list.append([j])
ti.append(np.array(temp_list))
train_input = ti
train_output = []
for i in train_input:
count = 0
for j in i:
if j[0] == 1:
count+=1
#temp_list = ([0]*21)
#temp_list[count]=1
#train_output.append(temp_list)
train_output.append(count)
test_input = train_input[NUM_EXAMPLES:]
test_output = train_output[NUM_EXAMPLES:]
train_input = train_input[:NUM_EXAMPLES]
train_output = train_output[:NUM_EXAMPLES]
print "test and training data loaded"
target = tf.placeholder(tf.float32, [None, 1])
data = tf.placeholder(tf.float32, [None, 20,1]) #Number of examples, number of input, dimension of each input
#target = tf.placeholder(tf.float32, [None, 1])
#print('target shape: ', target.get_shape())
#print('shape[0]', target.get_shape()[1])
#print('int(shape) ', int(target.get_shape()[1]))
num_hidden = 24
cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
print('val shape, ', val.get_shape())
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
#prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
prediction = tf.matmul(last, weight) + bias
cross_entropy = -tf.reduce_sum(target - prediction)
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 100
no_of_batches = int(len(train_input)) / batch_size
epoch = 500
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out})
print "Epoch ",str(i)
incorrect = sess.run(error,{data: test_input, target: test_output})
#print sess.run(prediction,{data: [[[1],[0],[0],[1],[1],[0],[1],[1],[1],[0],[1],[0],[0],[1],[1],[0],[1],[1],[1],[0]]]})
#print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
sess.close()
It is still work in progress, since the input is bogus as well as the cross entropy calculation.
However, my main problem is that the code doesn't compile at all.
I get this error:
ValueError: Cannot feed value of shape (100,) for Tensor
u'Placeholder:0', which has shape '(?, 1)'
The number 100 comes from the "batch_size" and the (?, 1) comes from the fact that my prediction is a one dimensional number. However, I do not have any idea where the problem is in my code?
Can anyone help me get the dimensions to match?
This error means your targets placeholder is being fed something with the wrong shape. To fix it, I think you should reshape something like test_output.reshape([-1, 1])
To fix the placeholders shape, change your code to
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp = train_input[ptr:ptr+batch_size]
out = train_output[ptr:ptr+batch_size]
ptr+=batch_size
out = np.reshape(out, (100,1)) #reshape
sess.run(minimize,{data: inp, target: out})
print ("Epoch ",str(i))
test_output = np.reshape(test_output, (1038576,1)) #reshape
incorrect = sess.run(error,{data: test_input, target: test_output})