TensorFlow 1 to TensorFlow 2 migration

I have tried to rewrite a TensorFlow 1 model in TensorFlow 2 using the Keras API. Why do the losses I am getting differ so much? Aren't they the same models?
TF1 version:
conv1 = tf.compat.v1.layers.conv1d(input_data, num_filters, 1, strides=1, padding = 'valid', kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
conv1_norm = tf.compat.v1.layers.batch_normalization(conv1)
conv1_activ = tf.nn.sigmoid(conv1_norm)
conv2 = tf.compat.v1.layers.conv1d(conv1_activ, 2*num_filters, 3, padding = 'valid', kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
conv2_norm = tf.compat.v1.layers.batch_normalization(conv2)
conv2_activ = tf.nn.sigmoid(conv2_norm)
dropout = tf.nn.dropout(conv2_activ, rate = 1 - (1 - dropout_prob/2))
flat_layer = tf.compat.v1.layers.flatten(dropout)
dense = tf.compat.v1.layers.dense(flat_layer, units = 4 * num_filters, kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
dense_norm = tf.compat.v1.layers.batch_normalization(dense)
dense_activ = tf.nn.sigmoid(dense_norm)
y_pred = tf.compat.v1.layers.dense(dense_activ, units = 1, kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
y_pred = tf.nn.tanh(y_pred)
Phi_t_predict = Phi0_t_tf + y_pred
AI_t_predict = 5 * Phi_t_predict + 0.3
ref_t_predict = (AI_t_predict[1:] - AI_t_predict[:-1]) / (AI_t_predict[:-1] + AI_t_predict[1:])
syn_PP_t = tf.matmul(wvlt_map_tf, ref_t_predict)
data_misfit = 100 * tf.reduce_mean(input_tensor=tf.square(syn_PP_t - output_data))
model_misfit = 1 * tf.reduce_mean(input_tensor=tf.square(Phi_t_predict - Phi0_t_tf))
up = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[0] - Phi0_t_tf[0]))
bottom = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[-1] - Phi0_t_tf[-1]))
bound = 1 * (up + bottom) / 2
loss = data_misfit + model_misfit
optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
#%%
plt.close('all')
iter = 5001
l = np.zeros((iter,1))
data_error = np.zeros((iter,1))
model_error = np.zeros((iter,1))
AI_predict_iter = np.zeros((len(AI0_t),iter))
Phi_predict_iter = np.zeros((len(AI0_t),iter))
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for step in range(iter):
        dic = {input_data: X_train, output_data: Y_train}
        _, AI_predict, l[step], model_error[step], Phi_predict, contrast = sess.run([train, AI_t_predict, loss, model_misfit, Phi_t_predict, y_pred], feed_dict=dic)
        ref_predict = (AI_predict[1:] - AI_predict[:-1]) / (AI_predict[:-1] + AI_predict[1:])
        top = AI_predict[0] - AI0_t[0]
        bottom = AI_predict[-1] - AI0_t[-1]
        AI_predict = AI_predict - (top + bottom) / 2
        AI_predict_iter[:,step] = AI_predict.ravel()
        top = Phi_predict[0] - Phi0_t[0]
        bottom = Phi_predict[-1] - Phi0_t[-1]
        Phi_predict = Phi_predict - (top + bottom) / 2
        Phi_predict_iter[:,step] = Phi_predict.ravel()
TensorFlow 2 model:
model = keras.Sequential()
model.add(layers.Conv1D(filters = num_filters, kernel_size=1, strides=1, padding = 'valid', kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Conv1D(filters = num_filters, kernel_size = 3, padding = 'valid', kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Dropout(rate = 0.1))
model.add(layers.Flatten())
model.add(layers.Dense(units = 4 * num_filters, kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer = 'zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Dense(units = 1, kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer = 'zeros'))
model.add(layers.Activation('tanh'))
def loss_fn(y_pred, y_true):
    Phi_t_predict = Phi0_t_tf + y_pred
    AI_t_predict = 5 * Phi_t_predict + 0.3
    ref_t_predict = (AI_t_predict[1:] - AI_t_predict[:-1]) / (AI_t_predict[:-1] + AI_t_predict[1:])
    syn_PP_t = tf.matmul(wvlt_map_tf, ref_t_predict)
    y_true = y_true[1:,]
    data_misfit = 100 * tf.reduce_mean(input_tensor=tf.square(syn_PP_t - y_true))
    model_misfit = 1 * tf.reduce_mean(input_tensor=tf.square(Phi_t_predict - Phi0_t_tf))
    up = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[0] - Phi0_t_tf[0]))
    bottom = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[-1] - Phi0_t_tf[-1]))
    bound = 1 * (up + bottom) / 2
    loss = data_misfit + model_misfit
    return loss
model.compile(loss=loss_fn, optimizer=keras.optimizers.Adam(learning_rate=0.001))
model.fit(X_train, Y_train, epochs=5001, batch_size=64)
TensorFlow has an upgrade script, and with it I made the TF1-written model executable in TF2; however, I need it written natively in TF2 with Keras.
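One structural difference worth checking before comparing losses: the TF1 loop above runs every optimization step on the full X_train, while model.fit(..., batch_size=64) updates on mini-batches, which changes what BatchNormalization and Dropout see on each step. Below is a minimal sketch of a TF2 training loop that mirrors the TF1 one, assuming model, loss_fn, X_train, and Y_train are defined as above (note that when a loss is passed to compile, Keras calls it as loss(y_true, y_pred), the reverse of the argument order used in loss_fn here):
optimizer = keras.optimizers.Adam(learning_rate=0.001)

@tf.function
def full_batch_step(x, y):
    with tf.GradientTape() as tape:
        # training=True so BatchNormalization/Dropout behave as in the TF1 graph
        y_pred = model(x, training=True)
        loss = loss_fn(y_pred, y)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

for step in range(5001):
    step_loss = full_batch_step(X_train, Y_train)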

Related

TensorFlow implementation is much slower than PyTorch (with low GPU utilization) and occupies more memory

I am trying to convert a PyTorch implementation to TensorFlow, but I found that the program occupies more memory, has lower GPU utilization, and is much slower than the PyTorch version. I am wondering whether this is the general case, or whether I got the TF model structure wrong?
I put the code up; it can be run directly.
https://colab.research.google.com/drive/1oI6GVnt3sAULvbMMAGTY4B6LsHguJd9U#scrollTo=DNH5pPynm-jm
The PyTorch version uses half the memory with twice the GPU utilization compared with the TF version.
Code:
import math
import os
import tensorflow as tf
import json
import numpy as np
import time
def calc_diffusion_step_embedding(diffusion_steps, diffusion_step_embed_dim_in):
    assert diffusion_step_embed_dim_in % 2 == 0
    half_dim = diffusion_step_embed_dim_in // 2
    _embed = tf.math.log(tf.convert_to_tensor(10000.0)) / (half_dim - 1)
    _embed = tf.math.exp(tf.cast(tf.experimental.numpy.arange(start=0, stop=half_dim), dtype=tf.float32) * -_embed)
    _embed = tf.cast(diffusion_steps, dtype=tf.float32) * _embed
    diffusion_step_embed = tf.concat((tf.math.sin(_embed),
                                      tf.math.cos(_embed)), 1)
    assert diffusion_step_embed.shape[0] == diffusion_steps.shape[0]
    assert diffusion_step_embed.shape[1] == diffusion_step_embed_dim_in
    return diffusion_step_embed
class Residual_block(tf.keras.layers.Layer):
    def __init__(self, res_channels, skip_channels,
                 diffusion_step_embed_dim_out, in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_block, self).__init__()
        self.res_channels = res_channels
        self.fc_t = tf.keras.layers.Dense(self.res_channels)
        self.conv_layer = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=3, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.cond_conv = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv1 = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv2 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')

    def call(self, input_data):
        x, cond, diffusion_step_embed = input_data
        h = x
        B, C, L = h.shape
        part_t = self.fc_t(diffusion_step_embed)
        part_t = tf.reshape(part_t, [B, self.res_channels, 1])
        h = h + part_t
        h = self.conv_layer(h)
        cond = self.cond_conv(cond)
        h += cond
        out = tf.math.tanh(h[:, :self.res_channels, :]) * tf.math.sigmoid(h[:, self.res_channels:, :])
        res = self.res_conv1(out)
        skip = self.res_conv2(out)
        return (x + res) * tf.math.sqrt(0.5), skip  # normalize for training stability
class Residual_group(tf.keras.Model):
    def __init__(self, res_channels, skip_channels, num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_group, self).__init__()
        self.num_res_layers = num_res_layers
        self.diffusion_step_embed_dim_in = diffusion_step_embed_dim_in
        self.fc_t1 = tf.keras.layers.Dense(diffusion_step_embed_dim_mid)
        self.fc_t2 = tf.keras.layers.Dense(diffusion_step_embed_dim_out)
        self.residual_blocks = []
        for n in range(self.num_res_layers):
            self.residual_blocks.append(Residual_block(res_channels, skip_channels,
                                                       diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                                       in_channels=in_channels,
                                                       s4_lmax=s4_lmax,
                                                       s4_d_state=s4_d_state,
                                                       s4_dropout=s4_dropout,
                                                       s4_bidirectional=s4_bidirectional,
                                                       s4_layernorm=s4_layernorm))

    def call(self, input_data):
        h, conditional, diffusion_steps = input_data
        diffusion_step_embed = calc_diffusion_step_embedding(diffusion_steps, self.diffusion_step_embed_dim_in)
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t1(diffusion_step_embed))
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t2(diffusion_step_embed))
        #out = self.residual_blocks((h, tf.zeros((8,256,248)), conditional, diffusion_step_embed))
        #skip = out[1]
        skip = tf.zeros((8, 256, 248))
        for n in range(self.num_res_layers):
            h, skip_n = self.residual_blocks[n]((h, conditional, diffusion_step_embed))
            skip += skip_n
        return skip * tf.math.sqrt(1.0 / self.num_res_layers)
class SSSDS4Imputer(tf.keras.Model):
    def __init__(self, in_channels, res_channels, skip_channels, out_channels,
                 num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(SSSDS4Imputer, self).__init__()
        # convert the dimension of input from (B,in_channels,L) to (B,res_channels,L)
        self.init_conv = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_channels = res_channels
        self.skip_channels = skip_channels
        self.residual_layer = Residual_group(res_channels=res_channels,
                                             skip_channels=skip_channels,
                                             num_res_layers=num_res_layers,
                                             diffusion_step_embed_dim_in=diffusion_step_embed_dim_in,
                                             diffusion_step_embed_dim_mid=diffusion_step_embed_dim_mid,
                                             diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                             in_channels=in_channels,
                                             s4_lmax=s4_lmax,
                                             s4_d_state=s4_d_state,
                                             s4_dropout=s4_dropout,
                                             s4_bidirectional=s4_bidirectional,
                                             s4_layernorm=s4_layernorm)
        # convert the dimension from (B,skip_channels,L) to (B,out_channels,L)
        self.final_conv1 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.final_conv2 = tf.keras.layers.Conv1D(filters=out_channels, kernel_size=1, padding='SAME', use_bias=False, kernel_initializer='zeros', data_format='channels_first')

    def call(self, input_data):
        x, conditional, mask, diffusion_steps = input_data
        conditional = conditional * mask
        conditional = tf.concat([conditional, tf.cast(mask, dtype=tf.float32)], axis=1)
        x = tf.nn.relu(self.init_conv(x))
        x = self.residual_layer((x, conditional, diffusion_steps))
        y = tf.nn.relu(self.final_conv1(x))
        y = tf.nn.relu(self.final_conv2(y))
        return y
def train_step(X, y, net, loss_fn, optimizer):
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value.numpy()
if __name__ == '__main__':
    model_config = {'in_channels': 12, 'out_channels': 12, 'num_res_layers': 36, 'res_channels': 256, 'skip_channels': 256,
                    'diffusion_step_embed_dim_in': 128, 'diffusion_step_embed_dim_mid': 512, 'diffusion_step_embed_dim_out': 512,
                    's4_lmax': 250, 's4_d_state': 64, 's4_dropout': 0.0, 's4_bidirectional': 1, 's4_layernorm': 1}
    net = SSSDS4Imputer(**model_config)
    # define optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    # training
    n_iter = 0
    #iterator = iter(dataset)
    while n_iter < 150000 + 1:
        #try:
        X = (tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1),
             tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2),
             tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2),
             tf.random.normal([8, 1], 0, 1, tf.float32, seed=4))
        y = tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1)
        t0 = time.time()
        loss = train_step(X, y, net, tf.keras.losses.MeanSquaredError(), optimizer)
        print(time.time() - t0)
        n_iter += 1
The TensorFlow version I use is 2.4.1.
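One likely contributor, independent of the model itself: train_step above runs eagerly, so every op in the 36 residual blocks is dispatched from Python on each iteration. Decorating the step with tf.function traces it into a graph once, which typically improves both speed and GPU utilization; creating the MeanSquaredError object once outside the loop also avoids rebuilding it every iteration. A minimal sketch under those assumptions, with net and optimizer captured from the enclosing scope:
loss_obj = tf.keras.losses.MeanSquaredError()  # build once, not once per iteration

@tf.function
def train_step(X, y):
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_obj(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value  # a tensor inside tf.function; call .numpy() outside if needed

# usage inside the loop: loss = train_step(X, y).numpy()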

How to apply MeanIoU to multioutput model?

I have a model based on MobileNetV2 with 2 outputs: class (cat/dog) and face coordinates. So the "class" output has a BinaryCrossentropy loss and the "bbox" output has a YOLO-style loss (in the code).
The problem is, when I try to apply metrics (Accuracy for the class and MeanIoU for the bbox):
On a random dataset: they show strange results (accuracy == 0, miou == 1 all the time).
On the real dataset (images and labels): fit() throws an error:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
I suspect that I messed up the outputs and metrics somehow; it would be nice if someone with more experience in TensorFlow would take a look at it.
Here is the code (with a random dataset to reproduce):
import tensorflow as tf
# generate fake dataset
IMG_SIZE = 200
num_of_samples = 2000
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
train = tf.data.Dataset.zip((images, labels))
train = train.shuffle(num_of_samples)
train = train.batch(8)
train = train.prefetch(4)
num_of_samples = 500
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
valid = tf.data.Dataset.zip((images, labels))
valid = valid.shuffle(num_of_samples)
valid = valid.batch(8)
valid = valid.prefetch(4)
# Model with two outputs
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import MobileNetV2
def cnn_from_transfer():
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    input_layer = Input(shape=IMG_SHAPE)
    base_net = MobileNetV2(include_top=False, weights='imagenet')(input_layer)
    # Classification
    h1 = GlobalMaxPooling2D()(base_net)
    class1 = Dense(2048, activation='relu')(h1)
    class2 = Dense(1, activation='sigmoid')(class1)
    # Bounding box
    h2 = GlobalMaxPooling2D()(base_net)
    regress1 = Dense(2048, activation='relu')(h2)
    regress2 = Dense(4, activation='sigmoid')(regress1)
    return Model(inputs=input_layer, outputs=[class2, regress2])
model = cnn_from_transfer()
# Losses
def localization_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2]))
    h_true = y_true[:,3] - y_true[:,1]
    w_true = y_true[:,2] - y_true[:,0]
    h_pred = yhat[:,3] - yhat[:,1]
    w_pred = yhat[:,2] - yhat[:,0]
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true - h_pred))
    return delta_coord + delta_size
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss
# Train
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=0.00001),
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.Accuracy()], [tf.keras.metrics.MeanIoU(num_classes=2)]],
)
history = model.fit(train, epochs=5, validation_data=valid)
What am I doing wrong?
As M.Innat said in the comments, tf.keras.metrics.MeanIoU is not applicable to my case (bounding boxes), so I need to write my own custom metric (the iou_metric function below). Moreover, I found out that BinaryAccuracy should be used instead of the regular Accuracy. The rest is fine.
Here is the corrected code:
def iou_metric(y_true, y_pred):
    aog = tf.abs(tf.transpose(y_true)[2] - tf.transpose(y_true)[0] + 1) * tf.abs(tf.transpose(y_true)[3] - tf.transpose(y_true)[1] + 1)
    aop = tf.abs(tf.transpose(y_pred)[2] - tf.transpose(y_pred)[0] + 1) * tf.abs(tf.transpose(y_pred)[3] - tf.transpose(y_pred)[1] + 1)
    overlap_0 = tf.maximum(tf.transpose(y_true)[0], tf.transpose(y_pred)[0])
    overlap_1 = tf.maximum(tf.transpose(y_true)[1], tf.transpose(y_pred)[1])
    overlap_2 = tf.minimum(tf.transpose(y_true)[2], tf.transpose(y_pred)[2])
    overlap_3 = tf.minimum(tf.transpose(y_true)[3], tf.transpose(y_pred)[3])
    intersection = (overlap_2 - overlap_0 + 1) * (overlap_3 - overlap_1 + 1)
    union = aog + aop - intersection
    iou = intersection / union
    iou = tf.keras.backend.clip(iou, 0.0 + tf.keras.backend.epsilon(), 1.0 - tf.keras.backend.epsilon())
    return iou
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=0.00001),  # same optimizer as above
    loss=[classloss, regressloss],
    metrics=[[tf.keras.metrics.BinaryAccuracy()], [iou_metric]],
)
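A side note on the wiring: with multiple outputs, positional lists for loss and metrics depend on output order. Keras also accepts dicts keyed by output name, which is less error-prone; a minimal sketch, where 'class_out' and 'bbox_out' are hypothetical names given to the two heads:
# hypothetical output names -- attach them when building the heads in cnn_from_transfer()
class2 = Dense(1, activation='sigmoid', name='class_out')(class1)
regress2 = Dense(4, activation='sigmoid', name='bbox_out')(regress1)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss={'class_out': classloss, 'bbox_out': regressloss},
    metrics={'class_out': tf.keras.metrics.BinaryAccuracy(), 'bbox_out': iou_metric},
)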

TensorFlow equivalent of this code written in Keras

The Keras code runs perfectly, and the loss is close to zero. The input data is xData, the labeled data is yTrainData.
xData = np.reshape(xData, (-1, 1, sendLengthG * 4))
yTrainData = np.reshape(yTrainData, (-1, sendLengthG, sentComponentTypeCount))
model = k.models.Sequential()
model.add(k.layers.Dense(512, input_shape=(1, sendLengthG * 4), activation='tanh'))
model.add(k.layers.Dense(sendLengthG * sentComponentTypeCount, activation='linear'))
model.add(k.layers.Reshape([sendLengthG, sentComponentTypeCount]))
model.add(k.layers.Dense(sentComponentTypeCount, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='RMSProp', metrics=['accuracy'])
model.summary()
model.fit(xData, yTrainData, epochs=roundCount, batch_size=1, verbose=2)
I've written some TensorFlow code like the following, but I cannot reduce the loss below 0.011:
x = tf.placeholder(dtype=tf.float32)
yTrain = tf.placeholder(dtype=tf.float32)
x1 = tf.reshape(x, shape=[1, sendLengthG * 4])
nodeCount1 = 18
w1 = tf.Variable(tf.random_normal([sendLengthG * 4, nodeCount1], mean=0.5, stddev=0.1), dtype=tf.float32)
b1 = tf.Variable(tf.zeros([nodeCount1]), dtype=tf.float32)
n1 = tf.nn.tanh(tf.matmul(x1, w1) + b1)
nodeCount2 = 21
w2 = tf.Variable(tf.random_normal([nodeCount1, nodeCount2], mean=1.5, stddev=0.1), dtype=tf.float32)
b2 = tf.Variable(tf.zeros([nodeCount2]), dtype=tf.float32)
n2 = tf.nn.tanh(tf.matmul(n1, w2) + b2)
wn = tf.Variable(tf.random_normal([nodeCount2, sendLengthG * sentComponentTypeCount], mean=0.5, stddev=0.1), dtype=tf.float32)
bn = tf.Variable(tf.zeros([sendLengthG * sentComponentTypeCount]), dtype=tf.float32)
y = tf.matmul(n2, wn) + bn
yResult = tf.nn.softmax(tf.reshape(y, [sendLengthG, -1]))
loss = -tf.reduce_mean(yTrain * tf.log(tf.clip_by_value(yResult, 1e-10, 1.0)))
optimizer = tf.train.RMSPropOptimizer(learnRate)
train = optimizer.minimize(loss)
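For what it's worth, the raw-TF network above is not structurally the same as the Keras one: the Keras model has a single 512-unit tanh layer, a linear layer, a reshape, and a final per-position softmax Dense, while the TF version uses two tanh layers of 18 and 21 units with non-zero-mean initializers. A minimal sketch of a more literal translation, assuming sendLengthG, sentComponentTypeCount, and learnRate are defined as in the question (Keras Dense defaults are glorot_uniform kernels and zero biases):
x = tf.placeholder(dtype=tf.float32, shape=[None, sendLengthG * 4])
yTrain = tf.placeholder(dtype=tf.float32, shape=[None, sendLengthG, sentComponentTypeCount])

# Dense(512, activation='tanh')
w1 = tf.Variable(tf.glorot_uniform_initializer()([sendLengthG * 4, 512]))
b1 = tf.Variable(tf.zeros([512]))
n1 = tf.nn.tanh(tf.matmul(x, w1) + b1)

# Dense(sendLengthG * sentComponentTypeCount, activation='linear') + Reshape
w2 = tf.Variable(tf.glorot_uniform_initializer()([512, sendLengthG * sentComponentTypeCount]))
b2 = tf.Variable(tf.zeros([sendLengthG * sentComponentTypeCount]))
n2 = tf.reshape(tf.matmul(n1, w2) + b2, [-1, sendLengthG, sentComponentTypeCount])

# Dense(sentComponentTypeCount, activation='softmax') applied along the last axis
w3 = tf.Variable(tf.glorot_uniform_initializer()([sentComponentTypeCount, sentComponentTypeCount]))
b3 = tf.Variable(tf.zeros([sentComponentTypeCount]))
yResult = tf.nn.softmax(tf.tensordot(n2, w3, axes=[[2], [0]]) + b3, axis=-1)

# categorical crossentropy: sum over classes, mean over batch and positions
loss = -tf.reduce_mean(tf.reduce_sum(yTrain * tf.log(tf.clip_by_value(yResult, 1e-10, 1.0)), axis=-1))
train = tf.train.RMSPropOptimizer(learnRate).minimize(loss)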

Tensorflow: cost not changing

I built a CNN with the following structure. The inputs are 32 * 32 * 3 pictures and one-hot labels of 10 categories.
inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name="input")
targets = tf.placeholder(tf.float32, [None, 10], name="targets")
layer_1_filter = tf.layers.conv2d(inputs=inputs,
                                  filters=64,
                                  kernel_size=(2, 2),
                                  strides=(1, 1),
                                  padding="same",
                                  activation=tf.nn.relu)
layer_2_pooling = tf.layers.max_pooling2d(inputs=layer_1_filter,
                                          pool_size=(2 * 2),
                                          strides=1 * 1,
                                          padding='same')
layer_3_filter = tf.layers.conv2d(inputs=layer_2_pooling,
                                  filters=128,
                                  kernel_size=(4, 4),
                                  strides=(1, 1),
                                  padding="same",
                                  activation=tf.nn.relu)
layer_4_pooling = tf.layers.max_pooling2d(inputs=layer_3_filter,
                                          pool_size=(2 * 2),
                                          strides=1 * 1,
                                          padding='same')
sha = np.prod(layer_4_pooling.get_shape().as_list()[1:])
layer_5_reshape = tf.reshape(tensor=layer_4_pooling,
                             shape=[-1, sha])
layer_6_fc = tf.contrib.layers.fully_connected(inputs=layer_5_reshape,
                                               num_outputs=1024)
layer_6_fc = tf.nn.dropout(layer_6_fc, keep_prob)  # faster with dropout
layer_7_fc2 = tf.contrib.layers.fully_connected(inputs=layer_6_fc,
                                                num_outputs=512)
layer_8_fc3 = tf.contrib.layers.fully_connected(inputs=layer_7_fc2,
                                                num_outputs=10)
layer_9_logit = tf.identity(input=layer_8_fc3,
                            name="logistic")
And I define my cost and optimizer as:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=layer_9_logit, labels=targets))
optimizer = tf.train.AdamOptimizer().minimize(cost)
When I run it, the cost always hovers around a certain number: 2.30. I tried it several times and it always converges to it.
count = 0
with tf.Session() as sess:
    print(info)
    sess.run(tf.global_variables_initializer())
    for batch_i in range(img_shape[0] // batch_size - 1):
        feature_batch = picture[batch_i * batch_size: (batch_i + 1) * batch_size]
        label_batch = label[batch_i * batch_size: (batch_i + 1) * batch_size]
        train_loss, _ = sess.run([cost, optimizer],
                                 feed_dict={inputs: feature_batch,
                                            targets: label_batch})
        if count % 10 == 0:
            print(str(count) + ' | Train Loss {:.8f}'.format(train_loss))
        count += 1
Output:
0 | Train Loss 37.51004410
10 | Train Loss 2.30226469
20 | Train Loss 2.30263376
30 | Train Loss 2.30258608
40 | Train Loss 2.30258536
50 | Train Loss 2.30265045
60 | Train Loss 2.35271192
70 | Train Loss 2.30241871
May I ask why this happens and how to fix it? Thanks a lot.
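For context, 2.30 ≈ ln(10), which is exactly the cross-entropy of a uniform prediction over 10 classes, so the network has collapsed to guessing every class with probability 0.1 rather than learning. Two common first things to try, sketched under the assumption that picture holds raw 0-255 pixel values:
picture = picture.astype(np.float32) / 255.0  # scale inputs; unnormalized pixels often stall training
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)  # smaller, explicit learning rate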

ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'

I am learning TensorFlow. The following is my code for an MLP with TensorFlow. I have some issues with mismatched data dimensions.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
wholedataset = np.load('C:/Users/pourya/Downloads/WholeTrueData.npz')
data = wholedataset['wholedata'].astype('float32')
label = wholedataset['wholelabel'].astype('float32')
height = wholedataset['wholeheight'].astype('float32')
print(type(data[20,1,1,0]))
learning_rate = 0.001
training_iters = 5
display_step = 20
n_input = 3375
X = tf.placeholder("float32")
Y = tf.placeholder("float32")
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 2, 1])),
    'wd1': tf.Variable(tf.random_normal([3, 3, 1, 1]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([1])),
    'out': tf.Variable(tf.random_normal([1, 50, 50, 1]))
}
mnist= data
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 2
batch_size = 100
x = tf.placeholder('float', shape = [None,50,50,2])
shape = x.get_shape().as_list()
dim = np.prod(shape[1:])
x_reshaped = tf.reshape(x, [-1, dim])
y = tf.placeholder('float', shape= [None,50,50,2])
shape = y.get_shape().as_list()
dim = np.prod(shape[1:])
y_reshaped = tf.reshape(y, [-1, dim])
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([5000, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
    return output
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(n_input / batch_size)):
                epoch_x = wholedataset['wholedata'].astype('float32')
                epoch_y = wholedataset['wholedata'].astype('float32')
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
train_neural_network(x)
I got the following error:
ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'
Does anyone know what the issue with my code is, and how I can fix it?
The shape of the data is (3375, 50, 50, 2).
Thank you for anyone's input!
I think the problem is that you use the same variable name x for the placeholder and the reshape, in the lines
x = tf.placeholder('float', shape = [None,50,50,2])
and
x = tf.reshape(x, [-1, dim])
so that when you
feed_dict={x: your_val}
you are feeding the output of the reshape operation.
You should use different names, for instance
x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
and then
feed_dict={x_placeholder: your_val}
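Putting it together, a minimal self-contained sketch of the corrected pattern, with hypothetical 50x50x2 inputs as in the question:
import numpy as np
import tensorflow as tf

x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
dim = int(np.prod(x_placeholder.get_shape().as_list()[1:]))  # 5000
x_reshaped = tf.reshape(x_placeholder, [-1, dim])

with tf.Session() as sess:
    your_val = np.zeros((3375, 50, 50, 2), dtype=np.float32)
    # feed the placeholder, not the output of the reshape op
    flat = sess.run(x_reshaped, feed_dict={x_placeholder: your_val})
    print(flat.shape)  # (3375, 5000)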