I have tried to rewrite a TensorFlow 1 model in TensorFlow 2 using the Keras API. Why do the losses I am getting differ so much? Aren't they the same model?
TF1 version:
conv1 = tf.compat.v1.layers.conv1d(input_data, num_filters, 1, strides=1, padding = 'valid', kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
conv1_norm = tf.compat.v1.layers.batch_normalization(conv1)
conv1_activ = tf.nn.sigmoid(conv1_norm)
conv2 = tf.compat.v1.layers.conv1d(conv1_activ, 2*num_filters, 3, padding = 'valid', kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
conv2_norm = tf.compat.v1.layers.batch_normalization(conv2)
conv2_activ = tf.nn.sigmoid(conv2_norm)
dropout = tf.nn.dropout(conv2_activ, rate = 1 - (1 - dropout_prob/2))
flat_layer = tf.compat.v1.layers.flatten(dropout)
dense = tf.compat.v1.layers.dense(flat_layer, units = 4 * num_filters, kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
dense_norm = tf.compat.v1.layers.batch_normalization(dense)
dense_activ = tf.nn.sigmoid(dense_norm)
y_pred = tf.compat.v1.layers.dense(dense_activ, units = 1, kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros')
y_pred = tf.nn.tanh(y_pred)
Phi_t_predict = Phi0_t_tf + y_pred
AI_t_predict = 5 * Phi_t_predict + 0.3
ref_t_predict = (AI_t_predict[1:] - AI_t_predict[:-1]) / (AI_t_predict[:-1] + AI_t_predict[1:])
syn_PP_t = tf.matmul(wvlt_map_tf, ref_t_predict)
data_misfit = 100 * tf.reduce_mean(input_tensor=tf.square(syn_PP_t - output_data))
model_misfit = 1 * tf.reduce_mean(input_tensor=tf.square(Phi_t_predict - Phi0_t_tf))
up = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[0] - Phi0_t_tf[0]))
bottom = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[-1] - Phi0_t_tf[-1]))
bound = 1 * (up + bottom) / 2
loss = data_misfit + model_misfit
optimizer = tf.compat.v1.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
#%%
plt.close('all')
iter = 5001
l = np.zeros((iter,1))
data_error = np.zeros((iter,1))
model_error = np.zeros((iter,1))
AI_predict_iter = np.zeros((len(AI0_t),iter))
Phi_predict_iter = np.zeros((len(AI0_t),iter))
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for step in range(iter):
        dic = {input_data: X_train, output_data: Y_train}
        _, AI_predict, l[step], model_error[step], Phi_predict, contrast = sess.run([train, AI_t_predict, loss, model_misfit, Phi_t_predict, y_pred], feed_dict=dic)
        ref_predict = (AI_predict[1:] - AI_predict[:-1]) / (AI_predict[:-1] + AI_predict[1:])
        top = AI_predict[0] - AI0_t[0]
        bottom = AI_predict[-1] - AI0_t[-1]
        AI_predict = AI_predict - (top + bottom) / 2
        AI_predict_iter[:, step] = AI_predict.ravel()
        top = Phi_predict[0] - Phi0_t[0]
        bottom = Phi_predict[-1] - Phi0_t[-1]
        Phi_predict = Phi_predict - (top + bottom) / 2
        Phi_predict_iter[:, step] = Phi_predict.ravel()
TensorFlow 2 model:
model = keras.Sequential()
model.add(layers.Conv1D(filters = num_filters, kernel_size=1, strides=1, padding = 'valid', kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Conv1D(filters = num_filters, kernel_size = 3, padding = 'valid', kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer='zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Dropout(rate = 0.1))
model.add(layers.Flatten())
model.add(layers.Dense(units = 4 * num_filters, kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer = 'zeros'))
model.add(layers.BatchNormalization())
model.add(layers.Activation('sigmoid'))
model.add(layers.Dense(units = 1, kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0, mode="fan_avg", distribution="uniform"), bias_initializer = 'zeros'))
model.add(layers.Activation('tanh'))
def loss_fn(y_pred, y_true):
    Phi_t_predict = Phi0_t_tf + y_pred
    AI_t_predict = 5 * Phi_t_predict + 0.3
    ref_t_predict = (AI_t_predict[1:] - AI_t_predict[:-1]) / (AI_t_predict[:-1] + AI_t_predict[1:])
    syn_PP_t = tf.matmul(wvlt_map_tf, ref_t_predict)
    y_true = y_true[1:,]
    data_misfit = 100 * tf.reduce_mean(input_tensor=tf.square(syn_PP_t - y_true))
    model_misfit = 1 * tf.reduce_mean(input_tensor=tf.square(Phi_t_predict - Phi0_t_tf))
    up = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[0] - Phi0_t_tf[0]))
    bottom = tf.reduce_mean(input_tensor=tf.square(Phi_t_predict[-1] - Phi0_t_tf[-1]))
    bound = 1 * (up + bottom) / 2
    loss = data_misfit + model_misfit
    return loss
model.compile(loss=loss_fn, optimizer=keras.optimizers.Adam(learning_rate=0.001))
model.fit(X_train, Y_train, epochs=5001, batch_size=64)
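One detail worth double-checking when porting: Keras calls a custom loss as loss_fn(y_true, y_pred), with the ground truth first and the prediction second. A minimal sketch of that documented argument order (my_loss is an illustrative name, not part of the model above):

def my_loss(y_true, y_pred):
    # Keras always passes the labels first and the model output second
    return tf.reduce_mean(tf.square(y_pred - y_true))

model.compile(loss=my_loss, optimizer='adam')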
TensorFlow has an upgrade utility, and using it I made the TF1 model executable in TF2; however, I need to write it in TF2 with Keras.
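As an aside, part of the run-to-run difference can come from random initialization alone. A minimal sketch that pins the seeds before building either graph (illustrative, not part of the original code):

import numpy as np
import tensorflow as tf

np.random.seed(42)                # NumPy-side randomness
tf.compat.v1.set_random_seed(42)  # TF1-style graph-level seed
tf.random.set_seed(42)            # TF2 / Keras global seed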
Related
I am trying to convert a PyTorch implementation to TensorFlow, but I found that the program occupies more memory, has lower GPU utilization, and is much slower than the PyTorch version. I am wondering whether this is generally the case, or whether I got something wrong in the TF model structure.
I put the code up so it can be run directly:
https://colab.research.google.com/drive/1oI6GVnt3sAULvbMMAGTY4B6LsHguJd9U#scrollTo=DNH5pPynm-jm
The PyTorch version uses half the memory and has twice the GPU utilization compared with the TF version.
Code:
import math
import os
import tensorflow as tf
import json
import numpy as np
import time
def calc_diffusion_step_embedding(diffusion_steps, diffusion_step_embed_dim_in):
    assert diffusion_step_embed_dim_in % 2 == 0
    half_dim = diffusion_step_embed_dim_in // 2
    _embed = tf.math.log(tf.convert_to_tensor(10000.0)) / (half_dim - 1)
    _embed = tf.math.exp(tf.cast(tf.experimental.numpy.arange(start=0, stop=half_dim), dtype=tf.float32) * -_embed)
    _embed = tf.cast(diffusion_steps, dtype=tf.float32) * _embed
    diffusion_step_embed = tf.concat((tf.math.sin(_embed),
                                      tf.math.cos(_embed)), 1)
    assert diffusion_step_embed.shape[0] == diffusion_steps.shape[0]
    assert diffusion_step_embed.shape[1] == diffusion_step_embed_dim_in
    return diffusion_step_embed
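For context, this computes the standard sinusoidal step embedding. A quick shape check, assuming a batch of two step indices (values illustrative):

steps = tf.constant([[10], [20]])  # (B, 1) integer diffusion steps
emb = calc_diffusion_step_embedding(steps, 128)
print(emb.shape)  # (2, 128): first half sine terms, second half cosine terms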
class Residual_block(tf.keras.layers.Layer):
    def __init__(self, res_channels, skip_channels,
                 diffusion_step_embed_dim_out, in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_block, self).__init__()
        self.res_channels = res_channels
        self.fc_t = tf.keras.layers.Dense(self.res_channels)
        self.conv_layer = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=3, padding='SAME',
                                                 use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.cond_conv = tf.keras.layers.Conv1D(filters=2 * self.res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv1 = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_conv2 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')

    def call(self, input_data):
        x, cond, diffusion_step_embed = input_data
        h = x
        B, C, L = h.shape
        part_t = self.fc_t(diffusion_step_embed)
        part_t = tf.reshape(part_t, [B, self.res_channels, 1])
        h = h + part_t
        h = self.conv_layer(h)
        cond = self.cond_conv(cond)
        h += cond
        out = tf.math.tanh(h[:, :self.res_channels, :]) * tf.math.sigmoid(h[:, self.res_channels:, :])
        res = self.res_conv1(out)
        skip = self.res_conv2(out)
        return (x + res) * tf.math.sqrt(0.5), skip  # normalize for training stability
class Residual_group(tf.keras.Model):
    def __init__(self, res_channels, skip_channels, num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 in_channels,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(Residual_group, self).__init__()
        self.num_res_layers = num_res_layers
        self.diffusion_step_embed_dim_in = diffusion_step_embed_dim_in
        self.fc_t1 = tf.keras.layers.Dense(diffusion_step_embed_dim_mid)
        self.fc_t2 = tf.keras.layers.Dense(diffusion_step_embed_dim_out)
        self.residual_blocks = []
        for n in range(self.num_res_layers):
            self.residual_blocks.append(Residual_block(res_channels, skip_channels,
                                                       diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                                       in_channels=in_channels,
                                                       s4_lmax=s4_lmax,
                                                       s4_d_state=s4_d_state,
                                                       s4_dropout=s4_dropout,
                                                       s4_bidirectional=s4_bidirectional,
                                                       s4_layernorm=s4_layernorm))

    def call(self, input_data):
        h, conditional, diffusion_steps = input_data
        diffusion_step_embed = calc_diffusion_step_embedding(diffusion_steps, self.diffusion_step_embed_dim_in)
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t1(diffusion_step_embed))
        diffusion_step_embed = tf.keras.activations.swish(self.fc_t2(diffusion_step_embed))
        #out = self.residual_blocks((h, tf.zeros((8,256,248)), conditional, diffusion_step_embed))
        #skip = out[1]
        skip = tf.zeros((8, 256, 248))
        for n in range(self.num_res_layers):
            h, skip_n = self.residual_blocks[n]((h, conditional, diffusion_step_embed))
            skip += skip_n
        return skip * tf.math.sqrt(1.0 / self.num_res_layers)
class SSSDS4Imputer(tf.keras.Model):
    def __init__(self, in_channels, res_channels, skip_channels, out_channels,
                 num_res_layers,
                 diffusion_step_embed_dim_in,
                 diffusion_step_embed_dim_mid,
                 diffusion_step_embed_dim_out,
                 s4_lmax,
                 s4_d_state,
                 s4_dropout,
                 s4_bidirectional,
                 s4_layernorm):
        super(SSSDS4Imputer, self).__init__()
        # convert the dimension of input from (B,in_channels,L) to (B,res_channels,L)
        self.init_conv = tf.keras.layers.Conv1D(filters=res_channels, kernel_size=1, padding='SAME',
                                                use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.res_channels = res_channels
        self.skip_channels = skip_channels
        self.residual_layer = Residual_group(res_channels=res_channels,
                                             skip_channels=skip_channels,
                                             num_res_layers=num_res_layers,
                                             diffusion_step_embed_dim_in=diffusion_step_embed_dim_in,
                                             diffusion_step_embed_dim_mid=diffusion_step_embed_dim_mid,
                                             diffusion_step_embed_dim_out=diffusion_step_embed_dim_out,
                                             in_channels=in_channels,
                                             s4_lmax=s4_lmax,
                                             s4_d_state=s4_d_state,
                                             s4_dropout=s4_dropout,
                                             s4_bidirectional=s4_bidirectional,
                                             s4_layernorm=s4_layernorm)
        # convert the dimension from (B,skip_channels,L) to (B,out_channels,L)
        self.final_conv1 = tf.keras.layers.Conv1D(filters=skip_channels, kernel_size=1, padding='SAME',
                                                  use_bias=False, kernel_initializer='he_normal', data_format='channels_first')
        self.final_conv2 = tf.keras.layers.Conv1D(filters=out_channels, kernel_size=1, padding='SAME',
                                                  use_bias=False, kernel_initializer='zeros', data_format='channels_first')

    def call(self, input_data):
        x, conditional, mask, diffusion_steps = input_data
        conditional = conditional * mask
        conditional = tf.concat([conditional, tf.cast(mask, dtype=tf.float32)], axis=1)
        x = tf.nn.relu(self.init_conv(x))
        x = self.residual_layer((x, conditional, diffusion_steps))
        y = tf.nn.relu(self.final_conv1(x))
        y = tf.nn.relu(self.final_conv2(y))
        return y
def train_step(X, y, net, loss_fn, optimizer):
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value.numpy()
if __name__ == '__main__':
    model_config = {'in_channels': 12, 'out_channels': 12, 'num_res_layers': 36, 'res_channels': 256, 'skip_channels': 256,
                    'diffusion_step_embed_dim_in': 128, 'diffusion_step_embed_dim_mid': 512, 'diffusion_step_embed_dim_out': 512,
                    's4_lmax': 250, 's4_d_state': 64, 's4_dropout': 0.0, 's4_bidirectional': 1, 's4_layernorm': 1}
    net = SSSDS4Imputer(**model_config)

    # define optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # training
    n_iter = 0
    #iterator = iter(dataset)
    while n_iter < 150000 + 1:
        #try:
        X = (tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1), tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2),
             tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=2), tf.random.normal([8, 1], 0, 1, tf.float32, seed=4))
        y = tf.random.normal([8, 12, 248], 0, 1, tf.float32, seed=1)
        t0 = time.time()
        loss = train_step(X, y, net, tf.keras.losses.MeanSquaredError(), optimizer)
        print(time.time() - t0)
        n_iter += 1
The TensorFlow version I use is 2.4.1.
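One common source of such a gap is per-step Python overhead in eager mode. A hedged sketch of the same step compiled with tf.function (note that the loss object should be created once outside the loop, since the code above constructs a new MeanSquaredError every iteration, and that the compiled function must return the tensor rather than calling .numpy() inside the graph):

loss_fn = tf.keras.losses.MeanSquaredError()  # create once, outside the loop

@tf.function
def compiled_train_step(X, y):
    # Traced into a graph on the first call; later calls skip Python overhead
    with tf.GradientTape() as tape:
        logits = net(X)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, net.trainable_variables,
                          unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    return loss_value  # call .numpy() on the result outside the graph, if needed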
I have a model based on MobileNetV2 with 2 outputs: class (cat/dog) and face coordinates. So the "class" output has a BinaryCrossentropy loss and the "bbox" output has a YOLO loss (in the code).
The problem is, when I try to apply metrics (Accuracy for class and MeanIoU for bbox):
On the random dataset: they show strange results (accuracy == 0, miou == 1 all the time).
On the real dataset (images and labels): fit() throws an error:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
I suspect that I messed up the outputs and metrics somehow; it would be nice if someone with more experience in TensorFlow could take a look at it.
Here is the code (with a random dataset to reproduce):
import tensorflow as tf
# generate fake dataset
IMG_SIZE = 200
num_of_samples = 2000
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
train = tf.data.Dataset.zip((images, labels))
train = train.shuffle(num_of_samples)
train = train.batch(8)
train = train.prefetch(4)
num_of_samples = 500
images = tf.random.uniform((num_of_samples, IMG_SIZE, IMG_SIZE, 3), minval=0, maxval=1)
images = tf.data.Dataset.from_tensor_slices(images)
label_classes = tf.random.uniform((num_of_samples, 1), minval=0, maxval=2, dtype=tf.int32)
label_classes = tf.data.Dataset.from_tensor_slices(label_classes)
label_coords = tf.random.uniform((num_of_samples, 4), minval=0, maxval=1, dtype=tf.float16)
label_coords = tf.data.Dataset.from_tensor_slices(label_coords)
labels = tf.data.Dataset.zip((label_classes, label_coords))
valid = tf.data.Dataset.zip((images, labels))
valid = valid.shuffle(num_of_samples)
valid = valid.batch(8)
valid = valid.prefetch(4)
# Model with two outputs
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import MobileNetV2
def cnn_from_transfer():
    IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
    input_layer = Input(shape=IMG_SHAPE)
    base_net = MobileNetV2(include_top=False, weights='imagenet')(input_layer)
    # Classification
    h1 = GlobalMaxPooling2D()(base_net)
    class1 = Dense(2048, activation='relu')(h1)
    class2 = Dense(1, activation='sigmoid')(class1)
    # Bounding box
    h2 = GlobalMaxPooling2D()(base_net)
    regress1 = Dense(2048, activation='relu')(h2)
    regress2 = Dense(4, activation='sigmoid')(regress1)
    return Model(inputs=input_layer, outputs=[class2, regress2])
model = cnn_from_transfer()
# Losses
def localization_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:, :2] - yhat[:, :2]))
    h_true = y_true[:, 3] - y_true[:, 1]
    w_true = y_true[:, 2] - y_true[:, 0]
    h_pred = yhat[:, 3] - yhat[:, 1]
    w_pred = yhat[:, 2] - yhat[:, 0]
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true - h_pred))
    return delta_coord + delta_size
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss
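As a quick sanity check of this loss, identical boxes should give exactly zero (values illustrative):

b = tf.constant([[0.1, 0.2, 0.6, 0.8]])
print(localization_loss(b, b).numpy())  # 0.0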
# Train
model.compile(
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, decay=0.00001),
loss=[classloss, regressloss],
metrics=[[tf.keras.metrics.Accuracy()], [tf.keras.metrics.MeanIoU(num_classes=2)]],
)
history = model.fit(train, epochs=5, validation_data=valid)
What am I doing wrong?
As M.Innat said in the comments, tf.keras.metrics.MeanIoU is not applicable to my case (bounding boxes), so I needed to write my own custom metric (the iou_metric function below). Moreover, I found out that BinaryAccuracy should be used instead of regular Accuracy. The rest is fine.
Here is the corrected code:
def iou_metric(y_true, y_pred):
    aog = tf.abs(tf.transpose(y_true)[2] - tf.transpose(y_true)[0] + 1) * tf.abs(tf.transpose(y_true)[3] - tf.transpose(y_true)[1] + 1)
    aop = tf.abs(tf.transpose(y_pred)[2] - tf.transpose(y_pred)[0] + 1) * tf.abs(tf.transpose(y_pred)[3] - tf.transpose(y_pred)[1] + 1)
    overlap_0 = tf.maximum(tf.transpose(y_true)[0], tf.transpose(y_pred)[0])
    overlap_1 = tf.maximum(tf.transpose(y_true)[1], tf.transpose(y_pred)[1])
    overlap_2 = tf.minimum(tf.transpose(y_true)[2], tf.transpose(y_pred)[2])
    overlap_3 = tf.minimum(tf.transpose(y_true)[3], tf.transpose(y_pred)[3])
    intersection = (overlap_2 - overlap_0 + 1) * (overlap_3 - overlap_1 + 1)
    union = aog + aop - intersection
    iou = intersection / union
    iou = tf.keras.backend.clip(iou, 0.0 + tf.keras.backend.epsilon(), 1.0 - tf.keras.backend.epsilon())
    return iou
model.compile(
optimizer = opt,
loss=[classloss, regressloss],
metrics=[[tf.keras.metrics.BinaryAccuracy()], [iou_metric]],
)
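A quick way to sanity-check the metric: a box compared against itself should score just under 1.0, since the result is clipped by epsilon. Illustrative values:

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])
print(iou_metric(boxes, boxes).numpy())  # ~1.0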
The Keras code runs perfectly, and the loss is close to zero. The input data is xData, and the labeled data is yTrainData.
xData = np.reshape(xData, (-1, 1, sendLengthG * 4))
yTrainData = np.reshape(yTrainData, (-1, sendLengthG, sentComponentTypeCount))
model = k.models.Sequential()
model.add(k.layers.Dense(512, input_shape=(1, sendLengthG * 4), activation='tanh'))
model.add(k.layers.Dense(sendLengthG * sentComponentTypeCount, activation='linear'))
model.add(k.layers.Reshape([sendLengthG, sentComponentTypeCount]))
model.add(k.layers.Dense(sentComponentTypeCount, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='RMSProp', metrics=['accuracy'])
model.summary()
model.fit(xData, yTrainData, epochs=roundCount, batch_size=1, verbose=2)
I've written some TensorFlow code like the below, but I cannot reduce the loss under 0.011:
x = tf.placeholder(dtype=tf.float32)
yTrain = tf.placeholder(dtype=tf.float32)
x1 = tf.reshape(x, shape=[1, sendLengthG * 4])
nodeCount1 = 18
w1 = tf.Variable(tf.random_normal([sendLengthG * 4, nodeCount1], mean=0.5, stddev=0.1), dtype=tf.float32)
b1 = tf.Variable(tf.zeros([nodeCount1]), dtype=tf.float32)
n1 = tf.nn.tanh(tf.matmul(x1, w1) + b1)
nodeCount2 = 21
w2 = tf.Variable(tf.random_normal([nodeCount1, nodeCount2], mean=1.5, stddev=0.1), dtype=tf.float32)
b2 = tf.Variable(tf.zeros([nodeCount2]), dtype=tf.float32)
n2 = tf.nn.tanh(tf.matmul(n1, w2) + b2)
wn = tf.Variable(tf.random_normal([nodeCount2, sendLengthG * sentComponentTypeCount], mean=0.5, stddev=0.1), dtype=tf.float32)
bn = tf.Variable(tf.zeros([sendLengthG * sentComponentTypeCount]), dtype=tf.float32)
y = tf.matmul(n2, wn) + bn
yResult = tf.nn.softmax(tf.reshape(y, [sendLengthG, -1]))
loss = -tf.reduce_mean(yTrain * tf.log(tf.clip_by_value(yResult, 1e-10, 1.0)))
optimizer = tf.train.RMSPropOptimizer(learnRate)
train = optimizer.minimize(loss)
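One difference worth noting between the two versions: Keras's categorical_crossentropy sums over the class axis and then averages, while the hand-written loss above averages over every element at once. A sketch of the Keras-matching reduction, with the same clipping (an observation, not a tested fix):

# Sum the log-likelihood over the class axis first, then average the rows;
# this mirrors what Keras's categorical_crossentropy reports:
loss = -tf.reduce_mean(
    tf.reduce_sum(yTrain * tf.log(tf.clip_by_value(yResult, 1e-10, 1.0)),
                  axis=-1))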
I built up a CNN with the following structure. The inputs are 32 * 32 * 3 pictures and a one-hot label over 10 categories.
inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name = "input")
targets = tf.placeholder(tf.float32, [None, 10], name = "targets")
layer_1_filter = tf.layers.conv2d(inputs=inputs,
                                  filters=64,
                                  kernel_size=(2, 2),
                                  strides=(1, 1),
                                  padding="same",
                                  activation=tf.nn.relu)
layer_2_pooling = tf.layers.max_pooling2d(inputs=layer_1_filter,
                                          pool_size=(2 * 2),
                                          strides=1 * 1,
                                          padding='same')
layer_3_filter = tf.layers.conv2d(inputs=layer_2_pooling,
                                  filters=128,
                                  kernel_size=(4, 4),
                                  strides=(1, 1),
                                  padding="same",
                                  activation=tf.nn.relu)
layer_4_pooling = tf.layers.max_pooling2d(inputs=layer_3_filter,
                                          pool_size=(2 * 2),
                                          strides=1 * 1,
                                          padding='same')
sha = np.prod(layer_4_pooling.get_shape().as_list()[1:])
layer_5_reshape = tf.reshape(tensor=layer_4_pooling,
                             shape=[-1, sha])
layer_6_fc = tf.contrib.layers.fully_connected(inputs=layer_5_reshape,
                                               num_outputs=1024)
layer_6_fc = tf.nn.dropout(layer_6_fc, keep_prob)  # Faster with drop out
layer_7_fc2 = tf.contrib.layers.fully_connected(inputs=layer_6_fc,
                                                num_outputs=512)
layer_8_fc3 = tf.contrib.layers.fully_connected(inputs=layer_7_fc2,
                                                num_outputs=10)
layer_9_logit = tf.identity(input=layer_8_fc3,
                            name="logistic")
And I define my cost and optimizer as:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=layer_9_logit, labels=targets))
optimizer = tf.train.AdamOptimizer().minimize(cost)
When I run it, the cost always stays around a certain number: 2.30. I tried it several times and it always converges to that value.
count = 0
with tf.Session() as sess:
    print(info)
    sess.run(tf.global_variables_initializer())
    for batch_i in range(img_shape[0] // batch_size - 1):
        feature_batch = picture[batch_i * batch_size: (batch_i + 1) * batch_size]
        label_batch = label[batch_i * batch_size: (batch_i + 1) * batch_size]
        train_loss, _ = sess.run([cost, optimizer],
                                 feed_dict={inputs: feature_batch,
                                            targets: label_batch})
        if count % 10 == 0:
            print(str(count) + ' | Train Loss {:.8f}'.format(train_loss))
        count += 1
Output:
0 | Train Loss 37.51004410
10 | Train Loss 2.30226469
20 | Train Loss 2.30263376
30 | Train Loss 2.30258608
40 | Train Loss 2.30258536
50 | Train Loss 2.30265045
60 | Train Loss 2.35271192
70 | Train Loss 2.30241871
May I ask why this happens and how to fix it? Thanks a lot.
I am learning TensorFlow. The following is my code for an MLP with TensorFlow. I have some issues with mismatched data dimensions.
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
wholedataset = np.load('C:/Users/pourya/Downloads/WholeTrueData.npz')
data = wholedataset['wholedata'].astype('float32')
label = wholedataset['wholelabel'].astype('float32')
height = wholedataset['wholeheight'].astype('float32')
print(type(data[20,1,1,0]))
learning_rate = 0.001
training_iters = 5
display_step = 20
n_input = 3375
X = tf.placeholder("float32")
Y = tf.placeholder("float32")
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 2, 1])),
    'wd1': tf.Variable(tf.random_normal([3, 3, 1, 1]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([1])),
    'out': tf.Variable(tf.random_normal([1, 50, 50, 1]))
}
mnist= data
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 2
batch_size = 100
x = tf.placeholder('float', shape = [None,50,50,2])
shape = x.get_shape().as_list()
dim = np.prod(shape[1:])
x_reshaped = tf.reshape(x, [-1, dim])
y = tf.placeholder('float', shape= [None,50,50,2])
shape = y.get_shape().as_list()
dim = np.prod(shape[1:])
y_reshaped = tf.reshape(y, [-1, dim])
def neural_network_model(data):
    hidden_1_layer = {'weights': tf.Variable(tf.random_normal([5000, n_nodes_hl1])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden_2_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden_3_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                      'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    output_layer = {'weights': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                    'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']),
                hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']),
                hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']),
                hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
    return output
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(n_input / batch_size)):
                epoch_x = wholedataset['wholedata'].astype('float32')
                epoch_y = wholedataset['wholedata'].astype('float32')
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

train_neural_network(x)
I got the following error:
ValueError: Cannot feed value of shape (3375, 50, 50, 2) for Tensor 'Reshape:0', which has shape '(?, 5000)'
Does anyone know what the issue with my code is, and how I can fix it?
The shape of the data is (3375, 50, 50, 2).
Thank you for anyone's input!
I think that the problem is that you use the same variable name x for the placeholder and the reshape, in the lines
x = tf.placeholder('float', shape = [None,50,50,2])
and
x = tf.reshape(x, [-1, dim])
so that when you
feed_dict={x: your_val}
you are feeding the output of the reshape operation.
You should use different names, for instance
x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
and then
feed_dict={x_placeholder: your_val}
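Putting the pieces together, a minimal sketch with the renamed tensors (shapes taken from the question):

import numpy as np
import tensorflow as tf

x_placeholder = tf.placeholder('float', shape=[None, 50, 50, 2])
dim = np.prod(x_placeholder.get_shape().as_list()[1:])  # 50 * 50 * 2 = 5000
x_reshaped = tf.reshape(x_placeholder, [-1, dim])
# ...build the network on x_reshaped...
# Always feed the placeholder, never the reshaped tensor:
# sess.run(cost, feed_dict={x_placeholder: your_val})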