TF CuDNNLSTM minimum predicted value is always higher than 0, even when the training data contains 0... as if everything were shifted by +5

Basically, I've tried this code:
import numpy as np
from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, CuDNNLSTM  # CuDNNLSTM as available in tf.keras under TF 1.x

# standard windowing helper from the tutorial this code is based on (not shown in the original post):
# convert an array of values into X=t and Y=t+1 pairs
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        dataX.append(dataset[i:(i + look_back), 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

np.random.seed(7)
dataframe = read_csv('c:/data/suicides.csv', usecols=[1], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
# Initialising the RNN
#regressor = Sequential()
model = Sequential()
# # In[25]:
# # Adding the first LSTM layer and some Dropout regularisation
model.add(CuDNNLSTM(units = 10, return_sequences = True, input_shape = (trainX.shape[1], 1)))
model.add(Dropout(0.1))
# # In[26]:
# # Adding a second LSTM layer and some Dropout regularisation
model.add(CuDNNLSTM(units = 5, return_sequences = True))
model.add(Dropout(0.1))
# # In[27]:
# # Adding a third LSTM layer and some Dropout regularisation
model.add(CuDNNLSTM(units = 4, return_sequences = True))
model.add(Dropout(0.1))
# # In[28]:
# ## Adding a fourth LSTM layer and some Dropout regularisation
model.add(CuDNNLSTM(units = 2))
model.add(Dropout(0.2))
# # In[29]:
# # Adding the output layer
model.add(Dense(units = 1))
# # In[30]:
# # Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# # In[33]:
# #epoch = [10, 15, 20, 25, 30, 35, 40, 45, 50]
# # Fitting the RNN to the Training set
# _epoch and _batch are set elsewhere in the original script (values not shown in the post)
model.fit(trainX, trainY, epochs = _epoch, batch_size = _batch)
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
Unfortunately, even though the series contains long stretches of 0, it never predicts zero.
The min/max of the train/test data starts at 0, but the minimum of the predictions is always around 5-6.
The train/test data ranges from 0 to roughly 40.
I've tried different settings (number of epochs, activation, optimizer, loss), but the minimum predicted value is always greater than roughly 15% of the maximum training value.

For some reason a batch size > 1 on this time series was generating minimum predictions that were too high.
Solved by changing the batch settings plus the model training settings.
So if your minimum predictions stay well above 0, there is probably something wrong with the training settings.
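For reference, a minimal sketch of that change using the code from the question (the epoch count here is only a placeholder), including inverting the MinMaxScaler so predictions are compared on the original 0-40 scale:
# refit with batch_size=1; the epoch count is a placeholder, not from the original post
model.fit(trainX, trainY, epochs=50, batch_size=1)
# predictions come back in the scaled (0, 1) range, so invert the scaler
# before comparing against the raw data
trainPredict = scaler.inverse_transform(model.predict(trainX))
testPredict = scaler.inverse_transform(model.predict(testX))
print(testPredict.min(), testPredict.max())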

Related

Dimensions must be equal, but are 2 and 3 for node binary_crossentropy/mul

I was checking the code I found here, the example at Multivariate Multi-Step LSTM Models -> Multiple Input Multi-Step Output.
I altered the code and used binary_crossentropy and sigmoid activation for the last layer.
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out - 1
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

# define input sequence
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
out_seq = array([in_seq1[i] + in_seq2[i] for i in range(len(in_seq1))])
# convert to [rows, columns] structure
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))
# horizontally stack columns
dataset = hstack((in_seq1, in_seq2, out_seq))
# choose a number of time steps
n_steps_in, n_steps_out = 3, 3
# convert into input/output
X, y = split_sequences(dataset, n_steps_in, n_steps_out)
n_features = X.shape[2]
# define model
model = Sequential()
model.add(LSTM(5, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# fit model
model.fit(X, y, epochs=20, verbose=0, batch_size=1)
The above code runs fine. But when I try to change n_steps_in, n_steps_out and use, for example, n_steps_in, n_steps_out = 3, 2, it gives:
ValueError: Dimensions must be equal, but are 2 and 3 for '{{node binary_crossentropy/mul}} = Mul[T=DT_FLOAT](binary_crossentropy/Cast, binary_crossentropy/Log)' with input shapes: [1,2], [1,3].
Why this error comes up and how can I overcome this?
This is because your network is built to output 3D sequences of shape (None, 3, 1), while your targets have shape (None, 2, 1).
The cleanest and most automatic way to handle this correctly is to build an encoder-decoder structure. Here is an example:
from keras.layers import RepeatVector  # needed for the decoder; not imported in the question's code

model = Sequential()
model.add(LSTM(5, activation='relu', return_sequences=False,
               input_shape=(n_steps_in, n_features)))            # ENCODER
model.add(RepeatVector(n_steps_out))
model.add(LSTM(5, activation='relu', return_sequences=True))     # DECODER
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=20, batch_size=1)
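As a quick sanity check (a sketch using the variables defined above), the decoder output now matches the target length:
print(model.output_shape)      # (None, n_steps_out, 1)
print(model.predict(X).shape)  # (num_samples, n_steps_out, 1)
print(y.shape)                 # (num_samples, n_steps_out)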

Mean of Tensorflow Keras's Glorot Normal Initializer is not zero

As per the documentation of Glorot Normal, the mean of the normal distribution the initial weights are drawn from should be zero:
Draws samples from a truncated normal distribution centered on 0
But it doesn't seem to be zero. Am I missing something?
Please find the code below:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

print(tf.__version__)

initializer = tf.keras.initializers.GlorotNormal(seed = 1234)
model = Sequential([Dense(units = 3, input_shape = [1], kernel_initializer = initializer,
                          bias_initializer = initializer),
                    Dense(units = 1, kernel_initializer = initializer,
                          bias_initializer = initializer)])

batch_size = 1
x = np.array([-1.0, 0, 1, 2, 3, 4.0], dtype = 'float32')
y = np.array([-3, -1.0, 1, 3.0, 5.0, 7.0], dtype = 'float32')
x = np.reshape(x, (-1, 1))

# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
train_dataset = train_dataset.shuffle(buffer_size=64).batch(batch_size)

epochs = 1
learning_rate = 1e-3

# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

for epoch in range(epochs):
    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = tf.keras.losses.MSE(y_batch_train, logits)

        Initial_Weights_1st_Hidden_Layer = model.trainable_weights[0]
        Mean_Weights_Hidden_Layer = tf.reduce_mean(Initial_Weights_1st_Hidden_Layer)
        Initial_Weights_Output_Layer = model.trainable_weights[2]
        Mean_Weights_Output_Layer = tf.reduce_mean(Initial_Weights_Output_Layer)
        Initial_Bias_1st_Hidden_Layer = model.trainable_weights[1]
        Mean_Bias_Hidden_Layer = tf.reduce_mean(Initial_Bias_1st_Hidden_Layer)
        Initial_Bias_Output_Layer = model.trainable_weights[3]
        Mean_Bias_Output_Layer = tf.reduce_mean(Initial_Bias_Output_Layer)

        if epoch == 0 and step == 0:
            print('\n Initial Weights of First-Hidden Layer = ', Initial_Weights_1st_Hidden_Layer)
            print('\n Mean of Weights of Hidden Layer = %s' % Mean_Weights_Hidden_Layer.numpy())
            print('\n Initial Weights of Second-Hidden/Output Layer = ', Initial_Weights_Output_Layer)
            print('\n Mean of Weights of Output Layer = %s' % Mean_Weights_Output_Layer.numpy())
            print('\n Initial Bias of First-Hidden Layer = ', Initial_Bias_1st_Hidden_Layer)
            print('\n Mean of Bias of Hidden Layer = %s' % Mean_Bias_Hidden_Layer.numpy())
            print('\n Initial Bias of Second-Hidden/Output Layer = ', Initial_Bias_Output_Layer)
            print('\n Mean of Bias of Output Layer = %s' % Mean_Bias_Output_Layer.numpy())
Because you don't draw that many samples from that distribution, so the sample mean can be far from zero.
initializer = tf.keras.initializers.GlorotNormal(seed = 1234)
mean = tf.reduce_mean(initializer(shape=(1, 3))).numpy()
print(mean) # -0.29880756
But if you increase the samples:
initializer = tf.keras.initializers.GlorotNormal(seed = 1234)
mean = tf.reduce_mean(initializer(shape=(1, 500))).numpy()
print(mean) # 0.003004579
The same thing applies to your example too. If you increase the first dense layer's units to 500, you should see 0.003004579 with the same seed.
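For illustration (a sketch; the layer here is built on its own rather than inside your model), checking the kernel of a 500-unit Dense layer directly shows the same effect:
initializer = tf.keras.initializers.GlorotNormal(seed=1234)
layer = Dense(units=500, kernel_initializer=initializer)
layer.build(input_shape=(None, 1))            # creates a (1, 500) kernel
print(tf.reduce_mean(layer.kernel).numpy())   # much closer to 0 than with only 3 units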

Keras Input Layer Shape On Input Layer Error

I am trying to learn AI algorithms by building things. I found a question on Stack Overflow, which is here.
I copied this code to try it out, and then modified it into this:
import numpy as np
import tensorflow as tf
from tensorflow import keras as keras
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.python.keras import activations
# Importing the dataset
dataset = np.genfromtxt("data.txt", delimiter='')
X = dataset[:, :-1]
y = dataset[:, -1]
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.08, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Initialising the ANN
#model = Sequential()
# Adding the input layer and the first hidden layer
#model.add(Dense(32, activation = 'relu', input_dim = 6))
# Adding the second hidden layer
#model.add(Dense(units = 32, activation = 'relu'))
# Adding the third hidden layer
#model.add(Dense(units = 32, activation = 'relu'))
# Adding the output layer
#model.add(Dense(units = 1))
#model = Sequential([
# keras.Input(shape= (6),name= "digits"),
# Dense(units = 32, activation = "relu"),
# Dense(units = 32, activation = "relu"),
# Dense(units = 1 , name = "predict")##
#])
#
input = keras.Input(shape= (6),name= "digits")
#x0 = Dense(units = 6)(input)
x1 = Dense(units = 32, activation = "relu")(input)
x2 = Dense(units = 32, activation = "relu")(x1)
output = Dense(units = 1 , name = "predict")(x2)
model = keras.Model(inputs = input , outputs= output)
#model.add(Dense(1))
# Compiling the ANN
#model.compile(optimizer = 'adam', loss = 'mean_squared_error')
# Fitting the ANN to the Training set
#model.fit(X_train, y_train, batch_size = 10, epochs = 200)
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.MeanSquaredError()
epochs = 200
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step in range(len(X_train)):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(X_train[step], training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = loss(y_train[step], logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * 64))

y_pred = model.predict(X_test)
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()
I modified the code so that data can be created while processing. With model.fit the model just uses the data I have already prepared, but I want to generate data from a simulation when each epoch starts and then process it (sorry for my bad English if I couldn't explain it very well).
When I run the code, at line 81:
Exception has occurred: ValueError
Input 0 of layer dense is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: (6,)
It gives an exception. I tried using shape=(6,), shape=(6,1), and similar, but nothing fixes it.
You need to add a batch dimension when calling the keras model:
logits = model( X_train[step][np.newaxis,:] , training=True) # Logits for this minibatch
A batch dimension is used to feed multiple samples to the network. By default, Keras assumes that the input has a batch dimension. To feed one sample, Keras expects a batch of 1 sample. In that case, it means a shape of (1,6). If you want to feed a batch of 2 samples, then the shape will be (2,6), etc.
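Equivalently (a sketch of the same idea, reusing the names from the code above), you can add the batch dimension with np.expand_dims, or pass a slice of several rows, which already has one:
# a single sample wrapped into a batch of 1 -> shape (1, 6)
logits = model(np.expand_dims(X_train[step], axis=0), training=True)
# or a mini-batch of 10 samples -> shape (10, 6)
logits = model(X_train[step:step + 10], training=True)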

How to stop training CNN part while continue training ANN part in a Multi-input Model?

I made a multi-input model in Keras which takes images of shape [N, 640, 480, 3] as well as numerical data of shape [N, 19] and predicts 12 classes.
Following is the model defining part of code:
from tensorflow import keras  # not shown in the question; added so the snippet stands on its own

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# MODEL === CNN
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
base_model = keras.applications.ResNet50(
    weights='imagenet',            # Load weights pre-trained on ImageNet.
    input_shape=(640, 480, 3),
    include_top=False)             # Do not include the ImageNet classifier at the top.
base_model.trainable = False

input_Cnn = keras.Input(shape=(640, 480, 3))
x = base_model(input_Cnn, training=False)
# Convert features of shape `base_model.output_shape[1:]` to vectors
x = keras.layers.GlobalAveragePooling2D()(x)
# Dense classifier head on top of the pooled features
x1 = keras.layers.Dense(1024, activation="relu")(x)
out_Cnn = keras.layers.Dense(12, activation="relu")(x1)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# MODEL === NN
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
inp_num = keras.layers.Input(shape=(19,))  # no. of columns of the numerical data
fc1 = keras.layers.Dense(units=2 ** 6, activation="relu")(inp_num)
fc2 = keras.layers.Dense(units=2 ** 8, activation="relu")(fc1)
fc3 = keras.layers.Dense(units=2 ** 10, activation="relu")(fc2)
fc4 = keras.layers.Dense(units=2 ** 8, activation="relu")(fc3)
fc5 = keras.layers.Dense(units=2 ** 6, activation="relu")(fc4)
out_NN = keras.layers.Dense(12, activation="relu")(fc5)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# CONCATENATION
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
result = keras.layers.concatenate((out_Cnn, out_NN), axis=-1)  # [N, 12] concatenate [N, 12] ==> [N, 24]
result = keras.layers.Dense(1024, activation='relu')(result)
result = keras.layers.Dense(units=12, activation="softmax")(result)

model = keras.Model([input_Cnn, inp_num], result)
print(model.summary())
The problem is that the CNN part (if trained independently) converges in fewer epochs, while the ANN part (if trained independently) needs many more epochs. But when both are combined in this code, accuracy doesn't go above 10%. Is there any way to stop gradients from flowing into the CNN part after a certain number of epochs, so that from then on the model trains only the ANN part?
I'm not using Keras, but after a quick Google search this should be the answer:
You can freeze layers so that certain parameters are no longer trainable:
# this freezes the first N layers
for layer in model.layers[:N]:
    layer.trainable = False
where N is the number of convolutional layers you have.
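Applied to the multi-input model above, one possible way to do this after a certain number of epochs is a callback that freezes the CNN branch and recompiles; recompiling is required for the trainable change to take effect in fit(). This is only a sketch: the layer names, epoch count, optimizer, and loss below are placeholders, not taken from the question.
from tensorflow import keras

class FreezeCnnBranch(keras.callbacks.Callback):
    """Freeze the CNN branch after `freeze_epoch` epochs, then keep training the rest."""
    def __init__(self, freeze_epoch, cnn_layer_names):
        super().__init__()
        self.freeze_epoch = freeze_epoch
        self.cnn_layer_names = cnn_layer_names   # names of the layers in the CNN branch

    def on_epoch_end(self, epoch, logs=None):
        if epoch + 1 == self.freeze_epoch:
            for name in self.cnn_layer_names:
                self.model.get_layer(name).trainable = False
            # Recompile so the new trainable flags are picked up by fit();
            # note that this also resets the optimizer state.
            self.model.compile(optimizer='adam',
                               loss='categorical_crossentropy',
                               metrics=['accuracy'])

# Hypothetical usage -- the layer names must match those shown by print(model.summary()):
# model.fit([images, numeric_data], labels, epochs=30,
#           callbacks=[FreezeCnnBranch(freeze_epoch=10,
#                                      cnn_layer_names=['resnet50', 'dense', 'dense_1'])])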

logits and labels must be same size logits_size

Hi, I used my own dataset to train the model but I get the error mentioned below. My dataset has 124 classes with labels 0 to 123, the images are 60*60 grayscale, the batch size is 10, and the result is:
lables.eval() --> [ 1 101 101 103 103 103 103 100 102 1] -- len(lables.eval())= 10
orginal pic size -- > (?, 60, 60, 1)
First convolutional layer (?, 30, 30, 32)
Second convolutional layer. (?, 15, 15, 64)
flatten. (?, 14400)
dense .1 (?, 2048)
dense .2 (?, 124)
error
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must have the same first dimension, got logits shape [40,124] and labels shape [10]
code
def model_fn(features, labels, mode, params):
    # Reference to the tensor named "image" in the input-function.
    x = features["image"]

    # The convolutional layers expect 4-rank tensors
    # but x is a 2-rank tensor, so reshape it.
    net = tf.reshape(x, [-1, img_size, img_size, num_channels])

    # First convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv1',
                           filters=32, kernel_size=3,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

    # Second convolutional layer.
    net = tf.layers.conv2d(inputs=net, name='layer_conv2',
                           filters=64, kernel_size=3,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

    # Flatten to a 2-rank tensor.
    net = tf.contrib.layers.flatten(net)
    # Eventually this should be replaced with:
    # net = tf.layers.flatten(net)

    # First fully-connected / dense layer.
    # This uses the ReLU activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc1',
                          units=2048, activation=tf.nn.relu)

    # Second fully-connected / dense layer.
    # This is the last layer so it does not use an activation function.
    net = tf.layers.dense(inputs=net, name='layer_fc_2',
                          units=num_classes)

    # Logits output of the neural network.
    logits = net
    y_pred = tf.nn.softmax(logits=logits)
    y_pred_cls = tf.argmax(y_pred, axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        spec = tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=y_pred_cls)
    else:
        cross_entropy = \
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                           logits=logits)
        loss = tf.reduce_mean(cross_entropy)
        optimizer = \
            tf.train.AdamOptimizer(learning_rate=params["learning_rate"])
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        metrics = \
            {
                "accuracy": tf.metrics.accuracy(labels, y_pred_cls)
            }
        spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=metrics)

    return spec
These labels come from here, via TFRecords:
def input_fn(filenames, train, batch_size=10, buffer_size=2048):
    # Args:
    # filenames:   Filenames for the TFRecords files.
    # train:       Boolean whether training (True) or testing (False).
    # batch_size:  Return batches of this size.
    # buffer_size: Read buffers of this size. The random shuffling
    #              is done on the buffer, so it must be big enough.

    # Create a TensorFlow Dataset-object which has functionality
    # for reading and shuffling data from TFRecords files.
    dataset = tf.data.TFRecordDataset(filenames=filenames)

    # Parse the serialized data in the TFRecords files.
    # This returns TensorFlow tensors for the image and labels.
    dataset = dataset.map(parse)

    if train:
        # If training then read a buffer of the given size and
        # randomly shuffle it.
        dataset = dataset.shuffle(buffer_size=buffer_size)
        # Allow infinite reading of the data.
        num_repeat = None
    else:
        # If testing then don't shuffle the data.
        # Only go through the data once.
        num_repeat = 1

    # Repeat the dataset the given number of times.
    dataset = dataset.repeat(num_repeat)

    # Get a batch of data with the given size.
    dataset = dataset.batch(batch_size)

    # Create an iterator for the dataset and the above modifications.
    iterator = dataset.make_one_shot_iterator()

    # Get the next batch of images and labels.
    images_batch, labels_batch = iterator.get_next()

    # The input-function must return a dict wrapping the images.
    x = {'image': images_batch}
    y = labels_batch
    print(x, ' - ', y.get_shape())

    return x, y
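The parse function used in dataset.map(parse) is not shown in the question; based on the tutorial this code follows, it presumably looks roughly like the sketch below (the feature keys match the convert function further down, everything else is an assumption):
def parse(serialized):
    # Feature keys must match those written by convert() below.
    features = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    parsed_example = tf.parse_single_example(serialized=serialized, features=features)
    # Decode the raw bytes; the dtype must match the dtype the images had
    # when they were written out with img.tostring().
    image = tf.decode_raw(parsed_example['image'], tf.uint8)
    image = tf.cast(image, tf.float32)
    label = parsed_example['label']
    return image, label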
I generate the labels via this code; for example, for an image named math-1, the label is 1:
def get_lable_and_image(path):
    lbl = []
    img = []
    for filename in glob.glob(os.path.join(path, '*.png')):
        img.append(filename)
        lable = filename[41:].split()[0].split('-')[1]
        lbl.append(int(lable))
    lables = np.array(lbl)
    images = np.array(img)
    # print(images[1], lables[1])
    return images, lables
I push the images and labels in to create the TFRecords:
def convert(image_paths, labels, out_path):
    # Args:
    # image_paths: List of file-paths for the images.
    # labels:      Class-labels for the images.
    # out_path:    File-path for the TFRecords output file.

    print("Converting: " + out_path)

    # Number of images. Used when printing the progress.
    num_images = len(image_paths)

    # Open a TFRecordWriter for the output-file.
    with tf.python_io.TFRecordWriter(out_path) as writer:
        # Iterate over all the image-paths and class-labels.
        for i, (path, label) in enumerate(zip(image_paths, labels)):
            # Print the percentage-progress.
            print_progress(count=i, total=num_images-1)

            # Load the image-file using matplotlib's imread function.
            img = imread(path)

            # Convert the image to raw bytes.
            img_bytes = img.tostring()

            # Create a dict with the data we want to save in the
            # TFRecords file. You can add more relevant data here.
            data = \
                {
                    'image': wrap_bytes(img_bytes),
                    'label': wrap_int64(label)
                }

            # Wrap the data as TensorFlow Features.
            feature = tf.train.Features(feature=data)

            # Wrap again as a TensorFlow Example.
            example = tf.train.Example(features=feature)

            # Serialize the data.
            serialized = example.SerializeToString()

            # Write the serialized data to the TFRecords file.
            writer.write(serialized)
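The wrap_int64 and wrap_bytes helpers used above are not shown in the question either; in the tutorial this code is based on they are the usual tf.train.Feature wrappers, roughly:
def wrap_int64(value):
    # Wrap a single integer as an int64 Feature.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def wrap_bytes(value):
    # Wrap a byte string as a bytes Feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))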