I wrote some simple code to train a neural network with Keras model.fit() and a generator. It is important for me to know
how many times model.fit() calls the generator.
It seems that the generator is called one extra time before the first epoch, and I don't know why. This extra call is visible in the attached figure.
Here is my simple code:
import numpy
from keras.models import Model
from keras import layers, losses
from tensorflow.keras import optimizers
input_layer = layers.Input(shape=(256,))
x_layer = layers.BatchNormalization()(input_layer)
x_layer = layers.Dense(500, activation='relu')(x_layer)
x_layer = layers.BatchNormalization()(x_layer)
x_layer = layers.Dense(250, activation='relu')(x_layer)
x_layer = layers.BatchNormalization()(x_layer)
x_layer = layers.Dense(120, activation='relu')(x_layer)
x_layer = layers.BatchNormalization()(x_layer)
output_layer = layers.Dense(16, activation='sigmoid')(x_layer)
NN_model = Model(inputs=input_layer, outputs=output_layer)
NN_model.compile(optimizer=optimizers.RMSprop(), loss=losses.mean_squared_error)
NN_model.summary()
def training_generator(number_of_samples):
    while True:
        print('\n ---------- One Refer to This File')
        NN_inputs = []
        NN_labels = []
        for _ in range(number_of_samples):
            NN_inputs.append(numpy.arange(256))
            NN_labels.append(numpy.arange(16))
        yield numpy.asarray(NN_inputs), numpy.asarray(NN_labels)
NN_history = NN_model.fit(training_generator(1000),
                          steps_per_epoch=4,
                          epochs=10,
                          verbose=2)
I am using a TF2 (2.3.0) neural network to approximate the function y which solves the ODE y' + 3y = 0.
I have defined a custom loss class and function in which I try to differentiate the single output with respect to the single input so that the equation holds, given that y_true is zero:
from tensorflow.keras.losses import Loss
import tensorflow as tf
class CustomLossOde(Loss):
    def __init__(self, x, model, name='ode_loss'):
        super().__init__(name=name)
        self.x = x
        self.model = model

    def call(self, y_true, y_pred):
        with tf.GradientTape() as tape:
            tape.watch(self.x)
            y_p = self.model(self.x)
        dy_dx = tape.gradient(y_p, self.x)
        loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
        return loss
But when running the following network:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from custom_loss_ode import CustomLossOde
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
loss = CustomLossOde(model.input, model)
model.compile(optimizer=Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99),loss=loss)
model.run_eagerly = True
model.fit(x_train, y_train, batch_size=16, epochs=30)
For now I am getting 0 loss from the first epoch, which doesn't make any sense.
I have printed both y_true and y_pred from within the function and they seem OK, so I suspect that the problem is in the gradient, which I didn't manage to print.
I'd appreciate any help.
Defining a custom loss with the high-level Keras API is a bit difficult in this case. I would instead write the training loop from scratch, as it allows finer-grained control over what you can do.
I took inspiration from these two guides:
Advanced Automatic Differentiation
Writing a training loop from scratch
Basically, I used the fact that multiple tapes can be nested seamlessly: one computes the loss function, the other calculates the gradients to be propagated by the optimizer.
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
num_samples = 1024
x_train = 4 * (tf.random.uniform((num_samples, )) - 0.5)
y_train = tf.zeros((num_samples, ))
inputs = Input(shape=(1,))
x = Dense(16, 'tanh')(inputs)
x = Dense(8, 'tanh')(x)
x = Dense(4)(x)
y = Dense(1)(x)
model = Model(inputs=inputs, outputs=y)
# using the high level tf.data API for data handling
x_train = tf.reshape(x_train,(-1,1))
dataset = tf.data.Dataset.from_tensor_slices((x_train,y_train)).batch(1)
opt = Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.99)
for step, (x, y_true) in enumerate(dataset):
    # we need to convert x to a variable if we want the tape to be
    # able to compute the gradient according to x
    x_variable = tf.Variable(x)
    with tf.GradientTape() as model_tape:
        with tf.GradientTape() as loss_tape:
            loss_tape.watch(x_variable)
            y_pred = model(x_variable)
        dy_dx = loss_tape.gradient(y_pred, x_variable)
        loss = tf.math.reduce_mean(tf.square(dy_dx + 3 * y_pred - y_true))
    grad = model_tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grad, model.trainable_variables))
    if step % 20 == 0:
        print(f"Step {step}: loss={loss.numpy()}")
I am working on a binary classification problem. I have finished training the model and am now testing it on single images using the code below.
import warnings
import time
from urllib.request import urlopen
import os
import urllib.request
start_time = time.time()
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    import numpy as np
    from keras.preprocessing.image import img_to_array, load_img
    from keras.models import Sequential
    from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
    from keras.applications.vgg16 import VGG16
    import tensorflow as tf
    import logging
    logging.getLogger('tensorflow').disabled = True

img_size = 224

class PersonPrediction:
    def __init__(self):
        self.class_dictionary = np.load(
            'class_indices_vgg.npy',
            allow_pickle=True).item()
        self.top_model_weights_path = 'v2/weights/bottleneck_fc_model_2020-10-10-05.h5'
        self.num_classes = len(self.class_dictionary)
        self.model = self.create_model(self.num_classes)
        self.graph = tf.compat.v1.get_default_graph()

    def create_model(self, num_of_cls):
        model = Sequential()
        vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=(img_size, img_size, 3))
        for layer in vgg_model.layers[:-4]:
            layer.trainable = False
        model.add(vgg_model)
        model.add(GlobalAveragePooling2D())
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1, activation='sigmoid'))
        return model

    def predict(self, path=None, file_name=None):
        if path:
            image_path = path
            path = self.url_to_image(image_path)
        else:
            path = os.path.join('imgs', file_name)
        print("[INFO] loading and preprocessing image...")
        image = load_img(path, target_size=(224, 224))
        image = img_to_array(image)
        # important! otherwise the predictions will be '0'
        image = image / 255
        image = np.expand_dims(image, axis=0)
        label_idx = self.model.predict_classes(image)[0][0]
        probability = self.model.predict(image)[0]
        inv_map = {v: k for k, v in self.class_dictionary.items()}
        label = inv_map[label_idx]
        return label, probability[0]

path = 'temp.jpg'
tax_model = PersonPrediction()
label, proba = tax_model.predict(
    file_name='frame303.jpg')
print(label, proba)
The problem is that I keep getting changing predictions for both the label and the probability every time I rerun the code, and I am not sure what is causing that.
There are a number of sources of randomness when training a model. First, the weights are randomly initialized, so your model starts from a different point in N-space (N is the number of trainable parameters). Second, layers like dropout have randomness in terms of which nodes will be selected. Some GPU operations, particularly with multi-processing, can also have some degree of randomness. I have seen a number of posts on getting repeatable results in TensorFlow, but I have not found one that really seems to work. In general, though, the results should be reasonably close if your model is working correctly and you run enough epochs. Once the model is trained and you use it for predictions, as long as you use the same trained model you should get identical prediction results.
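If you want runs to be as repeatable as possible, here is a minimal sketch of the usual seeding calls (assuming TF 2.x; full determinism on GPU is still not guaranteed), plus saving the trained weights so later runs predict with exactly the same model ('person_model.h5' is just an example path):
import random

import numpy as np
import tensorflow as tf

# Seed every source of randomness we control; GPU kernels may still be nondeterministic.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Persist the weights once the model is trained, then reuse them in prediction-only runs.
# model.save_weights('person_model.h5')   # after training
# model.load_weights('person_model.h5')   # before predicting in a new run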
I have the following custom tf function:
import tensorflow as tf
@tf.function
def top10_accuracy_scorer(y_true, y_pred):
    values, indeces = tf.math.top_k(y_pred, 10)
    lab_indeces_tensor = tf.argmax(y_true, 1)
    lab_indeces_tensor = tf.reshape(lab_indeces_tensor,
                                    shape=(tf.shape(lab_indeces_tensor)[0], 1))
    lab_indeces_tensor = tf.dtypes.cast(lab_indeces_tensor, dtype=tf.int32)
    equal_tensor = tf.equal(lab_indeces_tensor, indeces)
    sum_tensor = tf.reduce_sum(tf.cast(equal_tensor, tf.float32))
    top10_accuracy = sum_tensor / tf.cast(tf.shape(lab_indeces_tensor)[0], tf.float32)
    return top10_accuracy
It works fine as a metric in my model but when I try to use it as a loss function I get the error: ValueError: No gradients provided for any variable. Obviously, some part of it is not differentiable but I cannot figure out how to fix it. Any help is appreciated.
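As far as I can tell, the non-differentiable part is the index-based comparison: the indices returned by tf.math.top_k, the result of tf.argmax, and the output of tf.equal are all integer or boolean tensors, so no gradient can flow back to y_pred. A minimal check of that assumption (TF 2.x):
import tensorflow as tf

y_pred = tf.Variable([[0.1, 0.7, 0.2]])
with tf.GradientTape() as tape:
    _, idx = tf.math.top_k(y_pred, k=2)           # integer indices: the gradient path ends here
    hit = tf.cast(tf.equal(idx, 1), tf.float32)   # the comparison result carries no gradient either
    value = tf.reduce_sum(hit)
print(tape.gradient(value, y_pred))               # None -> "No gradients provided for any variable"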
Working example:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

X_temp = np.random.uniform(0, 1, (1000, 100))
y_temp = np.random.uniform(0, 1, (1000, 10))
y_temp = np.argmax(y_temp, axis=1)
y_temp = tf.keras.utils.to_categorical(y_temp)

model = Sequential()
model.add(Dense(y_temp.shape[1], input_shape=(X_temp.shape[1],), activation='softmax'))
model.compile(optimizer='adam',
              loss=top10_accuracy_scorer,
              metrics=['accuracy'])
model.fit(X_temp, y_temp)
This isn't really a question that's code-specific, but I haven't been able to find any answers or resources.
I'm currently trying to teach myself some "pure" TensorFlow rather than just using Keras, and I felt that it would be very helpful if there were some sources where they have TensorFlow code and the equivalent Keras code side-by-side for comparison.
Unfortunately, most of the results I find on the Internet talk about performance differences or give very simple comparison examples (e.g. "and so this is why Keras is much simpler to use"). I'm not so much interested in those details as I am in the code itself.
Does anybody know if there are any resources out there that could help with this?
Here you have two corresponding models, one in TensorFlow and one in Keras:
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
TensorFlow
X = tf.placeholder(dtype=tf.float64)
Y = tf.placeholder(dtype=tf.float64)
num_hidden=128
# Build a hidden layer
W_hidden = tf.Variable(np.random.randn(784, num_hidden))
b_hidden = tf.Variable(np.random.randn(num_hidden))
p_hidden = tf.nn.sigmoid( tf.add(tf.matmul(X, W_hidden), b_hidden) )
# Build another hidden layer
W_hidden2 = tf.Variable(np.random.randn(num_hidden, num_hidden))
b_hidden2 = tf.Variable(np.random.randn(num_hidden))
p_hidden2 = tf.nn.sigmoid( tf.add(tf.matmul(p_hidden, W_hidden2), b_hidden2) )
# Build the output layer
W_output = tf.Variable(np.random.randn(num_hidden, 10))
b_output = tf.Variable(np.random.randn(10))
p_output = tf.nn.softmax( tf.add(tf.matmul(p_hidden2, W_output), b_output) )
loss = tf.reduce_mean(tf.losses.mean_squared_error(
    labels=Y, predictions=p_output))
accuracy = 1 - tf.sqrt(loss)

minimization_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

feed_dict = {
    X: x_train.reshape(-1, 784),
    Y: pd.get_dummies(y_train)
}

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(10000):
        J_value = session.run(loss, feed_dict)
        acc = session.run(accuracy, feed_dict)
        if step % 100 == 0:
            print("Step:", step, " Loss:", J_value, " Accuracy:", acc)
        session.run(minimization_op, feed_dict)
    pred00 = session.run([p_output], feed_dict={X: x_test.reshape(-1, 784)})
Keras
import tensorflow as tf

l = tf.keras.layers

model = tf.keras.Sequential([
    l.Flatten(input_shape=(784,)),
    l.Dense(128, activation='relu'),
    l.Dense(128, activation='relu'),
    l.Dense(10, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(x_train.reshape(-1, 784), pd.get_dummies(y_train), epochs=15, batch_size=128, verbose=1)
You can take a look at this toy example, but it may be too simple.
I'm classifying digits of the MNIST dataset using a simple feed-forward neural net with Keras, so I execute the code below.
import os
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)
# Path to Computation graphs
LOGDIR = './graphs_3'
# start session
sess = tf.Session()
#Hyperparameters
LEARNING_RATE = 0.01
BATCH_SIZE = 1000
EPOCHS = 10
# Layers
HL_1 = 1000
HL_2 = 500
# Other Parameters
INPUT_SIZE = 28*28
N_CLASSES = 10
model = Sequential
model.add(Dense(HL_1, input_dim=(INPUT_SIZE,), activation="relu"))
#model.add(Activation(activation="relu"))
model.add(Dense(HL_2, activation="relu"))
#model.add(Activation("relu"))
model.add(Dropout(rate=0.9))
model.add(Dense(N_CLASSES, activation="softmax"))
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'])

# one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)

model.fit(
    x=mnist.train.images,
    y=mnist.train.labels,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE)

score = model.evaluate(
    x=mnist.test.images,
    y=mnist.test.labels)
print("score = ", score)
However, I get the following error:
model.add(Dense(1000, input_dim=(INPUT_SIZE,), activation="relu"))
TypeError: add() missing 1 required positional argument: 'layer'
The syntax is exactly as shown in the Keras docs. I am using Keras 2.0.9, so I don't think it's a versioning problem. Did I do something wrong?
It does look correct at first glance, but I noticed you're not creating an instance of a Sequential model; you're using the class name instead:
#yours: model = Sequential
#correct:
model = Sequential()
Since methods in a class are declared with self as the first argument, calling a method on the class itself (rather than on an instance) means whatever you pass first gets bound to self.
The method's definition is def add(self, layer, ...): so with model = Sequential, your Dense layer fills the self slot and the layer argument is left missing, which is exactly what the TypeError says.
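A quick sketch of the difference (the layer sizes here are just placeholders):
from keras.models import Sequential
from keras.layers import Dense

bad = Sequential                     # the class itself, not a model instance
# bad.add(Dense(10))                 # TypeError: add() missing 1 required positional argument: 'layer'
#                                    # Dense(10) fills the `self` slot, leaving `layer` missing

good = Sequential()                  # instantiate first
good.add(Dense(10, input_dim=4))     # now `self` is `good` and `layer` is the Dense layer
good.summary()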