I went over
a basic example of tf2.0
containing very simple code
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import tensorflow as tf
import cProfile
# Fetch and format the mnist data
(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()
dataset = tf.data.Dataset.from_tensor_slices(
(tf.cast(mnist_images[...,tf.newaxis]/255, tf.float32),
tf.cast(mnist_labels,tf.int64)))
dataset = dataset.shuffle(1000).batch(32)
# Build the model
mnist_model = tf.keras.Sequential([
tf.keras.layers.Conv2D(16,[3,3], activation='relu',
input_shape=(None, None, 1)),
tf.keras.layers.Conv2D(16,[3,3], activation='relu'),
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(10)
])
for images,labels in dataset.take(1):
print("Logits: ", mnist_model(images[0:1]).numpy())
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_history = []
def train_step(model, images, labels):
with tf.GradientTape() as tape:
logits = model(images, training=True)
# Add asserts to check the shape of the output.
tf.debugging.assert_equal(logits.shape, (32, 10))
loss_value = loss_object(labels, logits)
loss_history.append(loss_value.numpy().mean())
grads = tape.gradient(loss_value, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
def train(epochs):
for epoch in range(epochs):
for (batch, (images, labels)) in enumerate(dataset):
train_step(mnist_model, images, labels)
print ('Epoch {} finished'.format(epoch))
I trained it and save trainable_variables before and after by the following
t0=mnist_model.trainable_variables
train(epochs = 3)
t1=mnist_model.trainable_variables
diff = tf.reduce_mean(tf.abs(t0[0] - t1[0]))
# whethere indexing [0] or [1] etc. gets the same outcome of diff
print(diff.numpy())
They are the same!!!
So am I checking somethere incorrect? If that is the case, how can I observe those updated variables correctly?
You aren't creating new arrays of variables, just 2 pointers on the same object
Try to do so
t0 = np.array(mnist_model.trainable_variables)
Related
I'm working on a CNN classification problem. I used keras and a pre-trained model. Now I want to evaluate my model and need the precision, recall and f1-Score. When I use sklearn.metrics classification_report I get above error. I know where the numbers are coming from, first is the length of my test dataset in batches and second are the number of actual sampels (predictions) in there. However I don't know how to "convert" them.
See my code down below:
# load train_ds
train_ds = tf.keras.utils.image_dataset_from_directory(
directory ='/gdrive/My Drive/Flies_dt/224x224',
image_size = (224, 224),
validation_split = 0.40,
subset = "training",
seed = 123,
shuffle = True)
# load val_ds
val_ds = tf.keras.utils.image_dataset_from_directory(
directory ='/gdrive/My Drive/Flies_dt/224x224',
image_size = (224, 224),
validation_split = 0.40,
subset = "validation",
seed = 123,
shuffle = True)
# move some batches of val_ds to test_ds
test_ds = val_ds.take((1*len(val_ds)) // 2)
print('test_ds =', len(test_ds))
val_ds = val_ds.skip((1*len(val_ds)) // 2)
print('val_ds =', len(val_ds)) #test_ds = 18 val_ds = 18
# Load Model
base_model = keras.applications.vgg19.VGG19(
include_top=False,
weights='imagenet',
input_shape=(224,224,3)
)
# Freeze base_model
base_model.trainable = False
#
inputs = keras.Input(shape=(224,224,3))
x = data_augmentation(inputs) #apply data augmentation
# Preprocessing
x = tf.keras.applications.vgg19.preprocess_input(x)
# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x) # Regularize with dropout
outputs = keras.layers.Dense(5, activation="softmax")(x)
model = keras.Model(inputs, outputs)
model.compile(
loss="sparse_categorical_crossentropy",
optimizer="Adam",
metrics=['acc']
)
model.fit(train_ds, epochs=8, validation_data=val_ds, callbacks=[tensorboard_callback])
# Unfreeze the base_model. Note that it keeps running in inference mode
# since we passed `training=False` when calling it. This means that
# the batchnorm layers will not update their batch statistics.
# This prevents the batchnorm layers from undoing all the training
# we've done so far.
base_model.trainable = True
model.summary()
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=0.000001), # Low learning rate
loss="sparse_categorical_crossentropy",
metrics=['acc']
)
model.fit(train_ds, epochs=5, validation_data=val_ds)
#Evaluate
from sklearn.metrics import classification_report
y_pred = model.predict(test_ds, batch_size=64, verbose=1)
y_pred_bool = np.argmax(y_pred, axis=1)
print(classification_report(test_ds, y_pred_bool))
I also tried something like this, but I'm not sure if this gives me the correct values for multiclass classification.
from keras import backend as K
def recall_m(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def precision_m(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def f1_m(y_true, y_pred):
precision = precision_m(y_true, y_pred)
recall = recall_m(y_true, y_pred)
return 2*((precision*recall)/(precision+recall+K.epsilon()))
# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc',f1_m,precision_m, recall_m])
# fit the model
history = model.fit(Xtrain, ytrain, validation_split=0.3, epochs=10, verbose=0)
# evaluate the model
loss, accuracy, f1_score, precision, recall = model.evaluate(Xtest, ytest, verbose=0)
This is a lot, Sorry. Hope somebody can help.
I'm trying to use GradientTape mechanism for the first time. I've looked at some examples but I'm getting the "No gradients provided for any variable" error and was wondering how to overcome this?
I want to define some complex loss functions, so I tried using GradientTape to produce its gradient for the CNN training. What was I doing wrong and can I fix it?
Attached is a run-able example code that demonstrates my problem:
# imports
import numpy as np
import tensorflow as tf
import sklearn
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
tf.config.run_functions_eagerly(True)
#my loss function
def my_loss_fn(y_true, y_pred):
` # train SVM classifier
VarC=1E6
VarGamma='scale'
clf = SVC(kernel='rbf', C=VarC, gamma=VarGamma, probability=True )
clf.fit(y_pred, y_true)
y_pred = clf.predict_proba(y_pred)
scce = tf.keras.losses.SparseCategoricalCrossentropy()
return scce(y_true, y_pred)
`
#creating inputs to demontration
X0=0.5*np.ones((12,12))
X0[2:12:4,:]=0
X0[3:12:4,:]=0
X1=0.5*np.ones((12,12))
X1[1:12:4,:]=0
X1[2:12:4,:]=0
X1=np.transpose(X1)
X=np.zeros((2000,12,12))
for i in range(0,1000):
X[i]=X0+np.random.rand(12,12)
for i in range(1000,2000):
X[i]=X1+np.random.rand(12,12)
y=np.zeros(2000, dtype=int)
y[1000:2000]=1
x_train, x_val, y_train, y_val = train_test_split(X, y, train_size=0.5)
x_val, x_test, y_val, y_test = train_test_split(x_val, y_val, train_size=0.5)
x_train = tf.convert_to_tensor(x_train)
x_val = tf.convert_to_tensor(x_val)
x_test = tf.convert_to_tensor(x_test)
y_train = tf.convert_to_tensor(y_train)
y_val = tf.convert_to_tensor(y_val)
y_test = tf.convert_to_tensor(y_test)
inputs = keras.Input((12,12,1), name='images')
x0 = tf.keras.layers.Conv2D(8,4,strides=4)(inputs)
x0 = tf.keras.layers.AveragePooling2D(pool_size=(3, 3), name='pooling')(x0)
outputs = tf.keras.layers.Flatten(name='predictions')(x0)
model = keras.Model(inputs=inputs, outputs=outputs)
optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
# Instantiate a loss function.
loss_fn = my_loss_fn
# Prepare the training dataset.
batch_size = 256
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
epochs = 100
for epoch in range(epochs):
print('Start of epoch %d' % (epoch,))
# Iterate over the batches of the dataset.
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
# Open a GradientTape to record the operations run
# during the forward pass, which enables autodifferentiation.
with tf.GradientTape() as tape:
tape.watch(model.trainable_weights)
# Run the forward pass of the layer.
# The operations that the layer applies
# to its inputs are going to be recorded
# on the GradientTape.
logits = model(x_batch_train, training=True) # Logits for this minibatch
# Compute the loss value for this minibatch.
loss_value = loss_fn(y_batch_train, logits)
# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)
# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))
# Log every 200 batches.
if step % 200 == 0:
print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
print('Seen so far: %s samples' % ((step + 1) * 64))
And when running, I get:
ValueError: No gradients provided for any variable: (['conv2d_2/kernel:0', 'conv2d_2/bias:0'],). Provided grads_and_vars is ((None, <tf.Variable 'conv2d_2/kernel:0' shape=(4, 4, 1, 8) dtype=float32, nump
If I use some standard loss function:
For example the following model and loss function
inputs = keras.Input((12,12,1), name='images')
x0 = tf.keras.layers.Conv2D(8,4,strides=4)(inputs)
x0 = tf.keras.layers.AveragePooling2D(pool_size=(3, 3), name='pooling')(x0)
x0 = tf.keras.layers.Flatten(name='features')(x0)
x0 = layers.Dense(16, name='meta_features')(x0)
outputs = layers.Dense(2, name='predictions')(x0)
model = keras.Model(inputs=inputs, outputs=outputs)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
Everything works fine and converges well.
What am I doing wrong and can I fix it?
When I am using custom loss function with batch gradient descent. I am getting error give in photos below after first epoch.
The code runs fine with binaryCrossEntropy.
I am getting the error below:
optimizer.apply_gradients(zip(grads, model_2.trainable_weights))
No gradients provided for any variable: (['dense_22/kernel:0', 'dense_22/bias:0', 'dense_23/kernel:0', 'dense_23/bias:0', 'dense_24/kernel:0', 'dense_24/bias:0'],).
The code:
# importing necessary libraries and functions
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import InputLayer, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Mean, CategoricalAccuracy
import matplotlib.pyplot as plt
import keras.backend as K
import pandas as pd
import tensorflow_datasets as tfds
from collections import deque
from sklearn.model_selection import train_test_split #train test split
from sklearn.model_selection import StratifiedKFold #Stratifying the data (for test train split)
from sklearn.preprocessing import MinMaxScaler #data normalization with sklearn
import matplotlib.pyplot as plt
import math
lambda_par = tf.Variable(0.5)
def fairnessLoss(y_true,y_pred):
print("HI",y_true,y_pred)
cse_min = cse_maj = tf.Variable(0.0)
n_min = n_maj = tf.Variable(0.0)
print(y_pred.shape[0])
for i in range(y_pred.shape[0]):
print(i)
if(y_true[i][0]==1):
cse_min.assign_add(tf.math.log(y_pred[i][0]))
n_min.assign_add(1.0)
else:
cse_maj.assign_add(tf.math.log(1-y_pred[i][0]))
n_maj.assign_add(1.0)
print("First step")
tem1 = tf.divide(cse_min,n_min)
tem2 = tf.divide(cse_maj,n_maj)
fe = tf.Variable(tem1)
fe.assign_add(-tem2)
fe = tf.math.multiply(fe,fe)
ans = tf.Variable(0.0)
ans.assign_add(cse_min)
ans.assign_add(cse_maj)
ans.assign_add(tf.math.multiply(lambda_par,fe))
return ans
model = tf.keras.Sequential([
tf.keras.layers.Dense(8, activation=tf.keras.activations.sigmoid), # hidden layer 1, ReLU activation
tf.keras.layers.Dense(8, activation=tf.keras.activations.sigmoid),
tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid)
])
batch_size=len(train_X)
train_yy = []
for i in range(len(train_y)):
train_yy.append([train_y[i]])
train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_yy))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# # Prepare the validation dataset.
# val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
# val_dataset = val_dataset.batch(batch_size)
train_acc_metric = keras.metrics.BinaryAccuracy()
val_acc_metric = keras.metrics.BinaryAccuracy()
epochs = 500
# Instantiate an optimizer to train the model.
optimizer = keras.optimizers.Adam()
# Instantiate a loss function.
loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
# storing variables to plot loss and accuracy
losses = []
accuracy = []
for epoch in range(epochs):
print("\nStart of epoch %d" % (epoch,))
epoch_loss_avg = Mean()
# Iterate over the batches of the dataset.
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):[
# Open a GradientTape to record the operations run
# during the forward pass, which enables auto-differentiation.
with tf.GradientTape() as tape:
# Run the forward pass of the layer.
# The operations that the layer applies
# to its inputs are going to be recorded
# on the GradientTape.
logits = model(x_batch_train, training=True) # Logits for this minibatch
# Compute the loss value for this minibatch.
loss_value = fairnessLoss(y_batch_train, logits)
# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)
# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))
epoch_loss_avg.update_state(loss_value)
train_acc_metric.update_state(y_batch_train, logits)
losses.append(epoch_loss_avg.result())
accuracy.append(train_acc_metric.result())
# Log every 200 batches.
if step % 200 == 0:
print(
"Training loss (for one batch) at step %d: %.4f"
% (step, float(loss_value))
)
print("Seen so far: %s samples" % ((step + 1) * batch_size))
print(train_acc_metric.result())
train_acc_metric.reset_states()
Photo of the error-1
Photo of the error-2
Loads and Optimizers are dual parallel in the statistics, accelerate to hear the Optimizers or vary their rates to see the true.
Sample: Gradient Tape when applying value to tf.variables, loss functions is what change or apply, and measurement from you provided logics but optimizers are how you achieved it or setting to goals.
Dataset: Image categories problem, image, and labels for categories.
Index Image Label
1 F:\datasets\downloads\Actors\train\Candidt Kibt\01.tif 0
2 F:\datasets\downloads\Actors\train\Candidt Kibt\02.tif 0
19 F:\datasets\downloads\Actors\train\Pikaploy\01.tif 1
Codes: For test Tape and Gradients only
import os
from os.path import exists
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
num_iter = 1000
train_generator_batch_size = 1
batch_size = 1
WIDTH = 256
HEIGHT = 256
CHANNEL = 3
checkpoint_path = "F:\\models\\checkpoint\\" + os.path.basename(__file__).split('.')[0] + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
if not exists(checkpoint_dir) :
os.mkdir(checkpoint_dir)
print("Create directory: " + checkpoint_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definition / Class
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def create_image_generator( ):
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0], dtype=str)
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
validation_split=0.2,
)
train_image_ds = train_generator.flow_from_dataframe(
dataframe = variables,
directory=None,
x_col= 'Image',
y_col= 'Label',
weight_col=None,
target_size=( WIDTH, HEIGHT ),
color_mode='rgb',
classes=None,
class_mode='categorical', ####
batch_size=train_generator_batch_size,
shuffle=True,
seed=None,
save_to_dir=None,
save_prefix='',
save_format='png',
subset=None,
interpolation='nearest',
validate_filenames=True,
)
return train_image_ds
class gradient_tape_optimizer( ):
def __init__ ( self, model, num_iter, content_iter, batch_size ):
self.num_iter = num_iter
self.content_iter = content_iter
self.style_iter = content_iter
self.batch_size = batch_size
self.model = model
self.loss = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy' )
self.optimizer = tf.keras.optimizers.Nadam( learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, name='Nadam' )
def _compute_mean_std( self, feats : tf.Tensor, eps=1e-8 ):
"""
feats: Features should be in shape N x H x W x C
"""
mean = tf.math.reduce_mean(feats, axis=[1,2], keepdims=True)
std = tf.math.reduce_std(feats, axis=[1,2], keepdims=True) + eps
return mean, std
def criterion( self, stylized_img : tf.Tensor, style_img : tf.Tensor, t : tf.Tensor ):
stylized_content_feats = self.model.encode(stylized_img)
stylized_feats = self.model.encode(stylized_img, return_all=True)
style_feats = self.model.encode(style_img, return_all=True)
content_loss = self.mse_loss(t, stylized_content_feats)
style_loss = 0
for f1, f2 in zip(stylized_feats, style_feats):
m1, s1 = self._compute_mean_std(f1)
m2, s2 = self._compute_mean_std(f2)
style_loss += self.mse_loss(m1, m2) + self.mse_loss(s1, s2)
return content_loss + self.style_weight * style_loss
def train( self ):
step = 0
while step < self.num_iter:
content_batch = self.content_iter.get_next()
if content_batch[0].shape[1] != self.batch_size:
content_batch = self.content_iter.get_next()
style_batch = self.style_iter.get_next()
if style_batch[0].shape[1] != self.batch_size:
style_batch = self.style_iter.get_next()
current_label = tf.constant( content_batch[1], shape=( 2, 1 ) ).numpy()
loss_value = tf.Variable( 10.0 )
with tf.GradientTape() as tape:
result = self.model( inputs=tf.constant( content_batch[0], shape=( 1, WIDTH, HEIGHT, CHANNEL ) ) )
result = tf.constant( result, shape=( 2, 1 ) )
predict_label = tf.Variable( tf.constant( self.model.trainable_weights[len(self.model.trainable_weights) - 1], shape=( 2, 1 ) ) )
loss_value = self.loss( result.numpy(), current_label )
loss_value = tf.Variable( tf.constant( loss_value, shape=( 1, ) ).numpy() )
tape.watch( loss_value )
gradients = tape.gradient( loss_value, loss_value )
self.optimizer.apply_gradients(zip(gradients, self.model.trainable_weights))
# log and save every 200 batches
if step % 200 == 0:
if result[tf.math.argmax(result).numpy()[0]][0] > 0 :
print(f'Training loss (for one batch) at step {step}: {self.loss} value {result[tf.math.argmax(result).numpy()[0]]}')
else :
print(f'Training loss (for one batch) at step {step}: {self.loss} value {result[abs( 1 - tf.math.argmax(result).numpy()[0]) ]}')
print(f'Seen so far: {(step+1)*self.batch_size} samples')
self.model.save_weights(checkpoint_path)
step += 1
print("Finished training...")
self.model.save_weights(checkpoint_path)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
variables = pd.read_excel('F:\\temp\\Python\\excel\\Book 7.xlsx', index_col=None, header=[0], dtype=str)
train_image_ds = tf.data.Dataset.from_generator(
create_image_generator,
output_types=None,
output_shapes=None,
args=None,
output_signature=(
tf.TensorSpec(shape=( 1, WIDTH, HEIGHT, CHANNEL ), dtype=tf.float32, name=None), tf.TensorSpec(shape=(1, 2), dtype=tf.float32, name=None),
),
name='train_image_ds'
)
train_image_ds = train_image_ds.batch( 1 )
iterator = iter( train_image_ds )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( WIDTH, HEIGHT, CHANNEL )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 127 * 127)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(192, activation='relu'),
tf.keras.layers.Dense(2),
])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=0.0000001,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=False,
reduction=tf.keras.losses.Reduction.AUTO,
name='sparse_categorical_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
gradient_tape_optimizer = gradient_tape_optimizer( model, num_iter, iterator, batch_size )
result = gradient_tape_optimizer.train()
input( '...' )
Result: Loss is change less that is because custom Optimizers is simply algorithms.
2022-10-15 14:23:57.141863: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
Training loss (for one batch) at step 0: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.06285592]
Seen so far: 1 samples
Training loss (for one batch) at step 200: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05492945]
Seen so far: 201 samples
Training loss (for one batch) at step 400: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05577546]
Seen so far: 401 samples
Training loss (for one batch) at step 600: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.06180618]
Seen so far: 601 samples
Training loss (for one batch) at step 800: <keras.losses.SparseCategoricalCrossentropy object at 0x00000238B5054550> value [0.05990243]
Seen so far: 801 samples
Finished training...
...
I use CIFAR10 dataset to learn how to code using Keras and PyTorch.
The environment is Python 3.6.7, Torch 1.0.0, Keras 2.2.4, Tensorflow 1.14.0.
I use the same batch size, number of epochs, learning rate and optimizer.
I use DenseNet121 as the model.
After training, Keras get 69% accuracy in test data.
PyTorch just get 54% in test data.
I know the results are different, but why is the result so bad in PyTorch?
Here is the Keras code:
import os, keras
from keras.datasets import cifar10
from keras.applications.densenet import DenseNet121
batch_size = 32
num_classes = 10
epochs = 20
# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# model
model = DenseNet121(include_top=True, weights=None, input_shape=(32,32,3), classes=10)
# initiate RMSprop optimizer
opt = keras.optimizers.SGD(lr=0.001, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(x_test, y_test),
shuffle=True)
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
Here is the Pytorch code:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import flatten
import torch.optim as optim
from torchvision import transforms, models
from torch.nn import Linear, Softmax, Module, Sequential, CrossEntropyLoss
import numpy as np
from tqdm import tqdm
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.CIFAR10(root='./DataSet', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./DataSet', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
import torch.nn as nn
import torch.nn.functional as F
class Net(Module):
def __init__(self):
super(Net, self).__init__()
self.funFeatExtra = Sequential(*[i for i in list(models.densenet121().children())[:-1]])
self.funFlatten = flatten
self.funOutputLayer = Linear(1024, 10)
self.funSoftmax = Softmax(dim=1)
def forward(self, x):
x = self.funFeatExtra(x)
x = self.funFlatten(x, 1)
x = self.funOutputLayer(x)
x = self.funSoftmax(x)
return x
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(20): # loop over the dataset multiple times
running_loss = 0.0
for i, data in tqdm(enumerate(trainloader, 0)):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net.cuda()(inputs.cuda())
loss = criterion(outputs, labels.cuda())
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
# if i % 2000 == 1999: # print every 2000 mini-batches
# print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
# running_loss = 0.0
print('Finished Training')
########################################################################
# The results seem pretty good.
#
# Let us look at how the network performs on the whole dataset.
correct = 0
total = 0
with torch.no_grad():
for data in tqdm(testloader):
images, labels = data
outputs = net.cpu()(images.cpu())
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
You are not supposed to softmax the model output before you pass it to CrossEntropyLoss. Per the documentation:
This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
...
The input is expected to contain raw, unnormalized scores for each class.
You can softmax them separately (outside of forward()) when calculating accuracy.
This problem only happens in Pycharm:
I made a very simple NN based on TF2.0 website tutorial. The weird thing about it is when I change batch_size, it keeps going with the old one as if I did nothing. In fact, everything I do is irrelevant.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
class Prototype(tf.keras.models.Model):
def __init__(self, **kwargs):
super(Prototype, self).__init__(**kwargs)
self.l1 = layers.Dense(64, activation='relu', name='dense_1')
self.l2 = layers.Dense(64, activation='relu', name='dense_2')
self.l3 = layers.Dense(10, activation='softmax', name='predictions')
def call(self, ip):
x = self.l1(ip)
x = self.l2(x)
return self.l3(x)
model = Prototype()
model.build(input_shape=(None, 784,))
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy()
batch_size = 250
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
def train_one_epoch():
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
print(x_batch_train.shape)
with tf.GradientTape() as tape:
logits = model(x_batch_train) # Logits for this minibatch
loss_value = loss_fn(y_batch_train, logits)
grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
I run the train_one_epoch(), it trains for one epoch. Then I change batch size and consequently dataset object to give new chunk sizes, BUT when I run train_one_epoch() again, it keeps going with the old batch_size.
Proof: