CNN Training in Keras freezes - tensorflow

I am training a CNN model in Keras (Tensorflow backend). I have used on the fly augmentation with fit_generator(). The model takes images aa input and is supposed to predict the steering angle for a self driving car. The training just freezes after this point. I have tried changing the batch size, learning rate etc, but it doesn't work.
The training freezes at the end of first epoch.
Please help!
[BATCH_SIZE=32
INPUT_IMAGE_ROWS=160
INPUT_IMAGE_COLS=320
INPUT_IMAGE_CHANNELS=3
AUGMENTATION_NUM_BINS=200
NUM_EPOCHS=3
AUGMENTATION_BIN_MAX_PERC=0.5
AUGMENTATION_FACTOR=3
import csv
import cv2
import numpy as np
from random import shuffle
from sklearn.model_selection import train_test_split
import keras
from keras.callbacks import Callback
import math
from keras.preprocessing.image import *
print("\nLoading the dataset from file ...")
def load_dataset(file_path):
dataset = \[\]
with open(file_path) as csvfile:
reader = csv.reader(csvfile)
for line in reader:
try:
dataset.append({'center':line\[0\], 'left':line\[1\], 'right':line\[2\], 'steering':float(line\[3\]),
'throttle':float(line\[4\]), 'brake':float(line\[5\]), 'speed':float(line\[6\])})
except:
continue # some images throw error during loading
return dataset
dataset = load_dataset('C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv')
print("Loaded {} samples from file {}".format(len(dataset),'C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv'))
print("Partioning the dataset:")
shuffle(dataset)
#partitioning data into 80% training, 19% validation and 1% testing
X_train,X_validation=train_test_split(dataset,test_size=0.2)
X_validation,X_test=train_test_split(X_validation,test_size=0.05)
print("X_train has {} elements.".format(len(X_train)))
print("X_validation has {} elements.".format(len(X_validation)))
print("X_test has {} elements.".format(len(X_test)))
print("Partitioning the dataset complete.")
def generate_batch_data(dataset, batch_size = 32):
global augmented_steering_angles
global epoch_steering_count
global epoch_bin_hits
batch_images = np.zeros((batch_size, INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS))
batch_steering_angles = np.zeros(batch_size)
while 1:
for batch_index in range(batch_size):
# select a random image from the dataset
image_index = np.random.randint(len(dataset))
image_data = dataset\[image_index\]
while 1:
try:
image, steering_angle = load_and_augment_image(image_data)
except:
continue
bin_idx = int (steering_angle * AUGMENTATION_NUM_BINS / 2)
if( epoch_bin_hits\[bin_idx\] < epoch_steering_count/AUGMENTATION_NUM_BINS*AUGMENTATION_BIN_MAX_PERC
or epoch_steering_count<500 ):
batch_images\[batch_index\] = image
batch_steering_angles\[batch_index\] = steering_angle
augmented_steering_angles.append(steering_angle)
epoch_bin_hits\[bin_idx\] = epoch_bin_hits\[bin_idx\] + 1
epoch_steering_count = epoch_steering_count + 1
break
yield batch_images, batch_steering_angles
print("\nTraining the model ...")
class LifecycleCallback(keras.callbacks.Callback):
def on_epoch_begin(self, epoch, logs={}):
pass
def on_epoch_end(self, epoch, logs={}):
global epoch_steering_count
global epoch_bin_hits
global bin_range
epoch_steering_count = 0
epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}
def on_batch_begin(self, batch, logs={}):
pass
def on_batch_end(self, batch, logs={}):
self.losses.append(logs.get('loss'))
def on_train_begin(self, logs={}):
print('Beginning training')
self.losses = \[\]
def on_train_end(self, logs={}):
print('Ending training')
# Compute the correct number of samples per epoch based on batch size
def compute_samples_per_epoch(array_size, batch_size):
num_batches = array_size / batch_size
samples_per_epoch = math.ceil(num_batches)
samples_per_epoch = samples_per_epoch * batch_size
return samples_per_epoch
def load_and_augment_image(image_data, side_camera_offset=0.2):
# select a value between 0 and 2 to swith between center, left and right image
index = np.random.randint(3)
if (index==0):
image_file = image_data\['left'\].strip()
angle_offset = side_camera_offset
elif (index==1):
image_file = image_data\['center'\].strip()
angle_offset = 0.
elif (index==2):
image_file = image_data\['right'\].strip()
angle_offset = - side_camera_offset
steering_angle = image_data\['steering'\] + angle_offset
image = cv2.imread(image_file)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# apply a misture of several augumentation methods
image, steering_angle = random_transform(image, steering_angle)
return image, steering_angle
augmented_steering_angles = \[\]
epoch_steering_count = 0
bin_range = int(AUGMENTATION_NUM_BINS / 4 * 3)
epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}
#flips image about y-axis
def horizontal_flip(image,steering_angle):
flipped_image=cv2.flip(image,1);
steering_angle=-steering_angle
return flipped_image,steering_angle
def translate(image,steering_angle,width_shift_range=50.0,height_shift_range=5.0):
tx = width_shift_range * np.random.uniform() - width_shift_range / 2
ty = height_shift_range * np.random.uniform() - height_shift_range / 2
# new steering angle
steering_angle += tx / width_shift_range * 2 * 0.2
transformed_matrix=np.float32(\[\[1,0,tx\],\[0,1,ty\]\])
rows,cols=(image.shape\[0\],image.shape\[1\])
translated_image=cv2.warpAffine(image,transformed_matrix,(cols,rows))
return translated_image,steering_angle
def brightness(image,bright_increase=None):
if(image.shape\[2\]>1):
image_hsv=cv2.cvtColor(image,cv2.COLOR_RGB2HSV)
else:
image_hsv=image
if bright_increase:
image_hsv\[:,:,2\] += bright_increase
else:
bright_increase = int(30 * np.random.uniform(-0.3,1))
image_hsv\[:,:,2\] = image\[:,:,2\] + bright_increase
image = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2RGB)
return image
def rotation(image,rotation_range=5):
image=random_rotation(image,rotation_range);
return image
# Shift range for each channels
def channel_shift(image, intensity=30, channel_axis=2):
image = random_channel_shift(image, intensity, channel_axis)
return image
# Crop and resize the image
def crop_resize_image(image, cols=INPUT_IMAGE_COLS, rows=INPUT_IMAGE_ROWS, top_crop_perc=0.1, bottom_crop_perc=0.2):
height = image.shape\[0\]
width= image.shape\[1\]
# crop top and bottom
top_rows = int(height*top_crop_perc)
bottom_rows = int(height*bottom_crop_perc)
image = image\[top_rows:height-bottom_rows, 0:width\]
# resize to the final sizes even the aspect ratio is destroyed
image = cv2.resize(image, (cols, rows), interpolation=cv2.INTER_LINEAR)
return image
# Apply a sequence of random tranformations for a better generalization and to prevent overfitting
def random_transform(image, steering_angle):
# all further transformations are done on the smaller image to reduce the processing time
image = crop_resize_image(image)
# every second image is flipped horizontally
if np.random.random() < 0.5:
image, steering_angle = horizontal_flip(image, steering_angle)
image, steering_angle = translate(image, steering_angle)
image = rotation(image)
image = brightness(image)
image = channel_shift(image)
return img_to_array(image), steering_angle
from keras.models import Sequential, Model
from keras.layers.core import Lambda, Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import Cropping2D, Convolution2D
from keras.layers.advanced_activations import ELU
from keras.layers.noise import GaussianNoise
from keras.optimizers import Adam
print("\nBuilding and compiling the model ...")
model = Sequential()
model.add(Lambda(lambda x: (x / 127.5) - 1.0, input_shape=(INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS)))
# Conv Layer1 of 16 filters having size(8, 8) with strides (4,4)
model.add(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode="same"))
model.add(ELU())
# Conv Layer1 of 32 filters having size(5, 5) with strides (2,2)
model.add(Convolution2D(32, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(ELU())
# Conv Layer1 of 64 filters having size(5, 5) with strides (2,2)
model.add(Convolution2D(64, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(Flatten())
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(512))
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(1))
model.summary()
adam = Adam(lr=0.0001)
model.compile(loss='mse', optimizer=adam)
lifecycle_callback = LifecycleCallback()
train_generator = generate_batch_data(X_train, BATCH_SIZE)
validation_generator = generate_batch_data(X_validation, BATCH_SIZE)
samples_per_epoch = compute_samples_per_epoch((len(X_train)*AUGMENTATION_FACTOR), BATCH_SIZE)
nb_val_samples = compute_samples_per_epoch((len(X_validation)*AUGMENTATION_FACTOR), BATCH_SIZE)
history = model.fit_generator(train_generator,
validation_data = validation_generator,
samples_per_epoch = ((len(X_train) // BATCH_SIZE ) * BATCH_SIZE) * 2,
nb_val_samples = ((len(X_validation) // BATCH_SIZE ) * BATCH_SIZE) * 2,
nb_epoch = NUM_EPOCHS, verbose=1,
)
print("\nTraining the model ended.")][1]

You have a weird structure for the data generator and that is most likely causing this issue, though I cannot be completely sure.
You structure is as follows:
while 1:
....
for _ in range(batch_size):
randomly select an image # this is inefficient, see below for comments
while 1:
process image
if epoch is not done:
collect images in a list
break
yield ...
Now,
Do not choose images randomly at each iteration. Instead shuffle your dataset once at the starting of each epoch and then choose sequentially.
As far as I understood, if epoch is not done, then break is a typo. Did you mean if epoch is not done then collect images, otherwise break? Your break is inside the if which means when it enters if for the first time, it will come out of the innermost while 1 loop. Surely not what you intend to do, right?
The yield is outside the for loop. You should yield each batch, so if for is iterating over batches, then yield should be inside for.
The structure of a basic data generator should be:
while 1:
shuffle entire dataset once # not applicable for massive datasets
for _ in range(n_batches_per_epoch):
get a data batch
Optionally, do some preprocessing # preferably on the entire batch,
not one by one, you could also preprocess the entire dataset if its simple
enough, such as mean subtraction.
yield batches, labels
I would suggest you to again write the data generator. You could see the myGenerator() function on this page for a basic data generator. Once you write the generator, then test it as a stand-alone function to make sure it outputs the data indefinitely and keeps the track of epochs.

In short, it is hard to say which part is problematic, maybe data, maybe a model, or something else. So please be patient, and you will resolve the issue eventually.
First of all, you can train a baseLine model without data augmentation. If your data augmentation is helpful, you shall expect performance improvement after applying data augmentation to the new augmLine model.
If baseLine behaves similarly to augmLine, you may consider changing your network design. For example, in your current design, 1) Conv2D layers without any activation are very rare, and you may want to use relu or tanh, and 2) ELU(alpha) is known to be sensitive to the alpha value.
If baseLine actually works fine, this is an indicator that your augmLine's data is problematic. To ensure the correctness of the augmented data, you'd better plot both image data and target values and manually verify them. One common mistake for image data augmentation is that if the target values depend on the input image, then you have to generate new target values according to the augmented image. Sometimes this task is not trivial.
Note, to have a fair comparison, you need to keep validation data unchanged for both experiments.

Related

How to re-write tensorflow code to make model training faster?

QUESTION: My training is super slow. How do I rewrite my code to make my deep learning model training faster?
BACKGROUND: I have built a CNN with TensorFlow 2.8.1 to classify CIFAR-100 images using a custom loss function. The CIFAR dataset includes 32x32-pixel RGB images of 100 fine classes (e.g., bear, car) categorized into 20 coarse classes (e.g., large omnivore, vehicle). My custom loss function is a weighted sum of two other loss functions (see code below). The first component is the crossentropy loss for the fine label. The second component is the crossentropy loss for the coarse label. My hope is that this custom loss function will enforce accurate classification of the coarse label to get a more accurate classifications of the fine label (fingers crossed). The comparator will be crossentropy loss of just the fine label (the baseline model). Note that to derive the coarse (hierarchical) loss component, I had to map the y_true (true fine label, integer) and y_pred (predicted softmax probabilities for the fine labels, vector) to the y_true_coarse_int (true coarse label, integer) and y_pred_coarse_hot (predicted coarse label, one hot encoded vector), respectively. FineInts_to_CoarseInts is a python dictionary that allows this mapping.
The training takes >5-hours to run with the custom loss function, whereas training with regular crossentropy loss for the fine classes takes ~1hr. Code was run on a high performance computing cluster with a 32GB CPU and 1 GPU.
See below:
# THIS CODE CELL IS TO DEFINE A CUSTOM LOSS FUNCTION
def crossentropy_loss(y_true, y_pred):
return SparseCategoricalCrossentropy()(y_true, y_pred)
def hierarchical_loss(y_true, y_pred):
y_true = tensorflow.cast(y_true, dtype=float)
y_true_reshaped = tensorflow.reshape(y_true, -1)
y_true_coarse_int = [FineInts_to_CoarseInts[K.eval(y_true_reshaped[i])] for i in range(y_true_reshaped.shape[0])]
y_true_coarse_int = tensorflow.cast(y_true_coarse_int, dtype=tensorflow.float32)
y_pred = tensorflow.cast(y_pred, dtype=float)
y_pred_int = tensorflow.argmax(y_pred, axis=1)
y_pred_coarse_int = [FineInts_to_CoarseInts[K.eval(y_pred_int[i])] for i in range(y_pred_int.shape[0])]
y_pred_coarse_int = tensorflow.cast(y_pred_coarse_int, dtype=tensorflow.float32)
y_pred_coarse_hot = to_categorical(y_pred_coarse_int, 20)
return SparseCategoricalCrossentropy()(y_true_coarse_int, y_pred_coarse_hot)
def custom_loss(y_true, y_pred):
H = 0.5
total_loss = (1 - H) * crossentropy_loss(y_true, y_pred) + H * hierarchical_loss(y_true, y_pred)
return total_loss
During model compilation I had to set the run_eagerly parameter to True. See below:
# THIS CODE CELL IS TO COMPILE THE MODEL
model.compile(optimizer="adam", loss=custom_loss, metrics="accuracy", run_eagerly=True)
The full code is below:
# THIS CODE CELL LOADS THE PACKAGES USED IN THIS NOTEBOOK
# Load core packages for data analysis and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import sys
!{sys.executable} -m pip install pydot
!{sys.executable} -m pip install graphviz
# Load deep learning packages
import tensorflow
from tensorflow.keras.datasets.cifar100 import load_data
from tensorflow.keras import (Model, layers)
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow.keras.backend as K
from tensorflow.keras.utils import (to_categorical, plot_model)
from tensorflow.lookup import (StaticHashTable, KeyValueTensorInitializer)
# Load model evaluation packages
import sklearn
from sklearn.metrics import (confusion_matrix, classification_report)
# Print versions of main ML packages
print("Tensorflow version " + tensorflow.__version__)
print("Scikit learn version " + sklearn.__version__)
# THIS CODE CELL LOADS DATASETS AND CHECKS DATA DIMENSIONS
# There is an option to load the "fine" (100 fine classes) or "coarse" (20 super classes) labels with integer (int) encodings
# We will load both labels for hierarchical classification tasks
(x_train, y_train_fine_int), (x_test, y_test_fine_int) = load_data(label_mode="fine")
(_, y_train_coarse_int), (_, y_test_coarse_int) = load_data(label_mode="coarse")
# EXTRACT DATASET PARAMETERS FOR USE LATER ON
num_fine_classes = 100
num_coarse_classes = 20
input_shape = x_train.shape[1:]
# THIS CODE CELL PROVIDES THE CODE TO LINK INTEGER LABELS TO MEANINGFUL WORD LABELS
# Fine and coarse labels are provided as integers. We will want to link them both to meaningful world labels.
# CREATE A DICTIONARY TO MAP THE 20 COARSE LABELS TO THE 100 FINE LABELS
# This mapping comes from https://keras.io/api/datasets/cifar100/
# Except "computer keyboard" should just be "keyboard" for the encoding to work
CoarseLabels_to_FineLabels = {
"aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
"fish": ["aquarium fish", "flatfish", "ray", "shark", "trout"],
"flowers": ["orchids", "poppies", "roses", "sunflowers", "tulips"],
"food containers": ["bottles", "bowls", "cans", "cups", "plates"],
"fruit and vegetables": ["apples", "mushrooms", "oranges", "pears", "sweet peppers"],
"household electrical devices": ["clock", "keyboard", "lamp", "telephone", "television"],
"household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
"insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
"large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
"large man-made outdoor things": ["bridge", "castle", "house", "road", "skyscraper"],
"large natural outdoor scenes": ["cloud", "forest", "mountain", "plain", "sea"],
"large omnivores and herbivores": ["camel", "cattle", "chimpanzee", "elephant", "kangaroo"],
"medium-sized mammals": ["fox", "porcupine", "possum", "raccoon", "skunk"],
"non-insect invertebrates": ["crab", "lobster", "snail", "spider", "worm"],
"people": ["baby", "boy", "girl", "man", "woman"],
"reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
"small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
"trees": ["maple", "oak", "palm", "pine", "willow"],
"vehicles 1": ["bicycle", "bus", "motorcycle", "pickup" "truck", "train"],
"vehicles 2": ["lawn-mower", "rocket", "streetcar", "tank", "tractor"]
}
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED COARSE LABEL TO THE WORD LABEL
# Create list of Course Labels
CoarseLabels = list(CoarseLabels_to_FineLabels.keys())
# The target variable in CIFER100 is encoded such that the coarse class is assigned an integer based on its alphabetical order
# The CoarseLabels list is already alphabetized, so no need to sort
CoarseInts_to_CoarseLabels = dict(enumerate(CoarseLabels))
# CREATE A DICTIONARY TO MAP THE WORD LABEL TO THE INTEGER-ENCODED COARSE LABEL
CoarseLabels_to_CoarseInts = dict(zip(CoarseLabels, range(20)))
# CREATE A DICTIONARY TO MAP THE 100 FINE LABELS TO THE 20 COARSE LABELS
FineLabels_to_CoarseLabels = {}
for CoarseLabel in CoarseLabels:
for FineLabel in CoarseLabels_to_FineLabels[CoarseLabel]:
FineLabels_to_CoarseLabels[FineLabel] = CoarseLabel
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED FINE LABEL TO THE WORD LABEL
# Create a list of the Fine Labels
FineLabels = list(FineLabels_to_CoarseLabels.keys())
# The target variable in CIFER100 is encoded such that the fine class is assigned an integer based on its alphabetical order
# Sort the fine class list.
FineLabels.sort()
FineInts_to_FineLabels = dict(enumerate(FineLabels))
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED FINE LABELS TO THE INTEGER-ENCODED COARSE LABELS
b = list(dict(sorted(FineLabels_to_CoarseLabels.items())).values())
FineInts_to_CoarseInts = dict(zip(range(100), [CoarseLabels_to_CoarseInts[i] for i in b]))
#Tensor version of dictionary
#fine_to_coarse = tensorflow.constant(list((FineInts_to_CoarseInts).items()), dtype=tensorflow.int8)
# THIS CODE CELL IS TO BUILD A FUNCTIONAL MODEL
inputs = layers.Input(shape=input_shape)
x = layers.BatchNormalization()(inputs)
x = layers.Conv2D(64, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(256, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(256, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(1024, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.30)(x)
x = layers.Dense(512, activation = "relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.30)(x)
output_fine = layers.Dense(num_fine_classes, activation="softmax", name="output_fine")(x)
model = Model(inputs=inputs, outputs=output_fine)
# THIS CODE CELL IS TO DEFINE A CUSTOM LOSS FUNCTION
def crossentropy_loss(y_true, y_pred):
return SparseCategoricalCrossentropy()(y_true, y_pred)
def hierarchical_loss(y_true, y_pred):
y_true = tensorflow.cast(y_true, dtype=float)
y_true_reshaped = tensorflow.reshape(y_true, -1)
y_true_coarse_int = [FineInts_to_CoarseInts[K.eval(y_true_reshaped[i])] for i in range(y_true_reshaped.shape[0])]
y_true_coarse_int = tensorflow.cast(y_true_coarse_int, dtype=tensorflow.float32)
y_pred = tensorflow.cast(y_pred, dtype=float)
y_pred_int = tensorflow.argmax(y_pred, axis=1)
y_pred_coarse_int = [FineInts_to_CoarseInts[K.eval(y_pred_int[i])] for i in range(y_pred_int.shape[0])]
y_pred_coarse_int = tensorflow.cast(y_pred_coarse_int, dtype=tensorflow.float32)
y_pred_coarse_hot = to_categorical(y_pred_coarse_int, 20)
return SparseCategoricalCrossentropy()(y_true_coarse_int, y_pred_coarse_hot)
def custom_loss(y_true, y_pred):
H = 0.5
total_loss = (1 - H) * crossentropy_loss(y_true, y_pred) + H * hierarchical_loss(y_true, y_pred)
return total_loss
# THIS CODE CELL IS TO COMPILE THE MODEL
model.compile(optimizer="adam", loss=crossentropy_loss, metrics="accuracy", run_eagerly=False)
# THIS CODE CELL IS TO TRAIN THE MODEL
history = model.fit(x_train, y_train_fine_int, epochs=200, validation_split=0.25, batch_size=100)
# THIS CODE CELL IS TO VISUALIZE THE TRAINING
history_frame = pd.DataFrame(history.history)
history_frame.to_csv("history.csv")
history_frame.loc[:, ["accuracy", "val_accuracy"]].plot()
history_frame.loc[:, ["loss", "val_loss"]].plot()
plt.show()
# THIS CODE CELL IS TO EVALUATE THE MODEL ON AN INDEPENDENT DATASET
score = model.evaluate(x_test, y_test_fine_int, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])
Quantization
Quantization is the technique that converts your number type float32 to int8. It means your model size will be lesser.
There are two types of quantization before training and after training.
Try to apply quantization before training and let me know the results.
Refer to this video for Quantization

Unexpected input data type. Actual: (tensor(double)) , expected: (tensor(float))

I am learning this new ONNX framework that allows us to deploy the deep learning (and others) model into production.
However, there is one thing I am missing. I thought that the main reason for having such a framework is so that for inference purposes e.g. when we have a trained model and want to use it in a different venv (where for example we cannot have PyTorch) the model still can be used.
I have preped a "from scratch" example here:
# Modules
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision
import onnx
import onnxruntime
import matplotlib.pyplot as plt
import numpy as np
# %config Completer.use_jedi = False
# MNIST Example dataset
train_loader = torch.utils.data.DataLoader(
torchvision.datasets.MNIST(
'data', train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.ToTensor(),
])),
batch_size=800)
# Take data and labels "by hand"
inputs_batch, labels_batch = next(iter(train_loader))
# Simple Model
class CNN(nn.Module):
def __init__(self, in_channels, num_classes):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(in_channels=in_channels,
out_channels = 10, kernel_size = (3, 3), stride = (1, 1), padding=(1, 1))
self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride = (2, 2))
self.conv2 = nn.Conv2d(in_channels = 10, out_channels=16, kernel_size = (3, 3), stride = (1, 1), padding=(1, 1))
self.fc1 = nn.Linear(16*7*7, num_classes)
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.pool(x)
x = F.relu(self.conv2(x))
x = self.pool(x)
x = x.reshape(x.shape[0], -1)
x = self.fc1(x)
return x
# Training setting
device = 'cpu'
batch_size = 64
learning_rate = 0.001
n_epochs = 10
# Dataset prep
dataset = TensorDataset(inputs_batch, labels_batch)
TRAIN_DF = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)
# Model Init
model = CNN(in_channels=1, num_classes=10)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# Training Loop
for epoch in range(n_epochs):
for data, labels in TRAIN_DF:
model.train()
# Send Data to GPU
data = data.to(device)
# Send Data to GPU
labels = labels.to(device)
# data = data.reshape(data.shape[0], -1)
# Forward
pred = model(data)
loss = F.cross_entropy(pred, labels)
# Backward
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Check Accuracy
def check_accuracy(loader, model):
num_correct = 0
num_total = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device)
y = y.to(device)
# x = x.reshape(x.shape[0], -1)
scores = model(x)
_, pred = scores.max(1)
num_correct += (pred == y).sum()
num_total += pred.size(0)
print(F"Got {num_correct} / {num_total} with accuracy {float(num_correct)/float(num_total)*100: .2f}")
check_accuracy(TRAIN_DF, model)
# Inference with ONNX
# Create Artifical data of the same size
img_size = 28
dummy_data = torch.randn(1, img_size, img_size)
dummy_input = torch.autograd.Variable(dummy_data).unsqueeze(0)
input_name = "input"
output_name = "output"
model_eval = model.eval()
torch.onnx.export(
model_eval,
dummy_input,
"model_CNN.onnx",
input_names=["input"],
output_names=["output"],
)
# Take Random Image from Training Data
X_pred = data[4].unsqueeze(0)
# Convert the Tensor image to PURE numpy and pretend we are working in venv where we only have numpy - NO PYTORCH
X_pred_np = X_pred.numpy()
X_pred_np = np.array(X_pred_np)
IMG_Rando = np.random.rand(1, 1, 28, 28)
np.shape(X_pred_np) == np.shape(IMG_Rando)
ort_session = onnxruntime.InferenceSession(
"model_CNN.onnx"
)
def to_numpy(tensor):
return (
tensor.detach().gpu().numpy()
if tensor.requires_grad
else tensor.cpu().numpy()
)
# compute ONNX Runtime output prediction
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: X_pred_np}
# DOES NOT WORK
ort_inputs = {ort_session.get_inputs()[0].name: IMG_Rando}
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(X_pred)}
ort_outs = ort_session.run(None, ort_inputs)
ort_outs
Firstly, we create a simple model and train it on the MNIST dataset.
Then we export the trained model using the ONNX framework.
Now, when I want to classify an image using the X_pred_np It works even though it is a "pure" NumPy, which is what I want.
However, I suspect that this particular case works only because it has been derived from the PyTorch tensor object, and thus "under the hood" it still has PyTorch attributes.
While when I try to inference on the random "pure" NumPy object IMG_Rando, there seems to be a problem:
Unexpected input data type. Actual: (tensor(double)) , expected: (tensor(float)).
Referring that PyTorch form is needed.
Is there a way how to be able to use only numpy Images for the ONNX predictions?. So the inference can be performed in separated venv where no pytorch is installed?
Secondly, is there a way that ONNX would remember the actual classes?
In this particular case, the index corresponds to the label of the image. However, in animal classification, ONNX would not provide us with the "DOG" and "CAT" and other labels but would only provide us the index of the predicted label. Which we would need to run throw our own "prediction dictionary" so we know that the fifth label is associated with "cat" and sixth label is associated with "dog" etc.
Numpy defaults to float64 while pytorch defaults to float32. Cast the input to float32 before the inference:
IMG_Rando = np.random.rand(1, 1, 28, 28).astype(np.float32)
double is short for double-precision floating-point format, which is a floating point number representation on 64 bits, while float refers to a floating point number on 32 bits.
As an improvement to the accepted answer, the idiomatic way to generate random numbers in Numpy is now by using a Generator. This offers the benefit of being able to create the array in the right type directly, rather than using the expensive astype operation, which copies the array (as in the accepted answer). Thus, the improved solution would look like:
rng = np.random.default_rng() # set seed if desired
IMG_Rando = rng.random((1, 1, 28, 28), dtype=np.float32)

keras - `sample_weight` results in NaN when zero passed - also not efficient for unbalanced data

I am designing a model with two outputs, y and dy, where I have much more training data for y than dy while the location (x) of those data points are the same (please check the image bellow).
I am handling this issue with sample_weight in keras.model.fit. There are two concerns:
If I pass 'zero' for a sample weight, after the first training, it results into NaN. I instead have to pass a very small number, which I am not sure how it affects the training.
This is inefficient if I have multiple outputs with many of them have available training data at very few locations. Because, all the training data will be included in the updates. Is there any other way to handle this case?
Note that Keras works fine training the model, however, I am looking for more efficient way to also be able to pass zero for unwanted weights.
Please check the code bellow:
import numpy as np
import keras as k
import tensorflow as tf
from matplotlib.pyplot import plot, show, legend
# Note this is needed to handle lambda layers as Keras' gradient does not work in this setup.
def custom_grad(y, x):
return tf.gradients(y, x, unconnected_gradients='zero', colocate_gradients_with_ops=True)
# Setting up keras model.
x = k.Input((1,), name='x', dtype='float32')
lay = k.layers.Dense(10, activation='tanh')(x)
lay = k.layers.Dense(10, activation='tanh')(lay)
y = k.layers.Dense(1, name='y')(lay)
dy = k.layers.Lambda(lambda f: custom_grad(f, x), name='dy')(y)
model = k.Model(x, [y, dy])
# Preparing training data.
num_samples = 10000
x_true = np.linspace(0.0, np.pi, num_samples)
y_true = np.sin(x_true)
dy_true = np.zeros_like(y_true)
# for dy, we only have values at certain points -
# say 10% of what is available for yfrom initial and the end.
percentage = 0.1
dy_ids = np.concatenate((np.arange(0, num_samples*percentage, dtype=int),
np.arange(num_samples*(1-percentage), 10000, dtype=int)))
dy_true[dy_ids] = np.cos(x_true[dy_ids])
# I use sample weight to circumvent unbalanced available data.
y_sample_weight = np.ones_like(y_true)
dy_sample_weight = np.zeros_like(y_true) + 1.0e-8
dy_sample_weight[dy_ids] = num_samples/dy_ids.size
assert abs(dy_sample_weight.sum() - num_samples) <= 1.0e-3
# training the model.
model.compile("adam", loss="mse")
model.fit(x_true, [y_true, dy_true],
sample_weight=[y_sample_weight, dy_sample_weight],
epochs=50, shuffle=True)
[y_pred, dy_pred] = model.predict(x_true)
# expected outputs.
plot(x_true, y_true, '.k', label='y true')
plot(x_true[dy_ids], dy_true[dy_ids], '.r', label='dy true')
plot(x_true, y_pred, '--b', label='y pred')
plot(x_true, dy_pred, '--b', label='dy pred')
legend()
show()

MNIST GAN generators white area in middle surrounded by black

The following code is copied from a GAN MNIST tutorial on UDEMY. When I run the code, it converges towards creating images with a large white area in the center that is black at the sides (picture an empty filled circle against a black background). I have no idea what the problem is as I have only done what the tutorial told me to do word for word. The only difference is that I extract the MNIST data differently. Is there something about tensorflow that has changed recently?
import tensorflow as tf
import numpy as np
import gzip
from PIL import Image
import os.path
def extract_data(filename, num_images):
"""Extract the images into a 4D tensor [image index, y, x, channels].
Values are rescaled from [0, 255] down to [-0.5, 0.5].
"""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
bytestream.read(16)
buf = bytestream.read(28 * 28 * num_images)
data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
#data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
data = data.reshape(num_images, 28, 28, 1)
return data
fname_img_train = extract_data('../Data/MNIST/train-images-idx3-ubyte.gz', 60000)
def generator(z, reuse=None):
with tf.variable_scope('gen',reuse=reuse):
hidden1 = tf.layers.dense(inputs=z,units=128)
alpha = 0.01
hidden1=tf.maximum(alpha*hidden1,hidden1)
hidden2=tf.layers.dense(inputs=hidden1,units=128)
hidden2 = tf.maximum(alpha*hidden2,hidden2)
output=tf.layers.dense(hidden2,units=784, activation=tf.nn.tanh)
return output
def discriminator(X, reuse=None):
with tf.variable_scope('dis',reuse=reuse):
hidden1=tf.layers.dense(inputs=X,units=128)
alpha=0.01
hidden1=tf.maximum(alpha*hidden1,hidden1)
hidden2=tf.layers.dense(inputs=hidden1,units=128)
hidden2=tf.maximum(alpha*hidden2,hidden2)
logits=tf.layers.dense(hidden2,units=1)
output=tf.sigmoid(logits)
return output, logits
real_images=tf.placeholder(tf.float32,shape=[None,784])
z=tf.placeholder(tf.float32,shape=[None,100])
G = generator(z)
D_output_real, D_logits_real = discriminator(real_images)
D_output_fake, D_logits_fake = discriminator(G,reuse=True)
def loss_func(logits_in,labels_in):
return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
logits=logits_in,labels=labels_in))
D_real_loss = loss_func(D_logits_real,tf.ones_like(D_logits_real)*0.9)
D_fake_loss = loss_func(D_logits_fake,tf.zeros_like(D_logits_real))
D_loss = D_real_loss + D_fake_loss
G_loss = loss_func(D_logits_fake,tf.ones_like(D_logits_fake))
learning_rate = 0.001
tvars = tf.trainable_variables()
d_vars= [var for var in tvars if 'dis' in var.name]
g_vars = [var for var in tvars if 'gen' in var.name]
D_trainer = tf.train.AdamOptimizer(learning_rate).minimize(D_loss,var_list=d_vars)
G_trainer = tf.train.AdamOptimizer(learning_rate).minimize(G_loss,var_list=g_vars)
batch_size=100
epochs=30
set_size=60000
init = tf.global_variables_initializer()
samples=[]
def create_image(img, name):
img = np.reshape(img, (28, 28))
print("before")
print(img)
img = (np.multiply(np.divide(np.add(img, 1.0), 2.0),255.0).astype(np.int16))
print("after")
print(img)
im = Image.fromarray(img.astype('uint8'))
im.save(name)
with tf.Session() as sess:
sess.run(init)
for epoch in range(epochs):
np.random.shuffle(fname_img_train)
num_batches=int(set_size/batch_size)
for i in range(num_batches):
batch = fname_img_train[i*batch_size:((i+1)*batch_size)]
batch_images = np.reshape(batch, (batch_size,784))
batch_images = batch_images*2.0-1.0
batch_z = np.random.uniform(-1,1,size=(batch_size,100))
_ = sess.run(D_trainer, feed_dict={real_images:batch_images,z:batch_z})
_ = sess.run(G_trainer,feed_dict={z:batch_z})
print("ON EPOCH {}".format(epoch))
sample_z = np.random.uniform(-1,1,size=(batch_size,100))
gen_sample = sess.run(G,feed_dict={z:sample_z})
create_image(gen_sample[0], "img"+str(epoch)+".png")
As far as I can see, you are not normalizing the training data. Instead of using your extract_data() function, it is much easier to do the following:
from tensorflow.keras.datasets.mnist import load_data()
(train_data, train_labels), _ = load_data()
train_data /= 255.
Besides, usually people sample twice from the latent space each epoch: once for the discriminator and once for the generator. Still, it did not seem to make a difference.
After implementing these changes, using a batch size of 200 and training for 100 epochs, I got the following result: gen_sample. The result is pretty bad, but it is definitely better than an "empty filled circle against a black background".
Note that the architecture of the generator and of the discriminator that you are using is very simple. From my experience, stacking some convolutional layers gives perfect results. In addition, I would not use the tf.maximum() function, since it creates discontinuities that may negatively impact the flow of the gradients.
Finally, instead of your create_image() function, I used the following:
def plot_mnist(samples, name):
fig = plt.figure(figsize=(6,6))
gs = gridspec.GridSpec(6,6)
gs.update(wspace=0.05, hspace=0.05)
for i, sample in enumerate(samples):
ax = plt.subplot(gs[i])
plt.axis('off')
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_aspect('equal')
plt.imshow(sample.reshape(28,28), cmap='Greys_r')
plt.savefig('{}.png'.format(name))
plt.close()
There are many different ways of improving the quality of a GAN model, and the majority of those techniques can be easily found online. Please let me know if you have any specific question.

Calling the same batch tensorflow

I have a tensorflow graph that is reading from .tfrecords files, as described in the process here (taken from Tflow docs):
def read_my_file_format(filename_queue):
reader = tf.SomeReader()
key, record_string = reader.read(filename_queue)
example, label = tf.some_decoder(record_string)
processed_example = some_processing(example)
return processed_example, label
def input_pipeline(filenames, batch_size, num_epochs=None):
filename_queue = tf.train.string_input_producer(
filenames, num_epochs=num_epochs, shuffle=True)
example, label = read_my_file_format(filename_queue)
# min_after_dequeue defines how big a buffer we will randomly sample
# from -- bigger means better shuffling but slower start up and more
# memory used.
# capacity must be larger than min_after_dequeue and the amount larger
# determines the maximum we will prefetch. Recommendation:
# min_after_dequeue + (num_threads + a small safety margin) * batch_size
min_after_dequeue = 10000
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch(
[example, label], batch_size=batch_size, capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch`
In my code, a single batch (as returned by input_pipeline above) is used as an input to multiple networks (let's call them A,B) in my graph per iteration. So if I call:
#...define graph...
sess.run([A,B])
does tensorflow guarantee that it will use the same batch for each sess.run call?
If input of model A and B is example_batch and you evaluate the models simultaneously (as in your example sess.run([A,B])) then I expect to see the same batch. Because both models are fed by the same dequeuing operation. As soon as you break the synchronization (i.e., running separately) then inputs will be different.
The following code snippet looks trivial but shows my point.
import tensorflow as tf
import numpy as np
import time
batch_size = 16
input_shape, target_shape = (128), () # input with dimensionality 128.
num_threads = 4 # for input pipeline
queue_capacity = 10 # for input pipeline
def get_random_data_sample():
# Random inputs and targets
np_input = np.float32(np.random.normal(0,1, input_shape))
np_target = np.int32(1)
# Sleep randomly between 1 and 3 seconds.
#time.sleep(np.random.randint(1,3,1)[0])
return np_input, np_target
tensorflow_input, tensorflow_target = tf.py_func(get_random_data_sample, [], [tf.float32, tf.int32])
def create_model(inputs, hidden_size, num_hidden_layers):
# Create a dummy dense network.
dense_layer = inputs
for i in range(num_hidden_layers):
dense_layer = tf.layers.dense(
inputs=dense_layer,
units=hidden_size,
kernel_initializer= tf.zeros_initializer(),
bias_initializer= tf.zeros_initializer(),
activation=None,
use_bias=True,
reuse=False)
return dense_layer, inputs
# input pipeline
batch_inputs, batch_targets = tf.train.batch([tensorflow_input, tensorflow_target],
batch_size=batch_size,
num_threads=num_threads,
shapes=[input_shape, target_shape],
capacity=queue_capacity)
# Different models A and B using the same input operation.
modelA, modelA_inputs = create_model(batch_inputs, 32, 1) # 1 hidden layer
modelB, modelB_inputs = create_model(batch_inputs, 64, 2) # 2 hidden layers
sess = tf.InteractiveSession()
tf.train.start_queue_runners()
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
sess = tf.InteractiveSession()
tf.train.start_queue_runners()
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
# (1) Evaluate the models simultaneously.
resultA, resultB, inputsA, inputsB = sess.run([modelA, modelB, modelA_inputs, modelB_inputs])
assert((inputsA == inputsB).all())
# (2) Evaluate the models separately.
resultA2, inputsA2 = sess.run([modelA, modelA_inputs])
resultB2, inputsB2 = sess.run([modelB, modelB_inputs])
assert((inputsA2 == inputsB2).all())
Naturally the second evaluation uses different input batches and fails assertion. I hope this helps.