I have had considerable trouble getting my TensorFlow model to actually run on my own input data.
I am drawing images from labeled directories. I have two classes of images, "good" and "bad," which are stored in their own respective directories.
I read them in using TensorFlow's built-in list_files(glob) and process them with strictly TensorFlow operations. However, now that I am trying to run my model, it fails on the first epoch and outputs the error: tensorflow/core/framework/op_kernel.cc:1730] OP_REQUIRES failed at cast_op.cc:123 : Unimplemented: Cast string to float is not supported
My code is as follows:
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import tensorflow as tf
from tensorflow.keras import layers, models
import random
import os
class E6Classifier:
    image_size = 256
    train_test_proportion = .8
    current_working_directory = pathlib.Path.cwd()
    data_directory = current_working_directory.parent / 'Jupyter Notebooks' / 'Tensorflow' / 'e6Classifier' / 'data'
    categories = ['good', 'bad']
    good_images = []
    bad_images = []
    batch_size = 32
    total_dataset = None
    train_dataset = None
    test_dataset = None

    def __init__(self):
        self.read_images()
        self.print_statistics()
        #TensorFlow implementation
        self.make_tensorflow_dataset()
        self.train_test_split()
        self.create_model()
        self.train_model()

    def read_images(self):
        good_path = self.data_directory / self.categories[0]
        bad_path = self.data_directory / self.categories[1]
        filetypes = ('*.jpg', '*.png')
        for filetype in filetypes:
            self.good_images.extend(good_path.glob(filetype))
            self.bad_images.extend(bad_path.glob(filetype))

    def print_statistics(self):
        self.num_good_images = len(self.good_images)
        self.num_bad_images = len(self.bad_images)
        self.total_images = self.num_good_images + self.num_bad_images
        self.proportion_good = round(self.num_good_images / self.total_images * 100, 2)
        print(str(self.total_images) + ' total images | ' + str(self.num_good_images) + ' good images, ' + str(self.num_bad_images) + ' bad images | ' + str(self.proportion_good) + ' percent good to bad')

    def make_tensorflow_dataset(self):
        directory_strings = []
        for filetype in ['*.jpg', '*.png']:
            directory_strings.append(str(self.data_directory / 'good' / filetype))
            directory_strings.append(str(self.data_directory / 'bad' / filetype))
        list_dataset = tf.data.Dataset.list_files(directory_strings)
        labeled_dataset = list_dataset.map(self.process_tensor_path)
        self.total_dataset = labeled_dataset

    def process_tensor_path(self, filepath):
        label = tf.strings.split(filepath, os.sep)[-2]
        image = tf.io.read_file(filepath)
        image = tf.image.decode_image(image, channels = 3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize_with_pad(image, target_width = self.image_size, target_height = self.image_size)
        return image, label

    def train_test_split(self):
        num_training_images = int(round(self.total_images * self.train_test_proportion, 0))
        self.total_dataset.shuffle(buffer_size = self.total_images)
        self.train_dataset = self.total_dataset.take(num_training_images)
        self.test_dataset = self.total_dataset.skip(num_training_images)

    def create_model(self):
        #Batch datasets
        self.train_dataset = self.train_dataset.batch(self.batch_size, drop_remainder = True)
        self.test_dataset = self.test_dataset.batch(self.batch_size, drop_remainder = True)
        self.total_dataset = self.total_dataset.batch(self.batch_size, drop_remainder = True)
        #Create model
        self.model = models.Sequential()
        #Add a convolutional layer to detect features in the image
        self.model.add(layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (self.image_size, self.image_size, 3)))
        #Add a pooling layer to remove sensitivity to position in the image of the feature
        self.model.add(layers.MaxPooling2D((2, 2)))
        #Repeat ad nauseam
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        self.model.add(layers.MaxPooling2D(2, 2))
        self.model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
        #Add dense layers
        self.model.add(layers.Flatten())
        self.model.add(layers.Dense(64, activation = 'relu'))
        self.model.add(layers.Dense(2))
        #Compile the model for training
        self.model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), metrics = ['accuracy'])

    def train_model(self):
        #self.train_dataset.repeat()
        #self.test_dataset.repeat()
        self.model.fit(self.train_dataset, epochs = 1, validation_data = self.test_dataset, verbose = True)

def main():
    E6Classifier()

if __name__ == '__main__':
    main()
I don't understand what is causing this error to be thrown, and there doesn't seem to be a tremendous amount of information about where the call is actually failing. I have looked at the datatypes of my TensorFlow datasets, and they indicate they are tuples of Tensors with types float32 and string, respectively.
Is this an issue with having strings for category names? If so, how would I go about replacing the category names with numbers?
You want to classify your images into two categories: good and bad. Since training a network involves calculating a loss value (which is a numerical value) and then backpropagating it to update the weights (also numerical values), you should have numerical outputs and labels.
Convert your 'good' label to e.g. 1, and the 'bad' label to 0. You can do this in TensorFlow (see the example below) or in your folder structure (rename the folders and modify your code accordingly).
string_label = tf.strings.split(filepath, os.sep)[-2]
label = tf.constant(1.) if tf.math.equal(string_label, tf.constant('good', dtype=tf.string)) else tf.constant(0.)
For TensorFlow version 2.10.0, the accepted answer produced the following error:
OperatorNotAllowedInGraphError: Using a symbolic tf.Tensor as a Python bool is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
This can be rectified by using the tf.cond function like so:
str_label = tf.strings.split(file_path, os.sep)[-2]
label = tf.cond(tf.math.equal(str_label, tf.constant('land', dtype=tf.string)),
                lambda: tf.constant(1, shape=(1,)),
                lambda: tf.constant(0, shape=(1,)))
N.B. including the shape was required for some use cases, so I included it here.
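A branchless alternative also stays graph-compatible and avoids tf.cond entirely: compare the directory name against the positive class and cast the resulting boolean. A minimal sketch against the 'good'/'bad' layout from the original question:
import os
import tensorflow as tf

def label_from_path(filepath):
    # The parent directory name is the class; the equality check yields a
    # boolean tensor that casts cleanly to an integer label (1 = 'good').
    string_label = tf.strings.split(filepath, os.sep)[-2]
    return tf.cast(tf.math.equal(string_label, 'good'), tf.int32)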
Related
QUESTION: My training is super slow. How do I rewrite my code to make my deep learning model training faster?
BACKGROUND: I have built a CNN with TensorFlow 2.8.1 to classify CIFAR-100 images using a custom loss function. The CIFAR dataset includes 32x32-pixel RGB images of 100 fine classes (e.g., bear, car) categorized into 20 coarse classes (e.g., large omnivore, vehicle). My custom loss function is a weighted sum of two other loss functions (see code below). The first component is the crossentropy loss for the fine label. The second component is the crossentropy loss for the coarse label. My hope is that this custom loss function will enforce accurate classification of the coarse label to get a more accurate classification of the fine label (fingers crossed). The comparator will be the crossentropy loss of just the fine label (the baseline model). Note that to derive the coarse (hierarchical) loss component, I had to map y_true (true fine label, integer) and y_pred (predicted softmax probabilities for the fine labels, vector) to y_true_coarse_int (true coarse label, integer) and y_pred_coarse_hot (predicted coarse label, one-hot encoded vector), respectively. FineInts_to_CoarseInts is a Python dictionary that allows this mapping.
The training takes >5 hours to run with the custom loss function, whereas training with regular crossentropy loss for the fine classes takes ~1 hour. The code was run on a high-performance computing cluster with a 32GB CPU and 1 GPU.
See below:
# THIS CODE CELL IS TO DEFINE A CUSTOM LOSS FUNCTION
def crossentropy_loss(y_true, y_pred):
    return SparseCategoricalCrossentropy()(y_true, y_pred)

def hierarchical_loss(y_true, y_pred):
    y_true = tensorflow.cast(y_true, dtype=float)
    y_true_reshaped = tensorflow.reshape(y_true, -1)
    y_true_coarse_int = [FineInts_to_CoarseInts[K.eval(y_true_reshaped[i])] for i in range(y_true_reshaped.shape[0])]
    y_true_coarse_int = tensorflow.cast(y_true_coarse_int, dtype=tensorflow.float32)
    y_pred = tensorflow.cast(y_pred, dtype=float)
    y_pred_int = tensorflow.argmax(y_pred, axis=1)
    y_pred_coarse_int = [FineInts_to_CoarseInts[K.eval(y_pred_int[i])] for i in range(y_pred_int.shape[0])]
    y_pred_coarse_int = tensorflow.cast(y_pred_coarse_int, dtype=tensorflow.float32)
    y_pred_coarse_hot = to_categorical(y_pred_coarse_int, 20)
    return SparseCategoricalCrossentropy()(y_true_coarse_int, y_pred_coarse_hot)

def custom_loss(y_true, y_pred):
    H = 0.5
    total_loss = (1 - H) * crossentropy_loss(y_true, y_pred) + H * hierarchical_loss(y_true, y_pred)
    return total_loss
During model compilation I had to set the run_eagerly parameter to True. See below:
# THIS CODE CELL IS TO COMPILE THE MODEL
model.compile(optimizer="adam", loss=custom_loss, metrics="accuracy", run_eagerly=True)
The full code is below:
# THIS CODE CELL LOADS THE PACKAGES USED IN THIS NOTEBOOK
# Load core packages for data analysis and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import sys
!{sys.executable} -m pip install pydot
!{sys.executable} -m pip install graphviz
# Load deep learning packages
import tensorflow
from tensorflow.keras.datasets.cifar100 import load_data
from tensorflow.keras import (Model, layers)
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow.keras.backend as K
from tensorflow.keras.utils import (to_categorical, plot_model)
from tensorflow.lookup import (StaticHashTable, KeyValueTensorInitializer)
# Load model evaluation packages
import sklearn
from sklearn.metrics import (confusion_matrix, classification_report)
# Print versions of main ML packages
print("Tensorflow version " + tensorflow.__version__)
print("Scikit learn version " + sklearn.__version__)
# THIS CODE CELL LOADS DATASETS AND CHECKS DATA DIMENSIONS
# There is an option to load the "fine" (100 fine classes) or "coarse" (20 super classes) labels with integer (int) encodings
# We will load both labels for hierarchical classification tasks
(x_train, y_train_fine_int), (x_test, y_test_fine_int) = load_data(label_mode="fine")
(_, y_train_coarse_int), (_, y_test_coarse_int) = load_data(label_mode="coarse")
# EXTRACT DATASET PARAMETERS FOR USE LATER ON
num_fine_classes = 100
num_coarse_classes = 20
input_shape = x_train.shape[1:]
# THIS CODE CELL PROVIDES THE CODE TO LINK INTEGER LABELS TO MEANINGFUL WORD LABELS
# Fine and coarse labels are provided as integers. We will want to link them both to meaningful word labels.
# CREATE A DICTIONARY TO MAP THE 20 COARSE LABELS TO THE 100 FINE LABELS
# This mapping comes from https://keras.io/api/datasets/cifar100/
# Except "computer keyboard" should just be "keyboard" for the encoding to work
CoarseLabels_to_FineLabels = {
    "aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
    "fish": ["aquarium fish", "flatfish", "ray", "shark", "trout"],
    "flowers": ["orchids", "poppies", "roses", "sunflowers", "tulips"],
    "food containers": ["bottles", "bowls", "cans", "cups", "plates"],
    "fruit and vegetables": ["apples", "mushrooms", "oranges", "pears", "sweet peppers"],
    "household electrical devices": ["clock", "keyboard", "lamp", "telephone", "television"],
    "household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
    "insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
    "large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
    "large man-made outdoor things": ["bridge", "castle", "house", "road", "skyscraper"],
    "large natural outdoor scenes": ["cloud", "forest", "mountain", "plain", "sea"],
    "large omnivores and herbivores": ["camel", "cattle", "chimpanzee", "elephant", "kangaroo"],
    "medium-sized mammals": ["fox", "porcupine", "possum", "raccoon", "skunk"],
    "non-insect invertebrates": ["crab", "lobster", "snail", "spider", "worm"],
    "people": ["baby", "boy", "girl", "man", "woman"],
    "reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
    "small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
    "trees": ["maple", "oak", "palm", "pine", "willow"],
    "vehicles 1": ["bicycle", "bus", "motorcycle", "pickup truck", "train"],
    "vehicles 2": ["lawn-mower", "rocket", "streetcar", "tank", "tractor"]
}
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED COARSE LABEL TO THE WORD LABEL
# Create list of Coarse Labels
CoarseLabels = list(CoarseLabels_to_FineLabels.keys())
# The target variable in CIFAR-100 is encoded such that the coarse class is assigned an integer based on its alphabetical order
# The CoarseLabels list is already alphabetized, so no need to sort
CoarseInts_to_CoarseLabels = dict(enumerate(CoarseLabels))
# CREATE A DICTIONARY TO MAP THE WORD LABEL TO THE INTEGER-ENCODED COARSE LABEL
CoarseLabels_to_CoarseInts = dict(zip(CoarseLabels, range(20)))
# CREATE A DICTIONARY TO MAP THE 100 FINE LABELS TO THE 20 COARSE LABELS
FineLabels_to_CoarseLabels = {}
for CoarseLabel in CoarseLabels:
    for FineLabel in CoarseLabels_to_FineLabels[CoarseLabel]:
        FineLabels_to_CoarseLabels[FineLabel] = CoarseLabel
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED FINE LABEL TO THE WORD LABEL
# Create a list of the Fine Labels
FineLabels = list(FineLabels_to_CoarseLabels.keys())
# The target variable in CIFAR-100 is encoded such that the fine class is assigned an integer based on its alphabetical order
# Sort the fine class list.
FineLabels.sort()
FineInts_to_FineLabels = dict(enumerate(FineLabels))
# CREATE A DICTIONARY TO MAP THE INTEGER-ENCODED FINE LABELS TO THE INTEGER-ENCODED COARSE LABELS
b = list(dict(sorted(FineLabels_to_CoarseLabels.items())).values())
FineInts_to_CoarseInts = dict(zip(range(100), [CoarseLabels_to_CoarseInts[i] for i in b]))
#Tensor version of dictionary
#fine_to_coarse = tensorflow.constant(list((FineInts_to_CoarseInts).items()), dtype=tensorflow.int8)
# THIS CODE CELL IS TO BUILD A FUNCTIONAL MODEL
inputs = layers.Input(shape=input_shape)
x = layers.BatchNormalization()(inputs)
x = layers.Conv2D(64, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(256, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(256, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.Conv2D(1024, (3, 3), padding='same', activation="relu")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)
x = layers.Dropout(0.30)(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.30)(x)
x = layers.Dense(512, activation = "relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.30)(x)
output_fine = layers.Dense(num_fine_classes, activation="softmax", name="output_fine")(x)
model = Model(inputs=inputs, outputs=output_fine)
# THIS CODE CELL IS TO DEFINE A CUSTOM LOSS FUNCTION
def crossentropy_loss(y_true, y_pred):
    return SparseCategoricalCrossentropy()(y_true, y_pred)

def hierarchical_loss(y_true, y_pred):
    y_true = tensorflow.cast(y_true, dtype=float)
    y_true_reshaped = tensorflow.reshape(y_true, -1)
    y_true_coarse_int = [FineInts_to_CoarseInts[K.eval(y_true_reshaped[i])] for i in range(y_true_reshaped.shape[0])]
    y_true_coarse_int = tensorflow.cast(y_true_coarse_int, dtype=tensorflow.float32)
    y_pred = tensorflow.cast(y_pred, dtype=float)
    y_pred_int = tensorflow.argmax(y_pred, axis=1)
    y_pred_coarse_int = [FineInts_to_CoarseInts[K.eval(y_pred_int[i])] for i in range(y_pred_int.shape[0])]
    y_pred_coarse_int = tensorflow.cast(y_pred_coarse_int, dtype=tensorflow.float32)
    y_pred_coarse_hot = to_categorical(y_pred_coarse_int, 20)
    return SparseCategoricalCrossentropy()(y_true_coarse_int, y_pred_coarse_hot)

def custom_loss(y_true, y_pred):
    H = 0.5
    total_loss = (1 - H) * crossentropy_loss(y_true, y_pred) + H * hierarchical_loss(y_true, y_pred)
    return total_loss
# THIS CODE CELL IS TO COMPILE THE MODEL
model.compile(optimizer="adam", loss=crossentropy_loss, metrics="accuracy", run_eagerly=False)
# THIS CODE CELL IS TO TRAIN THE MODEL
history = model.fit(x_train, y_train_fine_int, epochs=200, validation_split=0.25, batch_size=100)
# THIS CODE CELL IS TO VISUALIZE THE TRAINING
history_frame = pd.DataFrame(history.history)
history_frame.to_csv("history.csv")
history_frame.loc[:, ["accuracy", "val_accuracy"]].plot()
history_frame.loc[:, ["loss", "val_loss"]].plot()
plt.show()
# THIS CODE CELL IS TO EVALUATE THE MODEL ON AN INDEPENDENT DATASET
score = model.evaluate(x_test, y_test_fine_int, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])
Quantization
Quantization is a technique that converts your float32 numbers to int8, which means your model size will be smaller.
There are two types of quantization: before training (quantization-aware training) and after training (post-training quantization).
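For the post-training variant, a minimal sketch with the TFLite converter (assuming model is your trained Keras model) looks like this:
import tensorflow as tf

# Post-training quantization sketch: convert the trained Keras model and let
# the converter apply its default weight quantization.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('model_quant.tflite', 'wb') as f:
    f.write(tflite_model)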
Try to apply quantization before training and let me know the results.
Refer to this video for Quantization
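Separately from quantization: the per-sample K.eval loops in hierarchical_loss pull every label out of the graph one element at a time, and they are the reason run_eagerly=True is required, so they are a likely cause of the slowdown. An untested, graph-friendly sketch of the same coarse mapping, building the lookup as a constant tensor (the commented-out fine_to_coarse line in the question hints at this), would be:
import tensorflow
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Constant lookup: fine class index -> coarse class index, built once.
fine_to_coarse = tensorflow.constant(
    [FineInts_to_CoarseInts[i] for i in range(100)], dtype=tensorflow.int32)
# (100, 20) matrix that sums fine-class probabilities into coarse classes.
coarse_matrix = tensorflow.one_hot(fine_to_coarse, depth=20, dtype=tensorflow.float32)

def hierarchical_loss(y_true, y_pred):
    y_true_fine = tensorflow.cast(tensorflow.reshape(y_true, [-1]), tensorflow.int32)
    y_true_coarse = tensorflow.gather(fine_to_coarse, y_true_fine)  # (batch,)
    y_pred_coarse = tensorflow.matmul(y_pred, coarse_matrix)        # (batch, 20)
    return SparseCategoricalCrossentropy()(y_true_coarse, y_pred_coarse)

With the loss expressed this way, run_eagerly=True can be dropped from model.compile. Note this aggregates predicted probabilities instead of taking the non-differentiable argmax of y_pred; that is a deliberate substitution, not the question's original formulation.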
I am learning the new ONNX framework that allows us to deploy deep learning (and other) models into production.
However, there is one thing I am missing. I thought that the main reason for having such a framework is inference, e.g. when we have a trained model and want to use it in a different venv (where, for example, we cannot have PyTorch), the model can still be used.
I have prepared a "from scratch" example here:
# Modules
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision
import onnx
import onnxruntime
import matplotlib.pyplot as plt
import numpy as np
# %config Completer.use_jedi = False
# MNIST Example dataset
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST(
        'data', train=True, download=True,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
        ])),
    batch_size=800)
# Take data and labels "by hand"
inputs_batch, labels_batch = next(iter(train_loader))
# Simple Model
class CNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.fc1 = nn.Linear(16*7*7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x
# Training setting
device = 'cpu'
batch_size = 64
learning_rate = 0.001
n_epochs = 10
# Dataset prep
dataset = TensorDataset(inputs_batch, labels_batch)
TRAIN_DF = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True)
# Model Init
model = CNN(in_channels=1, num_classes=10)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# Training Loop
for epoch in range(n_epochs):
    for data, labels in TRAIN_DF:
        model.train()
        # Send Data to GPU
        data = data.to(device)
        # Send Data to GPU
        labels = labels.to(device)
        # data = data.reshape(data.shape[0], -1)
        # Forward
        pred = model(data)
        loss = F.cross_entropy(pred, labels)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Check Accuracy
def check_accuracy(loader, model):
    num_correct = 0
    num_total = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            # x = x.reshape(x.shape[0], -1)
            scores = model(x)
            _, pred = scores.max(1)
            num_correct += (pred == y).sum()
            num_total += pred.size(0)
    print(F"Got {num_correct} / {num_total} with accuracy {float(num_correct)/float(num_total)*100: .2f}")
check_accuracy(TRAIN_DF, model)
# Inference with ONNX
# Create artificial data of the same size
img_size = 28
dummy_data = torch.randn(1, img_size, img_size)
dummy_input = torch.autograd.Variable(dummy_data).unsqueeze(0)
input_name = "input"
output_name = "output"
model_eval = model.eval()
torch.onnx.export(
    model_eval,
    dummy_input,
    "model_CNN.onnx",
    input_names=["input"],
    output_names=["output"],
)
# Take Random Image from Training Data
X_pred = data[4].unsqueeze(0)
# Convert the Tensor image to PURE numpy and pretend we are working in venv where we only have numpy - NO PYTORCH
X_pred_np = X_pred.numpy()
X_pred_np = np.array(X_pred_np)
IMG_Rando = np.random.rand(1, 1, 28, 28)
np.shape(X_pred_np) == np.shape(IMG_Rando)
ort_session = onnxruntime.InferenceSession(
    "model_CNN.onnx"
)
def to_numpy(tensor):
    return (
        tensor.detach().cpu().numpy()
        if tensor.requires_grad
        else tensor.cpu().numpy()
    )
# compute ONNX Runtime output prediction
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: X_pred_np}
# DOES NOT WORK
ort_inputs = {ort_session.get_inputs()[0].name: IMG_Rando}
# WORKS
# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(X_pred)}
ort_outs = ort_session.run(None, ort_inputs)
ort_outs
Firstly, we create a simple model and train it on the MNIST dataset.
Then we export the trained model using the ONNX framework.
Now, when I want to classify an image using X_pred_np, it works even though it is "pure" NumPy, which is what I want.
However, I suspect that this particular case works only because it has been derived from the PyTorch tensor object, and thus "under the hood" it still has PyTorch attributes.
Whereas when I try to run inference on the random "pure" NumPy object IMG_Rando, there seems to be a problem:
Unexpected input data type. Actual: (tensor(double)) , expected: (tensor(float)).
This suggests that the PyTorch form is needed.
Is there a way to use only NumPy images for the ONNX predictions, so that inference can be performed in a separate venv where no PyTorch is installed?
Secondly, is there a way that ONNX would remember the actual classes?
In this particular case, the index corresponds to the label of the image. However, in animal classification, ONNX would not provide us with the "DOG" and "CAT" and other labels, but would only provide us with the index of the predicted label, which we would need to run through our own "prediction dictionary" so we know that the fifth label is associated with "cat", the sixth label with "dog", etc.
NumPy defaults to float64 while PyTorch defaults to float32. Cast the input to float32 before inference:
IMG_Rando = np.random.rand(1, 1, 28, 28).astype(np.float32)
double is short for double-precision floating-point format, which is a floating-point representation on 64 bits, while float refers to a floating-point number on 32 bits.
As an improvement to the accepted answer, the idiomatic way to generate random numbers in NumPy is now to use a Generator. This offers the benefit of creating the array with the right dtype directly, rather than using the expensive astype operation, which copies the array (as in the accepted answer). Thus, the improved solution would look like:
rng = np.random.default_rng() # set seed if desired
IMG_Rando = rng.random((1, 1, 28, 28), dtype=np.float32)
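On the second part of the question above (class names rather than indices): an ONNX session only returns raw output tensors, so the index-to-name mapping lives outside the model. A minimal sketch of the "prediction dictionary" lookup the asker describes, using only NumPy:
import numpy as np

# For the MNIST model above, the classes are just the digits 0-9; for an
# animal classifier this list would hold names like 'cat', 'dog', ...
class_names = [str(d) for d in range(10)]

ort_outs = ort_session.run(None, ort_inputs)
pred_index = int(np.argmax(ort_outs[0], axis=1)[0])
print(class_names[pred_index])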
I am trying to use Colab to run the gym package with Pac-Man, since the specs in Colab are more powerful than my notebook. This program simulates successfully in Jupyter on my notebook, which uses TensorFlow 1.14. However, errors keep appearing when I put it in Google Colab to simulate, even after debugging and changing part of the code so that it can be used with TensorFlow 2.0. Below is my code
#First we import all the necessary libraries
import numpy as np
import gym
import tensorflow as tf
from tensorflow import keras
from keras.layers import Flatten, Conv2D, Dense
#from tensorflow.contrib.layers import Flatten, conv2d, Dense
from collections import deque, Counter
import random
from datetime import datetime
#Now we define a function called preprocess_observation for preprocessing our input game screen.
#We reduce the image size and convert the image into greyscale.
color = np.array([210, 164, 74]).mean()
def preprocess_observation(obs):
    # Crop and resize the image
    img = obs[1:176:2, ::2]
    # Convert the image to greyscale
    img = img.mean(axis=2)
    # Improve image contrast
    img[img==color] = 0
    # Next we normalize the image from -1 to +1
    img = (img - 128) / 128 - 1
    return img.reshape(88, 80, 1)
#Let us initialize our gym environment
env = gym.make('MsPacman-v0')
n_outputs = env.action_space.n
print(n_outputs)
print(env.env.get_action_meanings())
observation = env.reset()
import tensorflow as tf
import matplotlib.pyplot as plt
for i in range(22):
    if i > 20:
        plt.imshow(observation)
        plt.show()
    observation, _, _, _ = env.step(1)
#Okay, Now we define a function called q_network for building our Q network. We input the game state to the Q network
#and get the Q values for all the actions in that state.
#We build Q network with three convolutional layers with same padding followed by a fully connected layer.
tf.compat.v1.reset_default_graph()
def q_network(X, name_scope):
    # Initialize layers
    initializer = tf.compat.v1.keras.initializers.VarianceScaling(scale=2.0)
    with tf.compat.v1.variable_scope(name_scope) as scope:
        # initialize the convolutional layers
        #layer_1 = tf.keras.layers.Conv2D(X, 32, kernel_size=(8,8), stride=4, padding='SAME', weights_initializer=initializer)
        layer_1_set = Conv2D(32, (8,8), strides=4, padding="SAME", kernel_initializer=initializer)
        layer_1 = layer_1_set(X)
        tf.compat.v1.summary.histogram('layer_1', layer_1)
        #layer_2 = tf.keras.layers.Conv2D(layer_1, 64, kernel_size=(4,4), stride=2, padding='SAME', weights_initializer=initializer)
        layer_2_set = Conv2D(64, (4,4), strides=2, padding="SAME", kernel_initializer=initializer)
        layer_2 = layer_2_set(layer_1)
        tf.compat.v1.summary.histogram('layer_2', layer_2)
        #layer_3 = tf.keras.layers.Conv2D(layer_2, 64, kernel_size=(3,3), stride=1, padding='SAME', weights_initializer=initializer)
        layer_3_set = Conv2D(64, (3,3), strides=1, padding="SAME", kernel_initializer=initializer)
        layer_3 = layer_3_set(layer_2)
        tf.compat.v1.summary.histogram('layer_3', layer_3)
        flatten_layer = Flatten()  # instantiate the layer
        flat = flatten_layer(layer_3)
        fc_set = Dense(128, kernel_initializer=initializer)
        fc = fc_set(flat)
        tf.compat.v1.summary.histogram('fc', fc)
        #Add final output layer
        output_set = Dense(n_outputs, activation=None, kernel_initializer=initializer)
        output = output_set(fc)
        tf.compat.v1.summary.histogram('output', output)
        vars = {v.name[len(scope.name):]: v for v in tf.compat.v1.get_collection(key=tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)}
        #Return both variables and outputs together
        return vars, output
#Next we define a function called epsilon_greedy for performing epsilon greedy policy. In epsilon greedy policy we either select the best action
#with probability 1 - epsilon or a random action with probability epsilon.
#We use decaying epsilon greedy policy where value of epsilon will be decaying over time
#as we don't want to explore forever. So over time our policy will be exploiting only good actions.
epsilon = 0.5
eps_min = 0.05
eps_max = 1.0
eps_decay_steps = 500000
def epsilon_greedy(action, step):
    p = np.random.random(1).squeeze()
    epsilon = max(eps_min, eps_max - (eps_max-eps_min) * step/eps_decay_steps)
    if np.random.rand() < epsilon:
        return np.random.randint(n_outputs)
    else:
        return action
#Now, we initialize our experience replay buffer of length 20000 which holds the experience.
#We store all the agent's experience i.e (state, action, rewards) in the
#experience replay buffer and we sample from this minibatch of experience for training the network.
buffer_len = 20000
exp_buffer = deque(maxlen=buffer_len)
# Now we define our network hyperparameters,
num_episodes = 800
batch_size = 48
input_shape = (None, 88, 80, 1)
learning_rate = 0.001
X_shape = (None, 88, 80, 1)
discount_factor = 0.97
global_step = 0
copy_steps = 100
steps_train = 4
start_steps = 2000
logdir = 'logs'
tf.compat.v1.reset_default_graph()
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Now we define the placeholder for our input i.e game state
X = tf.placeholder(tf.float32, shape=X_shape)
#X = tf.Variable(tf.float32, tf.ones(shape=X_shape))
# we define a boolean called in_training_model to toggle the training
in_training_mode = tf.placeholder(tf.bool)
# we build our Q network, which takes the input X and generates Q values for all the actions in the state
mainQ, mainQ_outputs = q_network(X, 'mainQ')
# similarly we build our target Q network, for policy evaluation
targetQ, targetQ_outputs = q_network(X, 'targetQ')
# define the placeholder for our action values
X_action = tf.placeholder(tf.int32, shape=(None,))
Q_action = tf.reduce_sum(targetQ_outputs * tf.one_hot(X_action, n_outputs), axis=-1, keepdims=True)
#Copy the primary Q network parameters to the target Q network
copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
copy_target_to_main = tf.group(*copy_op)
#Compute and optimize loss using gradient descent optimizer
# define a placeholder for our output i.e action
y = tf.placeholder(tf.float32, shape=(None,1))
# now we calculate the loss which is the difference between actual value and predicted value
loss = tf.reduce_mean(tf.square(y - Q_action))
# we use adam optimizer for minimizing the loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
loss_summary = tf.summary.scalar('LOSS', loss)
merge_summary = tf.summary.merge_all()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
OK, up to here. The error comes out when I run this cell in Colab:
#Copy the primary Q network parameters to the target Q network
copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
copy_target_to_main = tf.group(*copy_op)
The error gives:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-13-58715282cea8> in <module>()
----> 1 copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
2 copy_target_to_main = tf.group(*copy_op)
<ipython-input-13-58715282cea8> in <listcomp>(.0)
----> 1 copy_op = [tf.compat.v1.assign(main_name, targetQ[var_name]) for var_name, main_name in mainQ.items()]
2 copy_target_to_main = tf.group(*copy_op)
KeyError: '/conv2d_1/kernel:0'
I have two questions.
First, how do I solve the problem stated above?
Second, in TensorFlow 2.0 and above, the placeholder command is replaced by tf.Variable, so I rewrote the code:
X = tf.placeholder(tf.float32, shape=X_shape) to become
X = tf.Variable(tf.float32, tf.ones(shape=X_shape))
and still got an error, so I had to use the commands below:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
X = tf.placeholder(tf.float32, shape=X_shape)
but I get a warning like this:
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating: non-resource variables are not supported in the long term
I have searched intensively on the Stack Overflow website by keyword, yet I can't find a solution. Really looking forward to any advice. Thank you very much.
I am having an issue with the shape of the output of model.predict.
Let's consider the following simple dummy model:
import numpy as np
import pandas as pd
import tensorflow as tf
import string
from tensorflow.keras import layers
###Creating pseudo data
a = pd.DataFrame(np.random.randn(1000, 10), columns = [i for i in string.ascii_lowercase[:10]])
### Splitting data between train and test set
train = a.sample(frac=0.75, random_state=0)
test = a.drop(train.index)
### Creating features columns
f = []
for c in a.columns:
    if c == 'j':
        continue
    x = tf.feature_column.numeric_column(c)
    f.append(x)
l = layers.DenseFeatures(f)
### Creating model
model = tf.keras.models.Sequential()
model.add(l)
model.add(tf.keras.layers.Dense(
    units = 64,
    activation = 'relu',
    name = 'hidden1'
))
model.add(tf.keras.layers.Dense(
    units = 128,
    activation = 'relu',
    name = 'hidden2'
))
### Prepping training data for input in the model
features = {name : np.array(value) for name, value in train.items()}
label = np.array(features.pop('j'))
lr = 0.001
batch_size = 40
epochs = 40
model.compile(optimizer = tf.keras.optimizers.RMSprop(lr = lr),
              loss = 'mean_squared_error',
              metrics = [tf.keras.metrics.MeanSquaredError()])
model.fit(x=features, y=label, batch_size = batch_size, epochs = epochs, shuffle = True)
features_predict = {name : np.array(value) for name, value in test.items()}
label_predict = np.array(features_predict.pop('j'))
p = model.predict(x=features_predict)
Now if I inspect p:
p.shape
it returns (250, 128), where obviously I would expect to predict the j value and get shape (250, 1).
What am I doing wrong ?
The last layer of your model is a Dense layer with 128 units. It is supposed to return a tensor with dimensions [batch_size, 128]. So everything is as expected. You should change the number of units to 1 if you want to have an output with a single value per sample instead.
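Concretely, for the dummy model above, end the stack with a single-unit layer when building the model (before compiling and fitting):
# Final layer: one unit, so model.predict returns shape (num_samples, 1).
model.add(tf.keras.layers.Dense(units=1, name='output'))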
Trying to implement this article.
Edit 1: found one error; my output size is 10 and not 1 (one number per sentence, and there are 10 sentences per document).
Edit 2: I got another error that involves the batch size. When I make it 10, the model trains (!!!!), but I think it's not the right way... The error I had with batch size 3 is:
Edit 3: Solved!! Stuff with sizes + the fact that Bidirectional returns different things from LSTM, so I need to concat myself. I will put the right code in an answer.
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
[[metrics_8/acc/Mean_1/_5481]]
(1) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
0 successful operations.
0 derived errors ignored.
The target is extractive document summarization.
Link to colab with code
What they do is (you can see in the picture on page 3):
100 units of BI_LSTM + Attention for each sentence of the document.
Concat those and insert them to 1 BI_LSTM + Attention to get document embeddings.
Use document embeddings + hidden states from the LSTM to get some features.
Classify according to features
After a lot of struggle with the Keras low-level API, I got a simple version to work.
What I did was take the already-computed sentence embeddings and just do the last LSTM.
Or take the word embeddings of a sentence and make the small sentence-level LSTM unit work.
Now I am trying to put everything together but cannot get the sizes to fit.
My input size is
number_of_documents * sentences_in_document * words_in_sentence * word_embedding
In the code I set those to be 20 * 10 * 50 * 100
(10 sentences per document is so that everything runs faster for now..).
My output is
10 * 1, meaning for each sentence I get 1/0 depending on whether it's part of the document summary.
(I have not yet done the feature-extraction part; I just use another dense layer to give me probabilities instead..)
I think the problem is with this part of the code
X_doc = Lambda(lambda x: x[:,t, :, :])(X)
The code with sample data
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import keras
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow as tf
from keras import backend as K
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
    bi_lstms.append(Bidirectional(LSTM(units=lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
                                       return_sequences=False, name='bidirectional_' + str(i)), merge_mode='concat'))
    densors_1.append(Dense(densor1_output_size, activation="tanh"))
    densors_2.append(Dense(densor2_output_size, activation="softmax"))

def invoke_sentence(sentence_matrix, index):
    if index == 0:
        print(type(sentence_matrix))
        print(tf.shape(sentence_matrix))
    Ys = bi_lstms[index](sentence_matrix)
    attention_middle = densors_1[index](Ys)
    output = densors_2[index](attention_middle)
    if index == 0:
        print(f'Ys shape is {Ys.shape}')
        print(f'attention_middle shape is {attention_middle.shape}')
        print(f'output shape is {output.shape}')
    return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
    """
    Arguments:
    words_in_sentence -- Tx -- length of the input sequence - max words per sentence
    sentences_in_doc -- Ty -- length of the output sequence - number of sentences per document
    lstm_hidden_size -- hidden state size of the Bi-LSTM
    word_embeddings_size -- size of the word embeddings
    model_output_size -- size of each sentence label (1 or 0)
    Returns:
    model -- Keras model instance
    """
    sentences_embeddings = []
    X = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size), name='X')
    for t in range(Ty):
        X_doc = Lambda(lambda x: x[:, t, :, :])(X)
        print(type(X_doc))
        print(X_doc)
        print(X_doc.shape)
        sentences_embeddings.append(invoke_sentence(X_doc, t))
    sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=0))(sentences_embeddings)
    Ys = Bidirectional(LSTM(units=lstm_hidden_size, input_shape=(sentences_in_doc, lstm_hidden_size*2),
                            return_sequences=False, name='bidirectional_document'),
                       merge_mode='concat')(sentences_embeddings_stacked)
    attention_middle = Dense(densor1_output_size, activation="tanh")(Ys)
    document_embedding = Dense(densor2_output_size, activation="softmax")(attention_middle)
    outputs = Dense(model_output_size, activation="softmax")(document_embedding)
    # compute_features(document_embeddings, sentences_embeddings, ys)
    model = Model(inputs=X, outputs=outputs)
    return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=2, epochs=1)
# Sizes
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
# Data
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
    bi_lstms.append(Bidirectional(LSTM(units=lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
                                       return_sequences=True, return_state=True, name='bidirectional_' + str(i))))
    densors_1.append(Dense(densor1_output_size, activation="tanh", name='senteence_dense_tanh' + str(i)))
    densors_2.append(Dense(densor2_output_size, activation="softmax", name='senteence_dense_softmax' + str(i)))
def invoke_sentence(sentence_matrix, index):
    if index == 0:
        print(type(sentence_matrix))
        print(tf.shape(sentence_matrix))
    lstm, forward_h, forward_c, backward_h, backward_c = bi_lstms[index](sentence_matrix)
    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])
    attention_middle = densors_1[index](state_h)
    output = densors_2[index](attention_middle)
    if index == 0:
        print(f'lstm shape is {lstm.shape}')
        print(f'state_h shape is {state_h.shape}')
        print(f'state_c shape is {state_c.shape}')
        print(f'attention_middle shape is {attention_middle.shape}')
        print(f'output shape is {output.shape}')
    return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
    """
    Arguments:
    words_in_sentence -- Tx -- length of the input sequence - max words per sentence
    sentences_in_doc -- Ty -- length of the output sequence - number of sentences per document
    lstm_hidden_size -- hidden state size of the Bi-LSTM
    word_embeddings_size -- size of the word embeddings
    model_output_size -- size of each sentence label (1 or 0)
    Returns:
    model -- Keras model instance
    """
    sentences_embeddings = []
    X = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size), name='X')
    for t in range(sentences_in_doc):
        X_doc = Lambda(lambda x: x[:, t, :, :])(X)
        if (t == 0):
            print("X_doc")
            print(type(X_doc))
            print(X_doc)
            print(X_doc.shape)
        sentence_embedding = invoke_sentence(X_doc, t)
        sentences_embeddings.append(sentence_embedding)
        if (t == 0):
            print("sentence_embedding")
            print(type(sentence_embedding))
            print(sentence_embedding)
            print(sentence_embedding.shape)
    sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=1))(sentences_embeddings)
    print("sentences_embeddings_stacked")
    print(type(sentences_embeddings_stacked))
    print(sentences_embeddings_stacked)
    print(sentences_embeddings_stacked.shape)
    doc_lstm, doc_forward_h, doc_forward_c, doc_backward_h, doc_backward_c = Bidirectional(
        LSTM(units=lstm_hidden_size, input_shape=(sentences_in_doc, lstm_hidden_size*2),
             return_sequences=True, return_state=True, name='bidirectional_document'),
        merge_mode='concat')(sentences_embeddings_stacked)
    doc_state_h = Concatenate()([doc_forward_h, doc_backward_h])
    doc_state_c = Concatenate()([doc_forward_c, doc_backward_c])
    print(f'doc_lstm shape is {doc_lstm.shape}')
    print(f'doc_state_h shape is {doc_state_h.shape}')
    print(f'doc_state_c shape is {doc_state_c.shape}')
    attention_middle = Dense(densor1_output_size, activation="tanh")(doc_state_h)
    document_embedding = Dense(densor2_output_size, activation="softmax")(attention_middle)
    print(f'document_embedding shape is {document_embedding.shape}')
    # my_layer = MyLayer(input_shape=((400), (10,400), (10,400)), output_dim=2)
    # custom_output = my_layer([document_embedding, sentences_embeddings_stacked, doc_state_h])
    # print(f'custom_output shape is {custom_output.shape}')
    outputs = Dense(model_output_size, activation="softmax")(document_embedding)
    model = Model(inputs=X, outputs=outputs)
    return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=5, epochs=1)