I am working on a modified version of the TPU fmnist example. While the original works fine, in my version I reduce the frequency of some classes according to a percentile (also omitting some classes, but that does not seem to cause the problem).
Training on TPU results in a very uninformative error regarding shape mismatch:
Run-time shape mismatch for TPUExecute argument[0] (cond_15/Merge).
Expected element_type: F32
Running the exact same code on GPU works.
I assume it is due to the workload being shared between the TPU cores, but I am a bit lost on how to fix it.
I am attaching a link to the notebook, with the code below it. It is currently set up for TPU; to run it on GPU, uncomment the relevant strategy, comment out the TPU parts, and change the Colab runtime.
colab notebook
import tensorflow as tf
import numpy as np
import distutils
if distutils.version.LooseVersion(tf.__version__) < '1.14':
raise Exception('This notebook is compatible with TensorFlow 1.14 or higher, for TensorFlow 1.13 or lower please use the previous version at')
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
def make_tail_ds(x_train, y_train, x_test, y_test, tail_classes, omitted_classes, precentile):
    """Build a long-tailed training set and a filtered test set.

    Training samples labelled with one of ``omitted_classes`` are dropped.
    Training samples labelled with one of ``tail_classes`` are pooled and only
    the first ``floor(len(pool) * precentile)`` of them, in their original
    order, are kept. Every other training sample is kept and placed before the
    tail samples in the result. Test samples are filtered by
    ``omitted_classes`` only.

    Args:
        x_train: Training samples; must support boolean-mask indexing.
        y_train: Training labels, one per training sample.
        x_test: Test samples; must support boolean-mask indexing.
        y_test: Test labels, one per test sample.
        tail_classes: Labels to under-sample (list, tuple or array).
        omitted_classes: Labels to remove entirely (list, tuple or array).
        precentile: Fraction of the pooled tail samples to keep. Values of 1
            or more keep all of them.

    Returns:
        Tuple ``(new_train, new_gt, new_x_test, new_y_test)`` of newly
        allocated arrays.

    Raises:
        ValueError: If ``precentile`` is negative.
    """
    if precentile < 0:
        # A negative length would slice from the end and keep the wrong samples.
        raise ValueError("precentile must be non-negative, got %r" % (precentile,))

    # Normalise to 1-D arrays so lists, tuples and ndarrays behave alike:
    # `list + list` concatenates, but `ndarray + ndarray` adds element-wise.
    tail_classes = np.asarray(tail_classes).ravel()
    omitted_classes = np.asarray(omitted_classes).ravel()

    is_tail = np.isin(y_train, tail_classes)
    is_normal = ~(is_tail | np.isin(y_train, omitted_classes))

    tail_data = x_train[is_tail]
    tail_gt = y_train[is_tail]
    tail_len = int(np.floor(len(tail_gt) * precentile))

    # Boolean-mask indexing and np.concatenate both allocate new arrays, so
    # the results never alias the caller's inputs.
    new_train = np.concatenate([x_train[is_normal], tail_data[:tail_len]])
    new_gt = np.concatenate([y_train[is_normal], tail_gt[:tail_len]])

    keep_test = ~np.isin(y_test, omitted_classes)
    return new_train, new_gt, x_test[keep_test], y_test[keep_test]
x_train, y_train,x_test,y_test = make_tail_ds(x_train,y_train,x_test,y_test, [3,4], [8,9],0.9)
# add empty color dimension
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
def create_model(num_of_classes):
    """Build a small convolutional image classifier.

    The network stacks three ``Conv2D`` (5x5, ELU, same padding) + 2x2
    max-pooling stages with 64, 128 and 256 filters, then flattens the
    feature map and ends in a softmax layer with one unit per class.

    Args:
        num_of_classes: Number of output classes (size of the softmax layer).

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` model.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential()
    for filters in (64, 128, 256):
        model.add(layers.Conv2D(filters, (5, 5), padding='same', activation='elu'))
        # Strides default to the pool size, so every stage halves height and width.
        model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    # Classification head: without it the model emits a 4-D feature map that
    # cannot be compared against per-sample class labels.
    model.add(layers.Flatten())
    model.add(layers.Dense(num_of_classes, activation='softmax'))
    return model
import os
resolver = tf.contrib.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
strategy = tf.contrib.distribute.TPUStrategy(resolver)
# strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
with strategy.scope():
model = create_model(len(np.unique(y_train)))
optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, ),
x_train.astype(np.float32), y_train.astype(np.float32),
validation_data=(x_test.astype(np.float32), y_test.astype(np.float32)), batch_size = 100
model.save_weights('./fashion_mnist.h5', overwrite=True)


Great validation accuracy but terrible prediction

I have got the following CNN:
import os
import numpy as np
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
# Load the data
data_dir = PATH_DIR
x_train = []
y_train = []
total_files = 0
for subdir in os.listdir(data_dir):
subdir_path = os.path.join(data_dir, subdir)
if os.path.isdir(subdir_path):
total_files += len([f for f in os.listdir(subdir_path) if f.endswith('.npy')])
with tqdm(total=total_files, unit='file') as pbar:
for subdir in os.listdir(data_dir):
subdir_path = os.path.join(data_dir, subdir)
if os.path.isdir(subdir_path):
for image_file in os.listdir(subdir_path):
if image_file.endswith('.npy'):
image_path = os.path.join(subdir_path, image_file)
image = np.load(image_path)
x_train = np.array(x_train)
y_train = np.array(y_train)
# Preprocess the labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_train = to_categorical(y_train)
# Create the model: three conv/pool stages followed by a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(57, 57, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
# Collapse the feature map to one vector per image; Dense layers applied to
# the 4-D tensor would otherwise yield a prediction per spatial position.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(8, activation='softmax'))

# Compile, train and save the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10)
model.save('GeneratedModels/units_model_np.h5')
And then the following function that is called within a loop about 15 times a second. Where image is a numpy array.
_UNIT_MODEL_CACHE = {}


def guess_unit(image, classList):
    """Classify one image and return the matching entry of ``classList``.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2RGB``, so it
            is treated as being in BGR channel order.
        classList: Sequence indexed by the predicted class index.

    Returns:
        ``classList[i]`` where ``i`` is the argmax of the model's prediction.
    """
    # Load the model once and reuse it: this function is called many times per
    # second and reading the file from disk on every call is very slow.
    model = _UNIT_MODEL_CACHE.get(MODEL_PATH)
    if model is None:
        model = tf.keras.models.load_model(MODEL_PATH)
        _UNIT_MODEL_CACHE[MODEL_PATH] = model

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The model expects a batch, so add a leading batch axis of size 1.
    batch = np.expand_dims(rgb, axis=0)
    prediction = model.predict(batch, verbose=0)
    # Return the predicted unit
    return classList[np.argmax(prediction)]
The problem is that when I train the model the accuracy is very high (99.99976%), but when I use predict the output is terribly wrong, to the point that it does not make any sense. Sometimes the image received will be the same, yet predict will return two different results.
I have no idea what am I doing wrong. It's the first time I am tinkering with Neural Networks.
I have tried to use the model.predict with the images that it was trained on and it's always getting them right. Is just when it receives dynamic images that it's terribly wrong.
NOTE: I have 8 classes and it was trained using about 13000 images.
Generally, to measure how well your model performs beyond its training data, you have to split your data into training, validation and test sets (which I see you haven't done). This can be done manually or by passing validation_split to your fit function.
Without seeing any curves of how your loss and accuracy behave, it's difficult to make any suggestions. However, it might be the case that you are underfitting or overfitting your data (I would assume that you are facing overfitting in your case). If you are overfitting, I would suggest adding some regularization or changing your model architecture, as the one used might not be appropriate. Options include adding regularization via Dropout or adding regularization to your weights.

Converting from keras functions to tensorFlow.keras functions to utilize the GPU

I run a hybrid model in keras (external library). It keeps on running for more than 24 hours.
However, the program is not utilizing the GPU.
How can I transform my program to run on tensorFlow environment using tensorFlow.keras to utilize my GPU? following is the code:
model = keras.Sequential(
keras.layers.InputLayer(input_shape=(231, 231, 3)),
layers.Conv2D(128, 3, padding="same", activation="relu"),
layers.Conv2D(128, 3, padding="same", activation="relu", kernel_initializer = 'glorot_uniform'),
extraction_model = Model(model.input, model.layers[1].output)
new_X = extraction_model.predict(X)
x_train = new_X.reshape(-1, new_X.shape[3])
y_train = Y.reshape(-1)
RF_clf = RandomForestClassifier(random_state=42, oob_score=True)
SV_clf = SVC(random_state=42, probability=True)
LR_clf = LogisticRegression(random_state=42,)
estimators = [('RF', RF_clf), ('SV', SV_clf), ]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression()), y_train)
print("Stacking model score: %.3f" % clf.score(x_test, y_test))
Keras is already built into TensorFlow. If you train your model and use the training functions, it will use the GPU as long as a GPU is available and CUDA and its dependencies are installed.
If you are still worried about the GPU issue, it is better to use Google Colab for running the training, which can help you in many ways.

SavedModel - TFLite - SignatureDef - TensorInfo - Get intermediate Layer outputs

I would like to get the outputs of intermediate layers of a TFLite graph, along the lines of the question below.
Visualize TFLite graph and get intermediate values of a particular node?
The above solution works on frozen graphs only. Since SavedModel is the preferred way of serializing the model in TF 2.0, I would like to have a solution with a saved model. I tried to pass --output_arrays for "toco" with savedModelDir as input. This is not helping.
From the documentation, it looks like SignatureDefs in SavedModel is the option to achieve this. But, I could not get it working.
x = test_images[0:1]
output = model.predict(x, batch_size=1)
signature_def = signature_def_utils.build_signature_def(
inputs={name:"x:0", dtype: DT_FLOAT, tensor_shape: (1, 28,28, 1)})
outputs = {{name: "output:0", dtype: DT_FLOAT, tensor_shape: (1, 10)},
{name:"Dense_1:0", dtype: DT_FLOAT, tensor_shape: (1, 10)}}), './tf-saved-model-sigdefs', signature_def)
Can you share an example usage of SignatureDefs for this purpose?
BTW, I have been playing around with the below tutorial for this experiment.
Below is the best solution that I have so far.
from __future__ import absolute_import, division, print_function
#!pip install -q tensorflow==2.0.0-alpha0
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1))
test_images = test_images.reshape((10000, 28, 28, 1))
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
metrics=['accuracy']), train_labels, epochs=5)
test_loss, test_acc = model.evaluate(test_images, test_labels)
#Saving in saved_model format
tf.keras.experimental.export_saved_model(model, './tf-saved-model')
#Saving in TFLIte FlatBUffer format
tflite_mnist_model = "mnist_model.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open(tflite_mnist_model, "wb").write(tflite_model)
#cloning Keras model with debug outputs
outputs = [model.get_layer("dense").output, model.get_layer("dense_1").output]
model_debug = tf.keras.Model(inputs=model.inputs, outputs=outputs)
#Saving in TFLIte FlatBUffer format with debug information
tflite_mnist_model = "mnist_model_debug.tflite"
converter = tf.lite.TFLiteConverter.from_keras_model(model_debug)
tflite_model = converter.convert()
open(tflite_mnist_model, "wb").write(tflite_model)
tf.lite.Interpreter has a new parameter as of tf 2.5: experimental_preserve_all_tensors, when set to True it allows you to query the output from any node. Here is how I used it:
import numpy as np
import tensorflow as tf
import cv2
import argparse
parser = argparse.ArgumentParser(description='Use tensorflow framework to determine class')
parser.add_argument('--model-path', '-m', default='model.tflite', type=str, help='path to the model')
parser.add_argument('--nodes', '-n', default='concat,convert_scores', type=str, help='comma separated list of node names')
parser.add_argument('--image', '-i', default='test.jpg', type=str, help='image to process')
args = parser.parse_args()
# read in model
interpreter = tf.lite.Interpreter(
args.model_path, experimental_preserve_all_tensors=True
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# prepare image
img = cv2.imread(args.image)
inp = cv2.resize(img, tuple(input_details[0]["shape"][1:3]))
inp = inp[:, :, [2, 1, 0]] # BGR2RGB
inp = inp[np.newaxis, :, :, :].astype(input_details[0]["dtype"])
# invoke mode
interpreter.set_tensor(input_details[0]["index"], inp)
# write out layer output to files
for node in args.nodes.split(','):
for tensor_details in interpreter.get_tensor_details():
if tensor_details["name"] == node:
tensor = interpreter.get_tensor(tensor_details["index"]), tensor)
You can use netron to graphically view the network described by your tflite model file, the node names can be found by clicking on a node.

How ensure that Keras is using GPU with tensorflow backend?

I've created virtual notebook on Paperspace cloud infrastructure with Tensorflow GPU P5000 virtual instance on the backend.
When I start training my network, it runs 2x SLOWER than on my MacBook Pro with a pure CPU runtime engine.
How can I ensure that my Keras network is using the GPU instead of the CPU during training?
Please find my code below:
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense
from tensorflow.contrib.keras.api.keras.layers import Dropout
from tensorflow.contrib.keras.api.keras import utils as np_utils
import numpy as np
import pandas as pd
# Read data
pddata= pd.read_csv('data/data.csv', delimiter=';')
# Helper function (prepare & test data)
def split_to_train_test(data):
    """Split a DataFrame into shuffled train/test feature and label arrays.

    Rows up to and including index label ``len(data) - len(data) // 10`` form
    the training part and the remaining rows form the test part. Each part is
    shuffled independently. Labels come from the ``"Label"`` column and
    features from every column from ``"Feature 0"`` to the last one.

    Args:
        data: ``pandas.DataFrame`` with a ``"Label"`` column and feature
            columns starting at ``"Feature 0"``.

    Returns:
        Tuple ``(trainData, testData, trainLabels, testLabels)`` of NumPy
        arrays.
    """
    trainLenght = len(data) - len(data) // 10
    # .loc slicing is label-based and inclusive, so the two slices do not overlap.
    trainData = data.loc[:trainLenght].sample(frac=1).reset_index(drop=True)
    testData = data.loc[trainLenght + 1:].sample(frac=1).reset_index(drop=True)
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    trainLabels = trainData.loc[:, "Label"].to_numpy()
    testLabels = testData.loc[:, "Label"].to_numpy()
    trainData = trainData.loc[:, "Feature 0":].to_numpy()
    testData = testData.loc[:, "Feature 0":].to_numpy()
    return (trainData, testData, trainLabels, testLabels)
# prepare train & test data
(X_train, X_test, y_train, y_test) = split_to_train_test (pddata)
# Convert labels to one-hot notation
Y_train = np_utils.to_categorical(y_train, 3)
Y_test = np_utils.to_categorical(y_test, 3)
# Define model in Keras
def create_model(init):
    """Build a fully connected classifier for 101 features and 3 classes.

    Args:
        init: Kernel initializer passed to every ``Dense`` layer.

    Returns:
        An uncompiled ``Sequential`` model with four 101-unit tanh layers
        followed by a 3-unit softmax layer.
    """
    model = Sequential()
    # Only the first layer declares the input shape.
    model.add(Dense(101, input_shape=(101,), kernel_initializer=init, activation='tanh'))
    for _ in range(3):
        model.add(Dense(101, kernel_initializer=init, activation='tanh'))
    model.add(Dense(3, kernel_initializer=init, activation='softmax'))
    return model
# Train the model
uniform_model = create_model("glorot_normal")
uniform_model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
# NOTE(review): batch_size=1 performs one weight update per sample; this is
# likely a major factor in slow training on a GPU -- confirm with a larger batch.
uniform_model.fit(X_train, Y_train, batch_size=1, epochs=300, verbose=1, validation_data=(X_test, Y_test))
You need to run your network with log_device_placement = True set in the TensorFlow session (the line before the last in the sample code below). Interestingly enough, if you set that in a session, it will still apply when Keras does the fitting. So the code below (tested) does output the placement for each tensor. Please note that I've short-circuited the data reading because your data wasn't available, so I'm just running the network with random data. This way the code is self-contained and runnable by anyone. Another note: if you run this from a Jupyter Notebook, the output of log_device_placement will go to the terminal where Jupyter Notebook was started, not to the notebook cell's output.
from tensorflow.contrib.keras.api.keras.models import Sequential
from tensorflow.contrib.keras.api.keras.layers import Dense
from tensorflow.contrib.keras.api.keras.layers import Dropout
from tensorflow.contrib.keras.api.keras import utils as np_utils
import numpy as np
import pandas as pd
import tensorflow as tf
# Read data
#pddata=pd.read_csv('data/data.csv', delimiter=';')
pddata = "foobar"
# Helper function (prepare & test data)
def split_to_train_test(data):
    """Return random stand-in data instead of splitting ``data``.

    The argument is ignored; random arrays are generated so the example can
    run without the original data file.

    Args:
        data: Unused placeholder kept so the call site stays unchanged.

    Returns:
        Tuple ``(trainData, testData, trainLabels, testLabels)``: two
        ``(100, 101)`` arrays of uniform floats in ``[0, 1)`` and two
        length-100 arrays of integer labels drawn from ``{0, 1, 2}``.
    """
    return (
        np.random.uniform(size=(100, 101)),
        np.random.uniform(size=(100, 101)),
        np.random.randint(0, high=3, size=100),
        np.random.randint(0, high=3, size=100),
    )
# prepare train & test data
(X_train, X_test, y_train, y_test) = split_to_train_test (pddata)
# Convert labels to one-hot notation
Y_train = np_utils.to_categorical(y_train, 3)
Y_test = np_utils.to_categorical(y_test, 3)
# Define model in Keras
def create_model(init):
    """Build a fully connected 101-input, 3-class softmax classifier.

    Args:
        init: Kernel initializer used for every ``Dense`` layer.

    Returns:
        An uncompiled ``Sequential`` model: four tanh layers of 101 units and
        a final softmax layer of 3 units.
    """
    hidden_units = 101
    model = Sequential()
    # The input shape is declared on the first layer only.
    model.add(Dense(hidden_units, input_shape=(101,), kernel_initializer=init, activation='tanh'))
    for _ in range(3):
        model.add(Dense(hidden_units, kernel_initializer=init, activation='tanh'))
    model.add(Dense(3, kernel_initializer=init, activation='softmax'))
    return model
# Train the model
uniform_model = create_model("glorot_normal")
uniform_model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
# Fit inside a session configured with log_device_placement=True so the
# device chosen for each op is logged.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)):
    uniform_model.fit(X_train, Y_train, batch_size=1, epochs=300, verbose=1, validation_data=(X_test, Y_test))
Terminal output (partial, it was way too long):
VarIsInitializedOp_13: (VarIsInitializedOp): /job:localhost/replica:0/task:0/device:GPU:0
2018-04-21 21:54:33.485870: I tensorflow/core/common_runtime/]
VarIsInitializedOp_13: (VarIsInitializedOp)/job:localhost/replica:0/task:0/device:GPU:0
training/SGD/mul_18/ReadVariableOp: (ReadVariableOp): /job:localhost/replica:0/task:0/device:GPU:0
2018-04-21 21:54:33.485895: I tensorflow/core/common_runtime/]
training/SGD/mul_18/ReadVariableOp: (ReadVariableOp)/job:localhost/replica:0/task:0/device:GPU:0
training/SGD/Variable_9/Read/ReadVariableOp: (ReadVariableOp): /job:localhost/replica:0/task:0/device:GPU:0
2018-04-21 21:54:33.485903: I tensorflow/core/common_runtime/]
training/SGD/Variable_9/Read/ReadVariableOp: (ReadVariableOp)/job:localhost/replica:0/task:0/device:GPU:0
Note the GPU:0 at the end of many lines.
Tensorflow manual's relevant page: Using GPU: Logging Device Placement.
Put this near the top of your jupyter notebook. Comment out what you don't need.
# confirm TensorFlow sees the GPU
from tensorflow.python.client import device_lib
assert 'GPU' in str(device_lib.list_local_devices())
# confirm Keras sees the GPU (for TensorFlow 1.X + Keras)
from keras import backend
assert len(backend.tensorflow_backend._get_available_gpus()) > 0
# confirm PyTorch sees the GPU
from torch import cuda
assert cuda.is_available()
assert cuda.device_count() > 0
NOTE: With the release of TensorFlow 2.0, Keras is now included as part of the TF API.
Originally answered here.
Considering keras is a built-in of tensorflow since version 2.0:
import tensorflow as tf
tf.test.is_gpu_available(cuda_only = True)
NOTE: the latter method may take several minutes to run.

Tensorboard event file size is growing after consecutive model training

I'm training 8 models in a for loop and saving each TensorBoard log file into a separate directory. The folder structure is as follows: Graph is my main directory for graphs, and the directories under Graph, such as net01 net02... net08, are the ones I'm writing my event files to. By doing this I can visualize the training logs in TensorBoard in that fancy fashion where every single training process gets its own colour.
My problem is the growing size of the event files. The first event file is approximately 300 KB, but the second event file has a size of 600 KB, the third is 900 KB and so on. They each reside in their own separate directory and each of them is a different training session from the others, but somehow TensorBoard appends the earlier sessions to the last one. In the end I should have a total size of 12*300Kb= 3600 KB of session files, but I end up with something like 10800KB of session files. As the nets get deeper I end up with session file sizes of around 600 MB. So clearly I'm missing something.
I tried to visualize the last and largest file to check whether it includes all the previous training sessions and can draw all 8 nets, but it failed. So a big bunch of irrelevant information is stored in this session file.
I'm using Anaconda3-Spyder on Win7-64. Database is divided into 8 and for each run I'm leaving one out for validation and using the rest as training. Here is a simplified version of my code:
from keras.models import Model
from keras.layers import Dense, Flatten, Input, Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger
import os.path
import shutil
import numpy
# ------------------------------------------------------------------
img_width, img_height = 48, 48
folds_path= "8fold_folds"
nets_path = "8fold_nets_simplenet"
csv_logpath = 'simplenet_log.csv'
nets_string = "simplenet_nets0"
nb_epoch = 50
batch_size = 512
cvscores = []
def foldpath(foldnumber, base_dir=None):
    """Return the train and validation directories of one fold.

    Args:
        foldnumber: Fold identifier; it is converted with ``str`` and appended
            to ``'F'`` to form the fold's directory name.
        base_dir: Directory that contains the fold directories. Defaults to
            the module-level ``folds_path``.

    Returns:
        Tuple ``(train_data_dir, valid_data_dir)``, i.e. the fold's ``train``
        and ``test`` sub-directories.
    """
    root = folds_path if base_dir is None else base_dir
    fold_dir = os.path.join(root, 'F' + str(foldnumber))
    train_data_dir = os.path.join(fold_dir, "train")
    valid_data_dir = os.path.join(fold_dir, "test")
    return train_data_dir, valid_data_dir
for i in range(1, num_folds+1):
modelpath= os.path.join(nets_path,nets_string+str(i))
if os.path.exists(modelpath):
[train_data_dir, valid_data_dir]=foldpath(i)
img_input = Input(shape=(img_width,img_height,1),name='input')
x = Conv2D(32, (3,3), activation='relu', padding='same', name='conv1-'+str(i))(img_input)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1-'+str(i))(x)
x = Conv2D(64, (3,3), activation='relu', padding='same', name='conv2-'+str(i))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2-'+str(i))(x)
x = Conv2D(128, (3,3), activation='relu', padding='same', name='conv3-'+str(i))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3-'+str(i))(x)
x = Flatten()(x)
x = Dense(512, name='dense1-'+str(i))(x)
#x = Dropout(0.5)(x)
x = Dense(512, name='dense2-'+str(i))(x)
#x = Dropout(0.5)(x)
predictions = Dense(6, activation='softmax', name='predictions-'+str(i))(x)
model = Model(inputs=img_input, outputs=predictions)
# compile model-----------------------------------------------------------
model.compile(optimizer='Adam', loss='binary_crossentropy',
# ----------------------------------------------------------------
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(rescale=1./255,
valid_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
target_size=(img_width, img_height),
classes = ['1','3','4','5','6','7'],
validation_generator = valid_datagen.flow_from_directory(
target_size=(img_width, img_height),
classes = ['1','3','4','5','6','7'],
# --------------------callbacks---------------------------
csv_logger = CSVLogger(csv_logpath, append=True, separator=';')
graph_path = os.path.join('Graphs',modelpath)
tensorboard = TensorBoard(log_dir= graph_path, write_graph=True, write_images=False)
# ------------------
print("Starting to fit the model")
steps_per_epoch = train_generator.samples/batch_size,
validation_data = validation_generator,
validation_steps = validation_generator.samples/batch_size,
epochs = nb_epoch, verbose=1, callbacks=callbacks_list)
Not sure about this one, but my guess would be that it has to do with your graphs being stored after each loop iteration. To check whether your graphs are responsible for this, you could try write_graph = False and see if you still have the same problem. To make sure the graph is reset, you could try clearing the TensorFlow graph at the end of each iteration using this:
from keras import backend as K
K.clear_session()
The problem is that with training of each model, the next model still contains all the graph elements of previous trainings. Thus before training each model, reset the Tensorflow graph and then continue with the training.