Making predictions in Keras using a custom generator - tensorflow

I have an image-classifier model in TensorFlow that I want to make predictions with. I have created a custom generator to avoid loading all the images into RAM at the same time.
from io import BytesIO

import requests
import tensorflow
from PIL import Image

def load_and_preprocess_image(url_path_x):
    with requests.Session() as s:
        request_x = s.get(url_path_x).content
    img = Image.open(BytesIO(request_x))
    img = img.convert('RGB')
    img = img.resize((224, 224), Image.NEAREST)
    img = tensorflow.keras.preprocessing.image.img_to_array(img)
    return img

def prediction_generator(urls_x):
    for url_x in urls_x:
        try:
            yield load_and_preprocess_image(url_x).reshape(1, 224, 224, 3)
        except Exception:
            # fall back to a dummy image if a URL fails to load
            yield load_and_preprocess_image(dummy_image_path).reshape(1, 224, 224, 3)
my_path_gen = prediction_generator(df['url_path_column'])
preds_probas = model_i.predict(my_path_gen, batch_size=1, verbose=0, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False)
However, my code seems to consume excessive RAM, as if it were loading all the images into RAM at the same time. Is there anything wrong with my custom generator?

I'm definitely not an expert on this topic, but shouldn't the generator work with __len__ and __getitem__?
From this link
def __len__(self):
    'Denotes the number of batches per epoch'
    return int(np.floor(len(self.list_IDs) / self.batch_size))

def __getitem__(self, index):
    'Generate one batch of data'
    # Generate indexes of the batch
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    # Find list of IDs
    list_IDs_temp = [self.list_IDs[k] for k in indexes]
    # Generate data
    X, y = self.__data_generation(list_IDs_temp)
    return X, y
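For reference, here is a minimal sketch of how those two methods could be wrapped into a complete tf.keras.utils.Sequence for the URL-prediction case above. This is an assumption-based adaptation, not the linked tutorial's code; it reuses load_and_preprocess_image from the question:

import numpy as np
import tensorflow as tf

class UrlImageSequence(tf.keras.utils.Sequence):
    """Sketch: loads one batch of images from URLs at a time."""
    def __init__(self, urls, batch_size=32):
        self.urls = list(urls)
        self.batch_size = batch_size

    def __len__(self):
        # number of batches, including a final partial batch
        return int(np.ceil(len(self.urls) / self.batch_size))

    def __getitem__(self, index):
        batch_urls = self.urls[index * self.batch_size:(index + 1) * self.batch_size]
        # for predict(), a Sequence may return inputs only (no labels)
        return np.stack([load_and_preprocess_image(u) for u in batch_urls])

# preds_probas = model_i.predict(UrlImageSequence(df['url_path_column']))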

The generators are fine, guys, and they aren't using excessive RAM. The issue was somewhere else.
Anyway, I'm leaving the question here so that the code may be useful to someone.
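If someone wants to verify for themselves that a generator is not hoarding memory, a quick sanity check (a sketch, assuming the third-party psutil package is installed) is to drain the generator while printing the process's resident set size:

import os
import psutil  # third-party: pip install psutil

proc = psutil.Process(os.getpid())
for i, batch in enumerate(prediction_generator(df['url_path_column'])):
    if i % 100 == 0:
        print(f"batch {i}: RSS = {proc.memory_info().rss / 1e6:.1f} MB")
    if i >= 1000:
        break  # sample only the first 1000 batches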

Related

Training runs out of memory as RAM consumption keeps growing

I am not sure since when I have been having this issue, and I have to believe it started at some point between a few months ago and today, but it seems that RAM (CPU) consumption grows over time across epochs.
self.model.fit(
    train_data,
    initial_epoch=self.status.valid_last.epoch,
    epochs=train_config.epochs,
    steps_per_epoch=train_config.steps_per_epoch,
    callbacks=self._get_experiment_callbacks(),
    validation_data=valid_data,
    validation_steps=train_config.validation_steps,
)
The only thing out of the ordinary here might be the callbacks I am passing, but there's actually nothing special about them. One is a TensorBoard (TB) callback and the other is a custom Metric which does not do much except plot the learning rate and other general metrics to TB.
def _get_experiment_callbacks(self) -> List[tf.keras.callbacks.Callback]:
    tensorboard_cb = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join(out_dir, "logs"),
        update_freq="epoch",
        profile_batch=profile_batch,
        write_images=True,
    )
    # Not interested in whatever is plotted in those
    tensorboard_cb.on_epoch_end = lambda *args: ...
    tensorboard_cb.on_test_end = lambda *args: ...
    return [
        tensorboard_cb,
        Metrics(tensorboard_cb, update_freq=100),
    ]
This leaves us with the last suspect, which is the valid_data itself. This is essentially just a list of protobuf files (shards) which I am loading like so:
def load_shards(
    decode_example_fn: Callable,
    shard_fps: List[str],
    training: bool,
    buffer_size: int = None,  # 50 * 1000 ** 2
) -> tf.data.Dataset:
    if not len(shard_fps) > 0:
        raise ValueError("Argument shard_fps must be a list to shards but is empty.")

    def make_dense_(example):
        for k, v in example.items():
            if isinstance(v, tf.SparseTensor):
                example[k] = tf.sparse.to_dense(v)
        return example

    def load_records_(filenames):
        record_dataset = tf.data.TFRecordDataset(filenames, buffer_size=buffer_size)
        record_dataset = record_dataset.map(decode_example_fn)
        record_dataset = record_dataset.map(make_dense_)
        return record_dataset

    if not training:
        shard_fps = sorted(shard_fps)

    dataset = tf.data.Dataset.from_tensor_slices(tf.constant(shard_fps))
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
    dataset = dataset.with_options(options)

    if training:
        dataset = dataset.interleave(load_records_, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)
    else:
        dataset = dataset.apply(load_records_)

    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset
and from then on there are just preprocessing and transformation mappings on the inputs, so I would not expect any memory leak at this point.
Still, I am observing a continuous increase in memory consumption over time. The screenshot below shows the consumption after a restart.
At first we use ~28GB of RAM. After 100 steps there is a sharp increase to ~33GB, and from there it seems to stabilize at around 38GB. The next big jump, at 216k steps, comes from an evaluation. From there it just keeps growing.
From the looks of it, the memory usage has stabilized and the jump occurs only after each epoch (1 epoch = 6000 steps).
Any number of things could be wrong. TensorBoard could be adding new graphs instead of reusing the same one, which leads to OOM. I don't use TensorBoard myself because I remember this happening to me a few years back. It's also possible that model.fit is the problem and that you're reloading your data at every epoch. You could try writing the training loop yourself, something like:
for epoch in tf.range(epochs):
    batch_train_loss = []
    batch_train_acc = []
    for batch, (X, Y) in train_dataset.enumerate():
        train_loss = train_fn(X, Y, model, loss, optimizer, metric, batch)  # do the actual training
        train_acc = metric.result().numpy()  # get the training accuracy
        batch_train_loss.append(train_loss)  # save the training loss above
        batch_train_acc.append(train_acc)  # save the training accuracy above
        metric.reset_states()  # reset the metric after every batch
where the train_fn is:
def get_apply_train_fn():
    @tf.function
    def train_function(X, Y, model, loss, optimizer, metric, step):
        with tf.GradientTape() as tape:
            predictions = model(X, training=True)
            loss_value = loss(Y, predictions)
        gradients = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        metric.update_state(Y, predictions)  # update the training metric
        return loss_value
    return train_function

train_fn = get_apply_train_fn()
Now, this is a stupidly complicated way of writing model.fit, but it does work.
Another way in which I've had to combat OOM on the GPU side is to use Python's multiprocessing, but that was in a context where I was doing 10-fold cross-validation and training would crash with OOM after 7 or 8 folds.
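For reference, the multiprocessing workaround boils down to running each fold in its own process so that all TF/GPU memory is released when the process exits. A rough sketch (train_one_fold is a hypothetical function you would fill in yourself):

import multiprocessing as mp

def train_one_fold(fold_idx):
    # build the model, load this fold's data and train here;
    # all TensorFlow state is freed when the process terminates
    pass

if __name__ == "__main__":
    for fold in range(10):
        p = mp.Process(target=train_one_fold, args=(fold,))
        p.start()
        p.join()  # wait for this fold before starting the next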
Alternatively, you could try turning eager execution on or off with
tf.config.run_functions_eagerly(False) # or True

Training seq2seq model on Google Colab TPU with big dataset - Keras

I'm trying to train a sequence to sequence model for machine translation using Keras on Google Colab TPU.
I have a dataset which I can load in memory, but I have to preprocess it to feed it to the model. In particular, I need to convert the target words to one-hot vectors, and with many examples I can't hold the entire conversion in memory, so I need to make batches of data.
I'm using this function as a batch generator:
def generate_batch_bert(X_ids, X_masks, y, batch_size=1024):
    '''Generate a batch of data'''
    while True:
        for j in range(0, len(X_ids), batch_size):
            # batch of encoder and decoder data
            encoder_input_data_ids = X_ids[j:j+batch_size]
            encoder_input_data_masks = X_masks[j:j+batch_size]
            y_decoder = y[j:j+batch_size]

            # decoder target and input for teacher forcing
            decoder_input_data = y_decoder[:,:-1]
            decoder_target_seq = y_decoder[:,1:]

            # batch of decoder target data
            decoder_target_data = to_categorical(decoder_target_seq, vocab_size_fr)

            # keep only batches with the right number of instances for training on TPU
            if encoder_input_data_ids.shape[0] == batch_size:
                yield [encoder_input_data_ids, encoder_input_data_masks, decoder_input_data], decoder_target_data
The problem is that whenever I try to run the fit function as follows:
model.fit(x=generate_batch_bert(X_train_ids, X_train_masks, y_train, batch_size=batch_size),
          steps_per_epoch=train_samples//batch_size,
          epochs=epochs,
          callbacks=callbacks,
          validation_data=generate_batch_bert(X_val_ids, X_val_masks, y_val, batch_size=batch_size),
          validation_steps=val_samples//batch_size)
I get the following error:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
I'm not sure what's wrong or how I can solve this problem.
EDIT
I tried loading a smaller amount of data into memory so that the one-hot conversion of the target words doesn't crash the kernel, and it actually works. So there is obviously something wrong with how I generate batches.
It's hard to tell what's wrong since you don't provide your model definition or any sample data. However, I'm fairly certain that you're running into the same TensorFlow bug that I recently got bitten by.
The workaround is to use the tensorflow.data API, which works much better with TPUs. Like this:
from tensorflow.data import Dataset
import tensorflow as tf

def map_fn(X_id, X_mask, y):
    decoder_target_data = tf.one_hot(y[1:], vocab_size_fr)
    return (X_id, X_mask, y[:-1]), decoder_target_data

...

X_ids = Dataset.from_tensor_slices(X_ids)
X_masks = Dataset.from_tensor_slices(X_masks)
y = Dataset.from_tensor_slices(y)
ds = Dataset.zip((X_ids, X_masks, y)).map(map_fn).batch(1024)

model.fit(x=ds, ...)
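One detail worth carrying over from the original generator: TPUs want fixed-size batches, which is why the generator skipped the last partial batch. With tf.data the equivalent is, as far as I know, the drop_remainder flag:

ds = Dataset.zip((X_ids, X_masks, y)).map(map_fn).batch(1024, drop_remainder=True)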

Keras generator with image and scalar

I am trying to train some layers of a network whose inputs are an image and a scalar. Please see the figure below for a better understanding.
As you can see, only the dark yellow layers will be trained, so I need to freeze the rest; that is for later.
The purpose of this architecture is to map images (chest X-rays) to 14 kinds of diseases.
The images are stored in the following directory: /home/akde/chexnet/CheXNet-Keras/data/images
The images are named by their image IDs.
A dataframe maps the images (named by image ID) to classes (diseases).
As you can see, an image can be mapped to more than one class (disease).
Another dataframe maps the images (image IDs) to the patient's age. You can see it below.
The image is the first input and the patient age is the second.
So, in short, for each image ID I have an image and an age value, which live in 2 separate dataframes.
I can already run a prediction (it gives absurd results since the network is not trained, but it still proves that the network accepts the inputs and produces some result) using the following code.
res3 = model3.predict( [test_image, a] )
where a is the scalar input while the test_image is the image input.
My training data is stored in multiple dataframes; having read that post, I deduce that flow_from_dataframe should be used.
The first thing I did was to read this post, which explains how to use mixed inputs. That gave me some background, but since it uses fit instead of fit_generator it did not solve my problem.
Then I read this post, which does not use multiple inputs. Again, no clue.
Afterwards, I saw this post, which takes 2 images as input (not one image and one scalar). So again, no help.
Even though I haven't found a solution to my problem, I have written the following piece of code, which will be the skeleton of the solution.
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)
train_generator = datagen.flow_from_dataframe(traindf,
                                              directory="/home/akde/chexnet/CheXNet-Keras/data/images",
                                              class_mode="other",
                                              x_col="Image Index",
                                              y_col=["Atelectasis", "Cardiomegaly", "Effusion", "Infiltration", "Mass",
                                                     "Nodule", "Pneumonia", "Pneumothorax", "Consolidation", "Edema",
                                                     "Emphysema", "Fibrosis", "Pleural_Thickening", "Hernia"],
                                              color_mode="rgb",
                                              batch_size=32,
                                              target_size=(224, 224))
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
model3.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6), loss="categorical_crossentropy", metrics=["accuracy"])
model3.fit_generator(generator=train_generator,
                     steps_per_epoch=STEP_SIZE_TRAIN,
                     epochs=10)
I know this piece of code is far from the solution.
So how can I create a generator that uses the 2 dataframes explained earlier (the one that maps images to diseases and the one that maps image IDs to age)?
In other words, how do I write a generator that takes an image and a scalar value as input, given that both are represented in dataframes? How can I write the generator that is marked in bold below?
model3.fit_generator(**generator=train_generator**,
steps_per_epoch=STEP_SIZE_TRAIN,
epochs=10
)
For your purpose you need to create a custom generator.
I recommend you take a deep look at this link:
https://blog.ml6.eu/training-and-serving-ml-models-with-tf-keras-3d29b41e066c
And especially this code:
import ast
import math
import os
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array as img_to_array
from tensorflow.keras.preprocessing.image import load_img as load_img

def load_image(image_path, size):
    # data augmentation logic such as random rotations can be added here
    return img_to_array(load_img(image_path, target_size=(size, size))) / 255.

class KagglePlanetSequence(tf.keras.utils.Sequence):
    """
    Custom Sequence object to train a model on out-of-memory datasets.
    """

    def __init__(self, df_path, data_path, im_size, batch_size, mode='train'):
        """
        df_path: path to a .csv file that contains columns with image names and labels
        data_path: path that contains the training images
        im_size: image size
        mode: when in training mode, data will be shuffled between epochs
        """
        self.df = pd.read_csv(df_path)
        self.im_size = im_size
        self.batch_size = batch_size
        self.mode = mode

        # Take labels and a list of image locations in memory
        self.wlabels = self.df['weather_labels'].apply(lambda x: ast.literal_eval(x)).tolist()
        self.glabels = self.df['ground_labels'].apply(lambda x: ast.literal_eval(x)).tolist()
        self.image_list = self.df['image_name'].apply(lambda x: os.path.join(data_path, x + '.jpg')).tolist()

    def __len__(self):
        return int(math.ceil(len(self.df) / float(self.batch_size)))

    def on_epoch_end(self):
        # Shuffles indexes after each epoch
        self.indexes = range(len(self.image_list))
        if self.mode == 'train':
            self.indexes = random.sample(self.indexes, k=len(self.indexes))

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return [self.wlabels[idx * self.batch_size: (idx + 1) * self.batch_size],
                self.glabels[idx * self.batch_size: (idx + 1) * self.batch_size]]

    def get_batch_features(self, idx):
        # Fetch a batch of images
        batch_images = self.image_list[idx * self.batch_size: (1 + idx) * self.batch_size]
        return np.array([load_image(im, self.im_size) for im in batch_images])

    def __getitem__(self, idx):
        batch_x = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_x, batch_y
Hope this helps you find your solution!
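To connect this back to your setup, a sketch of a Sequence that joins your two dataframes and yields ([image_batch, age_batch], label_batch) could look like the following. Column names such as 'Image Index' and 'Patient Age' are assumptions based on your description, and load_image is the helper defined above:

import os
import numpy as np
import tensorflow as tf

DISEASE_COLS = ["Atelectasis", "Cardiomegaly", "Effusion", "Infiltration", "Mass",
                "Nodule", "Pneumonia", "Pneumothorax", "Consolidation", "Edema",
                "Emphysema", "Fibrosis", "Pleural_Thickening", "Hernia"]

class ImageAgeSequence(tf.keras.utils.Sequence):
    """Sketch: yields ([images, ages], disease_labels) batch by batch."""
    def __init__(self, disease_df, age_df, data_path, batch_size=32, im_size=224):
        merged = disease_df.merge(age_df, on="Image Index")  # align the two dataframes
        self.paths = [os.path.join(data_path, n) for n in merged["Image Index"]]
        self.ages = merged["Patient Age"].to_numpy(dtype="float32")
        self.labels = merged[DISEASE_COLS].to_numpy(dtype="float32")
        self.batch_size = batch_size
        self.im_size = im_size

    def __len__(self):
        return int(np.ceil(len(self.paths) / self.batch_size))

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        images = np.array([load_image(p, self.im_size) for p in self.paths[sl]])
        return [images, self.ages[sl]], self.labels[sl]

Such a Sequence can be passed directly as the generator argument of model3.fit_generator (or to model3.fit in newer Keras versions).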

Save large amount of numpy arrays in single file and use it to fit keras model

I have a huge number of numpy arrays that do not fit in RAM. Let's say millions of:
np.arange(10)
I want to save them on the file system in a single file, chunk by chunk.
I want to read them from the file and feed them to my keras model using model.fit_generator.
I read about dask, which works with large data that does not fit in memory, but I could not manage to achieve my goals.
Write your files to disk with pickle:
import pickle

pickle.dump((x, y), open(file, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
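For the "chunk by chunk" part, a possible sketch (assuming your samples arrive from some iterable sample_stream) is to buffer a fixed number of arrays per pickle file:

import pickle
import numpy as np

def write_chunks(sample_stream, data_per_file, prefix="chunk"):
    xs, ys, file_num = [], [], 0
    for x, y in sample_stream:
        xs.append(x)
        ys.append(y)
        if len(xs) == data_per_file:
            with open(f"{prefix}_{file_num}.pkl", "wb") as f:
                pickle.dump((np.array(xs), np.array(ys)), f,
                            protocol=pickle.HIGHEST_PROTOCOL)
            xs, ys, file_num = [], [], file_num + 1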
Then create a list of test and train files and create a generator:
def raw_generator(files):
    while 1:
        for file_num, file in enumerate(files):
            try:
                x, y = pickle.load(open(file, 'rb'))
                batches = int(np.ceil(len(y) / batch_size))
                for i in range(0, batches):
                    end = min(len(x), i * batch_size + batch_size)
                    yield x[i * batch_size:end], y[i * batch_size:end]
            except EOFError:
                print("error " + file)

train_gen = raw_generator(training_files)
test_gen = raw_generator(test_files)
Finally call fit_generator:
history = model.fit_generator(
    generator=train_gen,
    steps_per_epoch=(len(training_files) * data_per_file) // batch_size,
    epochs=epochs,
    validation_data=test_gen,
    validation_steps=(len(test_files) * data_per_file) // batch_size,
    use_multiprocessing=False,
    max_queue_size=10,
    workers=1,
    verbose=1)

How can I read endlessly from a Tensorflow tf.data.Dataset?

I'm switching my old data layer (using Queues) to the "new" and recommended Dataset API. I'm using it for the first time, so I'm providing code examples in case I got something fundamentally wrong.
I create my Dataset from a generator (that will read a file, and provide n samples). It's a small dataset and n_iterations >> n_samples, so I simply want to read this dataset over and over again, ideally shuffled.
sample_set = tf.data.Dataset.from_generator( data_generator(filename),
(tf.uint8, tf.uint8), (tf.TensorShape([256,256,4]), tf.TensorShape([256,256,1]))
)
with the data generator defined as:
class data_generator:
    def __init__(self, filename):
        self.filename = filename

    def __call__(self):
        with self.filename.open() as f:
            for idx in f:
                yield img[idx], label[idx]
To actually use the data, I gathered that I need to define an Iterator:
sample = sample_set.make_one_shot_iterator().get_next()
and then we are set to read data:
while True:
    try:
        my_sample = sess.run(sample)
    except tf.errors.OutOfRangeError:
        break  # this happens after the dataset has been read once
But all available Iterators seem to be "finite", in the way that they read a dataset only once.
Is there a simple way to make reading from the Dataset endless?
Datasets have repeat and shuffle methods.
BUF_SIZE = 100  # choose it depending on your data

sample_set = tf.data.Dataset.from_generator(
    data_generator(filename),
    (tf.uint8, tf.uint8),
    (tf.TensorShape([256,256,4]), tf.TensorShape([256,256,1]))
).repeat().shuffle(BUF_SIZE)
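Note that the order of the two calls matters: repeat().shuffle(BUF_SIZE) can mix samples across epoch boundaries, while shuffle(BUF_SIZE).repeat() reshuffles within each pass over the data:

# epoch-wise shuffling: every pass over the data gets a fresh permutation
sample_set = sample_set.shuffle(BUF_SIZE).repeat()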
The Dataset.repeat() transformation will repeat a dataset endlessly if you don't pass an explicit count to it:
sample_set = tf.data.Dataset.from_generator(
data_generator(filename), (tf.uint8, tf.uint8),
(tf.TensorShape([256,256,4]), tf.TensorShape([256,256,1])))
# Repeats `sample_set` endlessly.
sample_set = sample_set.repeat()
sample = sample_set.make_one_shot_iterator().get_next()
A reinitializable Iterator can be re-initialized on the same dataset, so this code will read the same dataset over and over again:
sample_it = tf.data.Iterator.from_structure(sample_set.output_types,
                                            sample_set.output_shapes)
sample = sample_it.get_next()
sample_set_init_op = sample_it.make_initializer(sample_set)  # create initialize op

with tf.Session(config=config) as sess:
    sess.run(sample_set_init_op)  # initialize in the beginning
    while True:
        try:
            my_sample = sess.run(sample)
        except tf.errors.OutOfRangeError:
            sess.run(sample_set_init_op)  # re-initialize on the same dataset
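As an aside, in TensorFlow 2.x with eager execution the same endless reading no longer needs any iterator or initializer plumbing; assuming the same sample_set, it reduces to:

for my_sample in sample_set.repeat():
    train_step(my_sample)  # train_step is a hypothetical consumer of each sample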