Tensorflow operation to combine Iterator string handles within a single session feed_dict - tensorflow

I would like to generate minibatches with varying combinations of multiple datasets in a manner that uses the data api and does not cause tensor leakage (i.e., increasing the number of graph ops over time). For example, minibatch 1 might be a1, a2, b1, b2 followed by minibatch 2 with a3, a4, c1, c2.
Is it possible to run a single session on multiple initialized dataset iterators via the "string handle feed_dict method" (see feedable at TF ). Is there an op to combine two Iterator.string_handle objects? I have a minimal working example below that shows my issue at the end after the sys.exit.
import tensorflow as tf # v.1.4
import sys
# Predetermine minibatch size.
num_per_class = 6
# Create example datasets.
ds0 = tf.data.Dataset.range(0, 100, 2)
ds1 = tf.data.Dataset.range(1, 101, 2)
# Minibatchify. Note: could use adjustable tensor for minibatch size.
ds0 = ds0.apply(tf.contrib.data.batch_and_drop_remainder(num_per_class))
ds1 = ds1.apply(tf.contrib.data.batch_and_drop_remainder(num_per_class))
# Run forever.
ds0 = ds0.repeat()
ds1 = ds1.repeat()
# Dataset iterators.
ds0_itr = ds0.make_initializable_iterator()
ds1_itr = ds1.make_initializable_iterator()
# Switcher handle placeholder, iterator and ultimate minibatch datums.
switcher_h = tf.placeholder(tf.string, shape=[])
switcher_h_itr = tf.data.Iterator.from_string_handle(switcher_h,
ds0.output_types,
ds0.output_shapes)
mb_datums = switcher_h_itr.get_next()
# Start session.
sess = tf.Session()
# Dataset iterator handles.
ds0_h = sess.run(ds0_itr.string_handle())
ds1_h = sess.run(ds1_itr.string_handle())
# *Separate* dataset feed_dicts.
ds0_fd = {switcher_h: ds0_h}
ds1_fd = {switcher_h: ds1_h}
# Initialize dataset iterators.
sess.run([ds0_itr.initializer, ds1_itr.initializer])
# Print some datums from either (XOR) dataset.
print('ds0 data: {}'.format(sess.run(mb_datums, ds0_fd)))
print('ds1 data: {}'.format(sess.run(mb_datums, ds1_fd)))
# DESIRE A MINIBATCH OF SIZE 12: 6 FROM EACH.
sys.exit()
ds01_fd = {switcher_h: OP_TO_COMBINE_STRING_HANDLES(ds0_h, ds1_h)}
print('ds0+ds1: {}'.format(sess.run(mb_datums, ds01_fd)))

I know it's old, but for others who get to this question as I did and don't want to figure it out themselves: here's a minimal example that uses one dataset to dynamically select or "get_next()" from one of two other datasets:
import numpy as np
import tensorflow as tf
x = np.full(100, 1)
y = np.full(100, 2)
x_i = tf.data.Dataset.from_tensor_slices(x).make_one_shot_iterator()
y_i = tf.data.Dataset.from_tensor_slices(y).make_one_shot_iterator()
with tf.Session() as sesh:
[x_h, y_h] = sesh.run([x_i.string_handle(), y_i.string_handle()])
z_d = tf.data.Dataset.from_tensor_slices(np.random.sample(100))
z_d = z_d.map(lambda x: tf.gather([x_h, y_h], tf.cast(tf.round(x), tf.int32)))
z_i = z_d.make_one_shot_iterator()
picker_i = tf.data.Iterator.from_string_handle(z_i.get_next(), tf.int64).get_next()
for i in range(100):
print(sesh.run([picker_i]))

Related

How do I use all cores of my CPU in reinforcement learning with TF Agents?

I work with an RL algorithm. I'm using tensorflow and tf-agents and training a DQN. My problem is that only one core of the CPU is used when calculating the 10 episodes in the environment for data collection.
My training function looks like this:
def train_step(self, n_steps):
env_steps = tf_metrics.EnvironmentSteps()
#num_episodes = tf_metrics.NumberOfEpisodes()
rew = TFSumOfRewards()
action_hist = tf_metrics.ChosenActionHistogram(
name='ChosenActionHistogram', dtype=tf.int32, buffer_size=1000
)
#add reply buffer and metrict to the observer
replay_observer = [self.replay_buffer.add_batch]
train_metrics = [env_steps, rew]
self.replay_buffer.clear()
driver = dynamic_episode_driver.DynamicEpisodeDriver(
self.train_env, self.collect_policy, observers=replay_observer + train_metrics, num_episodes=self.collect_episodes)
final_time_step, policy_state = driver.run()
print('Number of Steps: ', env_steps.result().numpy())
for train_metric in train_metrics:
train_metric.tf_summaries(train_step=self.global_step, step_metrics=train_metrics)
# Convert the replay buffer to a tf.data.Dataset
# Dataset generates trajectories with shape [Bx2x...]
AUTOTUNE = tf.data.experimental.AUTOTUNE
dataset = self.replay_buffer.as_dataset(
num_parallel_calls=AUTOTUNE,
sample_batch_size=self.batch_size,
num_steps=(self.train_sequence_length + 1)).prefetch(AUTOTUNE)
iterator = iter(dataset)
train_loss = None
for _ in range(n_steps):
# Sample a batch of data from the buffer and update the agent's network.
experience, unused_info = next(iterator)
train_loss = self.agent.train(experience)
def train_agent(self, n_epoch):
for i in range(n_epoch):
self.train_step(int(self.replay_buffer.num_frames().numpy()/self.batch_size))
if(self.IsAutoStoreCheckpoint == True):
self.store_check_point()
pass
As already written above, num_episodes = 10. So it would make sense to calculate 10 episodes in parallel before the network is trained.
If I set the value num_parallel_calls to e.g. 10 nothing changes. What do I have to do to use all cores of my CPU (Ryzen 9 5950x with 16 cores)?
Thanks!
masterkey

How to avoid memory leakage in an autoregressive model within tensorflow

Recently, I am training a LSTM with attention mechanism for regressionin tensorflow 2.9 and I met an problem during training with model.fit():
At the beginning, the training time is okay, like 7s/step. However, it was increasing during the process and after several steps, like 1000, the value might be 50s/step. Here below is a part of the code for my model:
class AttentionModel(tf.keras.Model):
def __init__(self, encoder_output_dim, dec_units, dense_dim, batch):
super().__init__()
self.dense_dim = dense_dim
self.batch = batch
encoder = Encoder(encoder_output_dim)
decoder = Decoder(dec_units,dense_dim)
self.encoder = encoder
self.decoder = decoder
def call(self, inputs):
# Creat a tensor to record the result
tempt = list()
encoder_output, encoder_state = self.encoder(inputs)
new_features = np.zeros((self.batch, 1, 1))
dec_initial_state = encoder_state
for i in range(6):
dec_inputs = DecoderInput(new_features=new_features, enc_output=encoder_output)
dec_result, dec_state = self.decoder(dec_inputs, dec_initial_state)
tempt.append(dec_result.logits)
new_features = dec_result.logits
dec_initial_state = dec_state
result=tf.concat(tempt,1)
return result
In the official documents for tf.function, I notice: "Don't rely on Python side effects like object mutation or list appends".
Since I use a dynamic python list with append() to record the intermediate variables, I guess each time during training, a new tf.graph was added. Is the reason my training is getting slower and slower?
Additionally, what should I use instead of python list to avoid this? I have tried with a numpy.zeros matrix but it will lead to another problem:
tempt = np.zeros(shape=(1,6))
...
for i in range(6):
dec_inputs = DecoderInput(new_features=new_features, enc_output=encoder_output)
dec_result, dec_state = self.decoder(dec_inputs, dec_initial_state)
tempt[i]=(dec_result.logits)
...
Cannot convert a symbolic tf.Tensor (decoder/dense_3/BiasAdd:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported.

How to perform custom operations in between keras layers?

I have one input and one output neural network and in between I need to perform small operation. I have two inputs (from the same distribution of either mean 0 or mean 1) which I need to fed to the neural network one at a time and compare the output of each input. After the comparison, I am finally generating the prediction of the model. The implementation is as follows:
from tensorflow import keras
import tensorflow as tf
import numpy as np
#define network
x1 = keras.Input(shape=(1), name="x1")
x2 = keras.Input(shape=(1), name="x2")
model = keras.layers.Dense(20)
model1 = keras.layers.Dense(1)
x11 = model1(model(x1))
x22 = model1(model(x2))
After this I need to perform following operations:
if x11>=x22:
Vm=x1
else:
Vm=x2
Finally I need to do:
out = Vm - 0.5
out= keras.activations.sigmoid(out)
model = keras.Model([x1,x2], out)
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
loss=tf.keras.losses.binary_crossentropy,
metrics=['accuracy']
)
model.summary()
tf.keras.utils.plot_model(model) #visualize model
I have normally distributed pair of data with same mean (mean 0 and mean 1 as generated below:
#Generating training dataset
from scipy.stats import skewnorm
n=1000 #sample each
s = 1 # scale to change o/p range
X1_0 = skewnorm.rvs(a = 0 ,loc=0, size=n)*s; X1_1 = skewnorm.rvs(a = 0 ,loc=1, size=n)*s #Skewnorm function
X2_0 = skewnorm.rvs(a = 0 ,loc=0, size=n)*s; X2_1 = skewnorm.rvs(a = 0 ,loc=1, size=n)*s #Skewnorm function
X1_train = list(X1_0) + list(X1_1) #append both data
X2_train = list(X2_0) + list(X2_1) #append both data
y_train = [x for x in (0,1) for i in range(0, n)] #make Y for above conditions
#reshape to proper format
X1_train = np.array(X1_train).reshape(-1,1)
X2_train = np.array(X2_train).reshape(-1,1)
y_train = np.array(y_train)
#train model
model.fit([X1_train, X2_train], y_train, epochs=10)
I am not been able to run the program if I include operation
if x11>=x22:
Vm=x1
else:
Vm=x2
in between layers. If I directly work with maximum of outputs as:
Vm = keras.layers.Maximum()([x11,x22])
The program is working fine. But I need to select either x1 or x2 based on the value of x11 and x22.
The problem might be due to the inclusion of the comparison operation while defining structure of the model where there is no value for x11 and x22 (I guess). I am totally new to all these stuffs and so I could not resolve this. I would greatly appreciate any help/suggestions. Thank you.
You can add this functionality via a Lambda layer.
Vm = tf.keras.layers.Lambda(lambda x: tf.where(x[0]>=x[1], x[2], x[3]))([x11, x22, x1, x2])

Sampling for large class and augmentation for small classes in each batch

Let's say we have 2 classes one is small and the second is large.
I would like to use for data augmentation similar to ImageDataGenerator
for the small class, and sampling from each batch, in such a way, that, that each batch would be balanced. (Fro minor class- augmentation for major class- sampling).
Also, I would like to continue using image_dataset_from_directory (since the dataset doesn't fit into RAM).
What about
sample_from_datasets
function?
import tensorflow as tf
from tensorflow.python.data.experimental import sample_from_datasets
def augment(val):
# Example of augmentation function
return val - tf.random.uniform(shape=tf.shape(val), maxval=0.1)
big_dataset_size = 1000
small_dataset_size = 10
# Init some datasets
dataset_class_large_positive = tf.data.Dataset.from_tensor_slices(tf.range(100, 100 + big_dataset_size, dtype=tf.float32))
dataset_class_small_negative = tf.data.Dataset.from_tensor_slices(-tf.range(1, 1 + small_dataset_size, dtype=tf.float32))
# Upsample and augment small dataset
dataset_class_small_negative = dataset_class_small_negative \
.repeat(big_dataset_size // small_dataset_size) \
.map(augment)
dataset = sample_from_datasets(
datasets=[dataset_class_large_positive, dataset_class_small_negative],
weights=[0.5, 0.5]
)
dataset = dataset.shuffle(100)
dataset = dataset.batch(6)
iterator = dataset.as_numpy_iterator()
for i in range(5):
print(next(iterator))
# [109. -10.044552 136. 140. -1.0505208 -5.0829906]
# [122. 108. 141. -4.0211563 126. 116. ]
# [ -4.085523 111. -7.0003924 -7.027302 -8.0362625 -4.0226436]
# [ -9.039093 118. -1.0695585 110. 128. -5.0553837]
# [100. -2.004463 -9.032592 -8.041705 127. 149. ]
Set up the desired balance between the classes in the weights parameter of sample_from_datasets.
As it was noticed by
Yaoshiang,
the last batches are imbalanced and the datasets length are different. This can be avoided by
# Repeat infinitely both datasets and augment the small one
dataset_class_large_positive = dataset_class_large_positive.repeat()
dataset_class_small_negative = dataset_class_small_negative.repeat().map(augment)
instead of
# Upsample and augment small dataset
dataset_class_small_negative = dataset_class_small_negative \
.repeat(big_dataset_size // small_dataset_size) \
.map(augment)
This case, however, the dataset is infinite and the number of batches in epoch has to be further controlled.
You can use tf.data.Dataset.from_generator that allows more control on your data generation without loading all your data into RAM.
def generator():
i=0
while True :
if i%2 == 0:
elem = large_class_sample()
else :
elem =small_class_augmented()
yield elem
i=i+1
ds= tf.data.Dataset.from_generator(
generator,
output_signature=(
tf.TensorSpec(shape=yourElem_shape , dtype=yourElem_ype))
This generator will alterate samples between the two classes,and you can add more dataset operations(batch , shuffle..)
I didn't totally follow the problem. Would psuedo-code this work? Perhaps there are some operators on tf.data.Dataset that are sufficient to solve your problem.
ds = image_dataset_from_directory(...)
ds1=ds.filter(lambda image, label: label == MAJORITY)
ds2=ds.filter(lambda image, label: label != MAJORITY)
ds2 = ds2.map(lambda image, label: data_augment(image), label)
ds1.batch(int(10. / MAJORITY_RATIO))
ds2.batch(int(10. / MINORITY_RATIO))
ds3 = ds1.zip(ds2)
ds3 = ds3.map(lambda left, right: tf.concat(left, right, axis=0)
You can use the tf.data.Dataset.from_tensor_slices to load the images of two categories seperately and do data augmentation for the minority class. Now that you have two datasets combine them with tf.data.Dataset.sample_from_datasets.
# assume class1 is the minority class
files_class1 = glob('class1\\*.jpg')
files_class2 = glob('class2\\*.jpg')
def augment(filepath):
class_name = tf.strings.split(filepath, os.sep)[0]
image = tf.io.read_file(filepath)
image = tf.expand_dims(image, 0)
if tf.equal(class_name, 'class1'):
# do all the data augmentation
image_flip = tf.image.flip_left_right(image)
return [[image, class_name],[image_flip, class_name]]
# apply data augmentation for class1
train_class1 = tf.data.Dataset.from_tensor_slices(files_class1).\
map(augment,num_parallel_calls=tf.data.AUTOTUNE)
train_class2 = tf.data.Dataset.from_tensor_slices(files_class2)
dataset = tf.python.data.experimental.sample_from_datasets(
datasets=[train_class1,train_class2],
weights=[0.5, 0.5])
dataset = dataset.batch(BATCH_SIZE)

tensorflow error - you must feed a value for placeholder tensor 'in'

I'm trying to implement queues for my tensorflow prediction but get the following error -
you must feed a value for placeholder tensor 'in' with dtype float and shape [1024,1024,3]
The program works fine if I use the feed_dict, Trying to replace feed_dict with queues.
The program basically takes a list of positions and passes the image np array to the input tensor.
for each in positions:
y,x = each
images = img[y:y+1024,x:x+1024,:]
a = images.astype('float32')
q = tf.FIFOQueue(capacity=200,dtypes=dtypes)
enqueue_op = q.enqueue(a)
qr = tf.train.QueueRunner(q, [enqueue_op] * 1)
tf.train.add_queue_runner(qr)
data = q.dequeue()
graph=load_graph('/home/graph/frozen_graph.pb')
with tf.Session(graph=graph,config=tf.ConfigProto(log_device_placement=True)) as sess:
p_boxes = graph.get_tensor_by_name("cat:0")
p_confs = graph.get_tensor_by_name("sha:0")
y = [p_confs, p_boxes]
x = graph.get_tensor_by_name("in:0")
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord,sess=sess)
confs, boxes = sess.run(y)
coord.request_stop()
coord.join(threads)
How can I make sure the input data that I populated to the queue is recognized while running the graph in the session.
In my original run I call the
confs, boxes = sess.run([p_confs, p_boxes], feed_dict=feed_dict_testing)
I'd suggest not using queues for this problem, and switching to the new tf.data API. In particular tf.data.Dataset.from_generator() makes it easier to feed in data from a Python function. You can rewrite your code to be much simpler, as follows:
def generator():
for y, x in positions:
images = img[y:y+1024,x:x+1024,:]
yield images.astype('float32')
dataset = tf.data.Dataset.from_generator(
generator, tf.float32, [1024, 1024, img.shape[3]])
# Add any extra transformations in here, like `dataset.batch()` or
# `dataset.repeat()`.
# ...
iterator = dataset.make_one_shot_iterator()
data = iterator.get_next()
Note that in your program, there's no connection between the data tensor and the graph you loaded in load_graph() (at least, assuming that load_graph() doesn't grab data from the global state!). You will probably need to use tf.import_graph_def() and the input_map argument to associate data with one of the tensors in your frozen graph (possibly "in:0"?) to complete the task.