Receiving the same (not random) augmentations of image dataset - tensorflow

# Minimal reproduction of the problem: after mapping, the elements are either
# all unchanged or all equal to 100.
dataset = tf.data.Dataset.range(1, 6)


def aug(y):
    """Return 100 when a uniform draw in [0, 1) exceeds 0.5, otherwise ``y``."""
    # NOTE: the NumPy draw is deliberately left in place -- it is the call
    # whose behaviour inside ``Dataset.map`` this question is asking about.
    x = np.random.uniform(0, 1)
    if x > 0.5:
        y = 100
    return y


dataset = dataset.map(aug)
print(list(dataset))
If you run this code, the elements in the dataset are either all left as they were or all equal to 100. How do I make it so that each element is transformed individually?
My more specific question below is basically asking this
I create my segmentation training set by:
dataset = tf.data.Dataset.from_tensor_slices((image_paths, mask_paths))
I then apply my augmentation function to the dataset:
def augment(image_path, mask_path):
    """Load an image/mask pair and flip both upside down together.

    Args:
        image_path: Path of the JPEG image.
        mask_path: Path of the matching JPEG segmentation mask.

    Returns:
        The decoded ``(image, mask)`` tensors, both flipped or both untouched.
    """
    # Use tf.io.read_file and tf.io.decode_jpeg to convert paths to tensors.
    image = tf.io.decode_jpeg(tf.io.read_file(image_path))
    mask = tf.io.decode_jpeg(tf.io.read_file(mask_path))
    # NOTE: the NumPy draw is deliberately left in place -- it is the call
    # whose behaviour inside ``Dataset.map`` this question is asking about.
    x = np.random.choice([0, 1])
    if x == 1:
        image = tf.image.flip_up_down(image)
        mask = tf.image.flip_up_down(mask)
    return image, mask


training_dataset = dataset.map(augment)
BATCH_SIZE = 2
training_dataset = training_dataset.shuffle(100, reshuffle_each_iteration=True)
training_dataset = training_dataset.batch(BATCH_SIZE)
training_dataset = training_dataset.repeat()
# A buffer size of -1 asks tf.data to tune the prefetch depth itself.
training_dataset = training_dataset.prefetch(-1)
However, when I visualise my training dataset, all the images have the same flip applied: they are all either flipped upside down or not flipped, whereas I'm expecting them to have different flips, some upside down and some not.
Why is this happening?

You need to use tensorflow operations (not numpy or normal python) because tf.data.Dataset.map() executes the mapped function as a graph. When converting a function to a graph, numpy and base python are converted to constants. The augmentation function is only running np.random.uniform(0,1) once and storing it as a constant.
Note that irrespective of the context in which map_func is defined (eager vs. graph), tf.data traces the function and executes it as a graph.
The source for the above is here.
One solution is to use tensorflow operations. I have included an example below. Note that the y value in the if has to be cast to the same dtype as the input.
dataset = tf.data.Dataset.range(1, 6)


def aug(y):
    """Return 100 (in ``y``'s dtype) when a uniform draw exceeds 0.5, else ``y``."""
    # A TensorFlow op stays a node in the traced graph, so it is evaluated for
    # every element instead of being frozen into a constant at trace time.
    x = tf.random.uniform([], 0, 1)
    if x > 0.5:
        # Both branches of the conditional must yield the same dtype.
        y = tf.cast(100, y.dtype)
    return y


dataset = dataset.map(aug)
print(list(dataset))

You can use a uniform random function or other probability distribution
tf.random.uniform(
shape, minval=0, maxval=None, dtype=tf.dtypes.float32, seed=None, name=None
)
You can even use the prebuilt layer in TensorFlow/Keras for flipping:
tf.keras.layers.experimental.preprocessing.RandomFlip(
mode=HORIZONTAL_AND_VERTICAL, seed=None, name=None, **kwargs
)

Related

How to cut and paste a part of an image randomly to a different location using tensorflow?

I am trying to implement a custom layer in keras.layers where I want to do a custom image augmentation. My idea is to cut out a part of an image from a random location and paste it to a different random location in the same image. The code below I have written works well for PIL Image but when I integrate it into my final code (which is a tensorflow model), I get as error saying that tensor doesn't support item assignment.
Below is the class that I have implemented:
class Cut_Paste(layers.Layer):
    """Augmentation layer that copies a patch of an image onto another spot.

    Both the cut and the paste location are drawn at random from a band
    around the centre of the image.
    """

    def __init__(self, x_scale = 10, y_scale = 10, IMG_SIZE = (224,224), **kwargs):
        """
        defining the x span and the y span of the box to cutout
        x_scale and y_scale are taken as inputs as % of the width and height of the image
        size
        """
        super().__init__(**kwargs)
        self.size_x, self.size_y = IMG_SIZE
        self.span_x = int(x_scale*self.size_x*0.01)
        self.span_y = int(y_scale*self.size_y*0.01)

    #getting the vertices for cut and paste
    def get_vertices(self):
        """Return ``(start_x, start_y, end_x, end_y)`` of a randomly placed box.

        The box is ``span_x`` by ``span_y`` and centred on a random point.
        """
        #determining random points for cut and paste
        # since the images in the dataset have the object of interest in the center of
        # the Image, the cutout will be taken from the central 25% of the image
        fraction = 0.25
        vert_x = random.randint(int(self.size_x*0.5*(1-fraction)),
                                int(self.size_x*0.5*(1+fraction)))
        vert_y = random.randint(int(self.size_y*0.5*(1-fraction)),
                                int(self.size_y*0.5*(1+fraction)))
        start_x = int(vert_x-self.span_x/2)
        start_y = int(vert_y-self.span_y/2)
        end_x = int(vert_x+self.span_x/2)
        end_y = int(vert_y+self.span_y/2)
        return start_x, start_y, end_x, end_y

    def call(self, image):
        """Cut a random box out of ``image`` and paste it at another random box.

        As written, the slice assignment raises ``TypeError`` when ``image``
        is a tensor; that error is what this question is about.
        """
        #getting random vertices for cutting
        cut_start_x, cut_start_y, cut_end_x, cut_end_y = self.get_vertices()
        #getting the image as a sub-image
        #image = tf.Variable(image)
        sub_image = image[cut_start_x:cut_end_x,cut_start_y:cut_end_y,:]
        #getting random vertices for pasting
        paste_start_x, paste_start_y, paste_end_x, paste_end_y = self.get_vertices()
        #replacing a part of the image at random location with sub_image
        # NOTE: this item assignment is the statement that fails on a tensor.
        image[paste_start_x:paste_end_x,
              paste_start_y:paste_end_y,:] = sub_image
        return image
I am calling it from my model class this way:
class Contrastive_learning_model(keras.Model):
    """Model that applies the ``Cut_Paste`` augmentation in its training step."""

    def __init__(self):
        super().__init__()
        # ``cut_paste_augmentation`` holds the keyword arguments for the layer;
        # it is defined in code the author left out of the post.
        self.cut_paste = Cut_Paste(**cut_paste_augmentation)

    def train_step(self, data):
        # Abridged by the author: ``images`` comes from code not shown here.
        augmented_images_2 = self.cut_paste.call(images)
I have removed the part of the code which is irrelevant. But upon executing this is the error I get:
TypeError: 'tensorflow.python.framework.ops.EagerTensor' object does not support item assignment
I understood from other sources that it is not possible to do item assignment in tensor. So here I am seeking help to do this in an easier way. I need to use tensors for this. Any help will be much appreciated.
Tensorflow does not support item assignment unlike PyTorch.
A workaround you can implement is to convert the tensor to tf.Variable and then a numpy array like the following:
image = tf.Variable(image).numpy()

OpenCV and PyTorch inverse transform not working

I have a transforms class which only does:
# Fragment of the dataset constructor; the enclosing ``def`` is not shown.
if transform is None:
    # Default pipeline: resize to 256x256, then convert to a tensor.
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()
    ])
root = os.path.join(PROJECT_ROOT_DIR, "data")
super(AttributesDataset, self).__init__()
# CelebA with attribute targets; downloaded into ``root`` when missing.
self.data = torchvision.datasets.CelebA(
    root=root,
    split=split,
    target_type='attr',
    download=True,
    transform=transform
)
From the documentation, I understand that this implies just a scale-down of values in the range 0,1 ie all pixel values shall lie between [0,1] (I have verified this as well).
I want to visualize some of the outputs coming from the model. As such, I created a simple method which does:-
# Show every image from the dataloader after undoing the [0, 1] scaling.
for img, label in dataloader:
    # Drop the leading dimension in place.
    img.squeeze_(0)
    # permute the channels. cv2 expects image in format (h, w, c)
    unscaled_img = img.permute(1, 2, 0)
    # move images to cpu and convert to numpy as required by cv2 library
    unscaled_img = torch.round(unscaled_img * 255)
    unscaled_img = unscaled_img.to(torch.uint8)
    # unscaled_img = np.rint(unscaled_img * 255).astype(np.uint8)
    # NOTE: this colour conversion is the line the question turns on.
    unscaled_img = cv2.cvtColor(unscaled_img, cv2.COLOR_RGB2BGR)
    cv2.imshow(unscaled_img.numpy())
However, all the images that are created have an unusually blue shade. For instance,
Can someone please tell me what exactly am I doing wrong here? Your help would be highly appreciated
Solved by @LajosArpad's comment. The culprit was
unscaled_img = cv2.cvtColor(unscaled_img, cv2.COLOR_RGB2BGR)
Removing it resulted in correct values.

Bounding hyperparameter optimization with Tensorflow bijector chain in GPflow 2.0

While doing GP regression in GPflow 2.0, I want to set hard bounds on lengthscale (i.e. limiting lengthscale optimization range). Following this thread (Setting hyperparameter optimization bounds in GPflow 2.0), I constructed a TensorFlow Bijector chain (see bounded_lengthscale function below). However, the bijector chain below does not prevent the model from optimizing outside the supposed bounds. What do I need to change to make the bounded_lengthscale function put hard bounds on optimization?
Below is the MRE:
import gpflow
import numpy as np
from gpflow.utilities import print_summary
import tensorflow as tf
from tensorflow_probability import bijectors as tfb
# Toy regression problem: seven integer inputs in [-3, 3] stored as a
# float64 column, and a sine response perturbed by scaled Gaussian noise.
noise = 0.3
X = np.arange(-3, 4, 1, dtype='float64')[:, np.newaxis]
Y = np.reshape(np.sin(X) + noise * np.random.randn(*X.shape), (-1, 1))
def bounded_lengthscale(low, high, lengthscale):
    """Returns lengthscale Parameter with optimization bounds.

    Args:
        low: Lower bound of the constrained range.
        high: Upper bound of the constrained range.
        lengthscale: Initial value.
    """
    # Chain applies right to left: sigmoid squashes into (0, 1), then the
    # affine map stretches that interval to (low, high).
    affine = tfb.AffineScalar(shift=low, scale=high-low)
    sigmoid = tfb.Sigmoid()
    logistic = tfb.Chain([affine, sigmoid])
    parameter = gpflow.Parameter(lengthscale, transform=logistic, dtype=tf.float32)
    # NOTE: this cast is part of the behaviour the question asks about; it is
    # left unchanged here on purpose.
    parameter = tf.cast(parameter, dtype=tf.float64)
    return parameter
# build GPR model
k = gpflow.kernels.Matern52()
m = gpflow.models.GPR(data=(X, Y), kernel=k)
# NOTE: ``assign`` is the call the question is about; it is left unchanged.
m.kernel.lengthscale.assign(bounded_lengthscale(0, 1, 0.5))
print_summary(m)


# train model
@tf.function(autograph=False)
def objective_closure():
    """Return the negative log marginal likelihood of ``m`` (to be minimised)."""
    return - m.log_marginal_likelihood()


opt = gpflow.optimizers.Scipy()
opt_logs = opt.minimize(objective_closure,
                        m.trainable_variables)
print_summary(m)
Thanks!
tfb.Sigmoid now accepts low and high parameters, as @Brian Patton forecasted in a comment.
Therefore, the code can be simplified to :
from tensorflow_probability import bijectors as tfb
def bounded_lengthscale(low, high, lengthscale):
    """Make lengthscale tfp Parameter with optimization bounds.

    Args:
        low: Lower bound of the constrained range.
        high: Upper bound of the constrained range.
        lengthscale: Initial value; it should lie strictly between the bounds.

    Returns:
        A ``gpflow.Parameter`` whose constrained value stays in (low, high).
    """
    # The bounds and the parameter are kept in float64 so they match the
    # float64 data the model is built on; mixing float32 in here leads to a
    # float32/float64 mismatch.
    sigmoid = tfb.Sigmoid(low=tf.cast(low, tf.float64),
                          high=tf.cast(high, tf.float64))
    parameter = gpflow.Parameter(lengthscale, transform=sigmoid, dtype=tf.float64)
    return parameter


# Replace the kernel's Parameter (rather than assigning a value to it) so the
# bounding transform is actually attached to the model.
m.kernel.lengthscale = bounded_lengthscale(0, 1, 0.5)
In the MWE you assign a new value to a Parameter that is already existing (and does not have the logistic transform). This value is the constrained-space value that the Parameter constructed with logistic transform has, but the transform isn't carried over. Instead, you need to replace the Parameter without logistic transform with one with the transform you want: m.kernel.lengthscale = bounded_lengthscale(0,1,0.5).
Note that the object that you assign to the kernel.lengthscale attribute must be a Parameter instance; if you assign the return value of tf.cast(parameter) as in the MWE this is equivalent to a constant, and it won't actually be optimised!
Simply removing the tf.cast in the MWE in this question won't immediately work due to the float32/float64 mismatch. To fix it, the AffineScalar bijector needs to be in float64; it does not have a dtype argument, so instead cast the arguments to shift= and scale= to the required type:
def bounded_lengthscale(low, high, lengthscale):
    """Make lengthscale tfp Parameter with optimization bounds.

    Args:
        low: Lower bound of the constrained range.
        high: Upper bound of the constrained range.
        lengthscale: Initial value.

    Returns:
        A float64 ``gpflow.Parameter`` carrying the bounding transform.
    """
    # AffineScalar has no dtype argument, so its inputs are cast instead.
    affine = tfb.AffineScalar(shift=tf.cast(low, tf.float64),
                              scale=tf.cast(high-low, tf.float64))
    sigmoid = tfb.Sigmoid()
    # Chain applies right to left: sigmoid first, then the affine stretch.
    logistic = tfb.Chain([affine, sigmoid])
    parameter = gpflow.Parameter(lengthscale, transform=logistic, dtype=tf.float64)
    return parameter


m.kernel.lengthscale = bounded_lengthscale(0, 1, 0.5)
(GPflow should probably contain a helper function like this to make bounded parameter transforms easier to use - GPflow always appreciates people helping out, so if you want to turn this into a pull request, please do!)

How to load pickle files by tensorflow's tf.data API

I have my data in multiple pickle files stored on disk. I want to use tensorflow's tf.data.Dataset to load my data into training pipeline. My code goes:
def _parse_file(path):
    """Return the ``(image, label)`` pair stored in the pickle file at ``path``."""
    # Placeholder: how to load the pickle file here is what the question asks.
    image, label = ...  # *load pickle file*
    return image, label


paths = glob.glob('*.pkl')
print(len(paths))
dataset = tf.data.Dataset.from_tensor_slices(paths)
dataset = dataset.map(_parse_file)
iterator = dataset.make_one_shot_iterator()
The problem is that I don't know how to implement the _parse_file function. The argument to this function, path, is of tensor type. I tried
def _parse_file(path):
    """Attempt: evaluate the path tensor in a session, then unpickle the file."""
    with tf.Session() as s:
        # NOTE: this ``run`` call is the attempt the question reports as failing.
        p = s.run(path)
        # Context manager so the file handle is closed after loading.
        with open(p, 'rb') as f:
            image, label = pickle.load(f)
    return image, label
and got error message:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'arg0' with dtype string
[[Node: arg0 = Placeholder[dtype=DT_STRING, shape=<unknown>, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
After some search on the Internet I still have no idea how to do it. I will be grateful to anyone providing me a hint.
I have solved this myself. I should use tf.py_func as in this doc.
This is how I solved this issue. I didn't use the tf.py_func; check out function "load_encoding()" below, which is what's doing the pickle reading. The FACELIB_DIR contains directories of pickled vggface2 encodings, each directory named for the person of those face encodings.
import tensorflow as tf
import pickle
import os
FACELIB_DIR='/var/noggin/FaceEncodings'
# Class names are the visible (non-dot) sub-directories of FACELIB_DIR, sorted;
# each name is then mapped to its position in that sorted list.
labelNames = sorted(
    entry
    for entry in os.listdir(FACELIB_DIR)
    if not entry.startswith('.') and os.path.isdir(os.path.join(FACELIB_DIR, entry))
)
labelStrToInt = {name: index for index, name in enumerate(labelNames)}
# Function load_encoding - Loads Encoding data from enc2048 file in filepath
# This reads an encoding from disk, and through the file path gets the label value, returns both
def load_encoding(file_path):
    """Load one pickled encoding and look up the integer label of its class.

    Args:
        file_path: Path of an enc2048 file relative to ``FACELIB_DIR``; the
            name of its parent directory is the class name.

    Returns:
        ``(A, label)``: the first item of the unpickled pair and the index of
        the class name in ``labelStrToInt``.

    Raises:
        KeyError: If the parent directory name is not in ``labelStrToInt``.
    """
    with open(os.path.join(FACELIB_DIR, file_path), 'rb') as fin:
        # SECURITY: unpickling can execute arbitrary code; only load files
        # from a trusted source.
        A, _ = pickle.load(fin)  # encodings, source_image_name
    # Plain path handling keeps the class name a Python str, which is what
    # labelStrToInt is keyed by (a tf.strings.split result is a tensor).
    label_str = os.path.basename(os.path.dirname(file_path))
    return (A, labelStrToInt[label_str])
# Build the dataset of every enc2048 file in our data library
encpaths = []
# labelNames already holds the sorted, visible class directories.
for D in labelNames:
    # All the encoding files
    encfiles = sorted(x for x in os.listdir(os.path.join(FACELIB_DIR, D))
                      if x.endswith('.enc2048'))
    encpaths += [os.path.join(D, x) for x in encfiles]
dataset = tf.data.Dataset.from_tensor_slices(encpaths)
# NOTE(review): nothing in this snippet maps load_encoding over the dataset,
# so as written its elements are the path strings -- confirm this is intended.

# Shuffle and speed improvements on the dataset
BATCH_SIZE = 64
AUTOTUNE = tf.data.AUTOTUNE
# cache() comes before shuffle() so that every pass produced by repeat() is
# reshuffled; shuffling first would cache one fixed order.
dataset = (dataset
           .cache()
           .shuffle(1024)
           .repeat()
           .batch(BATCH_SIZE)
           .prefetch(AUTOTUNE)
           )
# Benchmark our tf.data pipeline
import time

datasetGen = iter(dataset)
NUM_STEPS = 10000
# perf_counter is monotonic, so the measurement is unaffected by system
# clock adjustments made while the loop runs.
start_time = time.perf_counter()
for _ in range(NUM_STEPS):
    X = next(datasetGen)
totalTime = time.perf_counter() - start_time
print('==> tf.data generated {} tensors in {:.2f} seconds'.format(BATCH_SIZE * NUM_STEPS, totalTime))
tf.py_func
This function is used to solve that problem, as is also mentioned in the docs.

Color map an image with TensorFlow?

I'm saving grayscale images in TFRecord files. The idea then was to color map them on my GPU (only using TF of course) so they get three channels (They are going to be used on a pre-trained VGG-16 model so they have to have three channels).
Does anyone have any idea how to do this properly?
I tried to do it with my homemade TF color mapping script, using for-loops, tf.scatter_nd and a mapping array with shape = (256,3)... but it took forever.
EDIT:
img_rgb = GRAY SCALE IMAGE WITH 3 CHANNELS
cmp = [[255,255,255],
[255,255,253],
[255,254,250],
[255,254,248],
[255,254,245],
...
[4,0,0],
[0,0,0]]
cmp = tf.convert_to_tensor(cmp, tf.int32) # (256, 3)
hot = tf.zeros([224,224,3], tf.int32)
# Visit every (row, column, channel) position one at a time and look up the
# mapped colour value for that pixel and channel.
for i in range(img_rgb.shape[2]):
    for j in range(img_rgb.shape[1]):
        for k in range(img_rgb.shape[0]):
            indices = tf.constant([[k,j,i]])
            updates = tf.Variable([cmp[img_rgb[k,j,i],i]])
            # NOTE(review): the indices have three components but this shape
            # has two dimensions -- presumably [224, 224, 3] was meant.
            shape = tf.constant([256, 3])
            # ``hot`` is rebuilt from scratch on every iteration, so only the
            # last scattered value survives the loop.
            hot = tf.scatter_nd(indices, updates, shape)
This was my attempt, I know it's not optimal in any way, but It was the only solution I could come up with.
Thanks to work by jimfleming, https://gist.github.com/jimfleming/c1adfdb0f526465c99409cc143dea97b
import matplotlib
import matplotlib.cm
import tensorflow as tf
def colorize(value, vmin=None, vmax=None, cmap=None):
    """
    A utility function for TensorFlow that maps a grayscale image to a matplotlib
    colormap for use with TensorBoard image summaries.

    Arguments:
      - value: 2D Tensor of shape [height, width] or 3D Tensor of shape
        [height, width, 1].
      - vmin: the minimum value of the range used for normalization.
        (Default: value minimum)
      - vmax: the maximum value of the range used for normalization.
        (Default: value maximum)
      - cmap: a valid cmap named for use with matplotlib's `get_cmap`.
        (Default: 'gray')

    Example usage:

    ```
    output = tf.random.uniform(shape=[256, 256, 1])
    output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='plasma')
    tf.summary.image('output', output_color)
    ```

    Returns a 3D tensor of shape [height, width, 3].
    """
    # Local import keeps this snippet self-contained; numpy is only needed to
    # sample the colormap below.
    import numpy as np

    # normalize
    vmin = tf.reduce_min(value) if vmin is None else vmin
    vmax = tf.reduce_max(value) if vmax is None else vmax
    value = (value - vmin) / (vmax - vmin) # vmin..vmax
    # Values outside an explicitly given vmin..vmax would otherwise index past
    # the 256-entry colour table.
    value = tf.clip_by_value(value, 0.0, 1.0)

    # squeeze last dim if it exists
    value = tf.squeeze(value)

    # quantize
    indices = tf.cast(tf.round(value * 255), tf.int32)

    # gather
    cm = matplotlib.cm.get_cmap(cmap if cmap is not None else 'gray')
    # Sample the colormap at 256 evenly spaced points rather than reading
    # ``cm.colors``: that attribute exists only on listed colormaps, not on
    # segmented ones such as the default 'gray'. The alpha column is dropped.
    colors = tf.constant(cm(np.linspace(0.0, 1.0, 256))[:, :3], dtype=tf.float32)
    value = tf.gather(colors, indices)

    return value
You could also try tf.image.grayscale_to_rgb, although there seems to be only one choice of color map, gray.
We're here to help. If everyone wrote optimal code, there would be no need for Stackoverflow. :)
Here's how I would do it in place of the last 7 lines (untested code):
# For every pixel, fetch the row of cmp selected by the value in channel 0 of
# img_rgb.
conv_img = tf.gather( params = cmp,
indices = img_rgb[ :, :, 0 ] )
Basically, no need for the for loops, Tensorflow will do that for you, and much quicker. tf.gather() will collect elements from cmp according to the indices provided, which here would be the 0th channel of img_rgb. Each collected element will have the three channels from cmp so when you put them all together, it will form an image.
I don't have time to test right now, gotta run, sorry. Hope it works.