Must the input of `tf.image.resize_images` have a static shape? - tensorflow

When I run the code below, it raises a ValueError: 'images' contains no shape. I therefore have to add the commented-out line to set a static shape, but img_raw may have different shapes, and that line defeats the purpose of tf.image.resize_images.
I just want to resize images with different shapes to [227, 227, 3]. How should I do that?
def tf_read(file_queue):
    reader = tf.WholeFileReader()
    file_name, content = reader.read(file_queue)
    img_raw = tf.image.decode_image(content, 3)
    # img_raw.set_shape([227, 227, 3])
    img_resized = tf.image.resize_images(img_raw, [227, 227])
    img_shape = tf.shape(img_resized)
    return file_name, img_resized, img_shape

The issue here actually comes from the fact that tf.image.decode_image doesn't set the static shape of the tensor it returns. This was explained in these two GitHub issues: issue1, issue2.
The reason is that tf.image.decode_image also handles .gif, which produces a 4D tensor, whereas .jpg and .png produce 3D images; not even the rank of the output can be inferred statically, so no correct shape can be set.
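You can see the difference by comparing the static shapes the two decoders report; a quick sketch of mine (not from the original answer), assuming TF 1.x and a local dog1.jpg:
content = tf.read_file('dog1.jpg')
print(tf.image.decode_image(content, channels=3).shape)  # <unknown>: could be 3-D or 4-D
print(tf.image.decode_jpeg(content, channels=3).shape)   # (?, ?, 3): rank is known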
The solution is to simply use tf.image.decode_jpeg or tf.image.decode_png (both work the same and can be used on .png and .jpg images).
def _decode_image(filename):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)
    image_resized = tf.image.resize_images(image, [224, 224])
    return image_resized
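This function can then be mapped over a dataset of filenames; a brief usage sketch of mine (the file paths are hypothetical), using the tf.data API available from TF 1.4:
filenames = tf.constant(['img1.jpg', 'img2.jpg'])  # hypothetical paths
dataset = tf.data.Dataset.from_tensor_slices(filenames)
dataset = dataset.map(_decode_image).batch(32)  # all images now share one static shape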

No, tf.image.resize_images can handle dynamic shapes:
file_queue = tf.train.string_input_producer(['./dog1.jpg'])
# shape of dog1.jpg is (720, 720)
reader = tf.WholeFileReader()
file_name, content = reader.read(file_queue)
img_raw = tf.image.decode_jpeg(content, 3)  # shape (?, ?, 3) <= dynamic h and w
# img_raw.set_shape([227, 227, 3])
img_resized = tf.image.resize_images(img_raw, [227, 227])
img_shape = tf.shape(img_resized)
with tf.Session() as sess:
    # start the queue runners, otherwise eval() below blocks forever
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess, coord)
    print(img_shape.eval())  # [227, 227, 3]
    coord.request_stop()
    coord.join(threads)
BTW, I am using TF v0.12, which has no function called tf.image.decode_image, but I don't think that matters here.

Of course you can use a tensor object as the size input for tf.image.resize_images.
So, by saying "turn images with different shapes to [227,227,3]", I suppose you don't want to lose their aspect ratio, right? To achieve this, you have to rescale the input image first, then pad the rest with zeros.
It should be noted, though, that you should consider performing image distortion and standardization before the padding step.
# Rescale so that one side of the image fits the corresponding side of the box,
# then pad the rest with zeros.
target_height = 227
target_width = 227

def resize_and_pad(image):
    shape = tf.shape(image)
    img_h = tf.cast(shape[0], tf.float32)
    img_w = tf.cast(shape[1], tf.float32)
    box_h = tf.cast(target_height, tf.float32)
    box_w = tf.cast(target_width, tf.float32)
    img_ratio = img_h / img_w
    aim_ratio = box_h / box_w
    # If the image is proportionally taller than the box, fit the height and
    # scale the width by the same factor; otherwise fit the width.
    aim_h, aim_w = tf.cond(tf.greater(img_ratio, aim_ratio),
                           lambda: (tf.constant(target_height, tf.int32),
                                    tf.cast(box_h / img_h * img_w, tf.int32)),
                           lambda: (tf.cast(box_w / img_w * img_h, tf.int32),
                                    tf.constant(target_width, tf.int32)))
    image_resized = tf.image.resize_images(image, tf.stack([aim_h, aim_w]), align_corners=True)
    # Perform image standardization and distortion
    image_standardized_distorted = blablabla
    image_padded = tf.image.resize_image_with_crop_or_pad(image_standardized_distorted,
                                                          target_height, target_width)
    return image_padded

Related

image preprocess function for image_dataset_from_directory

With ImageDataGenerator, I've used the following function to preprocess images, through the 'preprocessing' keyword in .flow_from_dataframe().
However, I am now trying to use image_dataset_from_directory, which does not work with the preprocess function, as it does not allow embedding this function.
I've tried to apply the preprocess_image() function after the dataset is generated by image_dataset_from_directory, through the .map() function, but it does not work either.
Please could anyone advise?
Many thanks,
Tony
train_Gen = dataGen.flow_from_dataframe(
    df,
    x_col='id_code',
    y_col='diagnosis',
    directory=os.path.join(data_dir, 'train_images'),
    batch_size=BATCH_SIZE,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    subset='training',
    seed=123,
    class_mode='categorical',
    preprocessing=preprocess_image,
)
def crop_image_from_gray(img, tol=7):
    """
    Applies masks to the original image and
    returns a preprocessed image with 3 channels
    :param img: A NumPy array that will be cropped
    :param tol: The tolerance used for masking
    :return: A NumPy array containing the cropped image
    """
    # If for some reason we only have two channels
    if img.ndim == 2:
        mask = img > tol
        return img[np.ix_(mask.any(1), mask.any(0))]
    # If we have a normal RGB image
    elif img.ndim == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img > tol
        check_shape = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))].shape[0]
        if check_shape == 0:  # image is so dark that we would crop out everything,
            return img        # so return the original image
        else:
            img1 = img[:, :, 0][np.ix_(mask.any(1), mask.any(0))]
            img2 = img[:, :, 1][np.ix_(mask.any(1), mask.any(0))]
            img3 = img[:, :, 2][np.ix_(mask.any(1), mask.any(0))]
            img = np.stack([img1, img2, img3], axis=-1)
        return img
def preprocess_image(image, sigmaX=10):
    """
    The whole preprocessing pipeline:
    1. Read in image
    2. Apply masks
    3. Resize image to desired size
    4. Blend with a Gaussian-blurred copy to increase robustness
    :param image: A NumPy array that will be cropped
    :param sigmaX: Value used for the GaussianBlur applied to the image
    :return: A NumPy array containing the preprocessed image
    """
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), sigmaX), -4, 128)
    return image
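For the .map() route, one option (my suggestion, not something confirmed in the post) is to wrap the NumPy/OpenCV pipeline in tf.numpy_function, since Dataset.map() otherwise traces the function as a graph and plain cv2 calls fail there. A minimal sketch, assuming TF 2.x, numpy/cv2 imported, and the IMG_WIDTH/IMG_HEIGHT/BATCH_SIZE/data_dir names from the question:
def tf_preprocess(image, label):
    # tf.numpy_function runs preprocess_image eagerly on a NumPy array.
    # Note: preprocess_image as written expects BGR input (it was built around cv2.imread).
    image = tf.numpy_function(
        lambda img: preprocess_image(img.astype(np.uint8)).astype(np.float32),
        [image], tf.float32)
    # numpy_function discards static shape information, so restore it by hand.
    image.set_shape([IMG_HEIGHT, IMG_WIDTH, 3])
    return image, label

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(data_dir, 'train_images'),
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE)
train_ds = train_ds.unbatch().map(tf_preprocess).batch(BATCH_SIZE)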

Tensorflow Object Detection API 1-channel image

Is there any way to use pre-trained models from TensorFlow's Object Detection API, which were trained on RGB images, for single-channel grayscale images (depth)?
I tried the following approach to perform object detection on grayscale (1-channel) images using a pre-trained model (faster_rcnn_resnet101_coco_11_06_2017) in TensorFlow. It worked for me.
The model was trained on RGB images, so I just had to modify certain code in object_detection_tutorial.ipynb, available in the TensorFlow repo.
First Change:
Note that the existing code in the ipynb was written for 3-channel images, so change the load_image_into_numpy_array function as shown.
From
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
To
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    channel_dict = {'L': 1, 'RGB': 3}  # 'L' for grayscale, 'RGB' for 3-channel images
    return np.array(image.getdata()).reshape(
        (im_height, im_width, channel_dict[image.mode])).astype(np.uint8)
Second Change: Grayscale images have data in only one channel, but to perform object detection we need 3 channels (the inference code was written for 3 channels).
This can be achieved in two ways:
a) Duplicate the single-channel data into the two other channels.
b) Fill the other two channels with zeros.
Both of them will work; I used the first method.
In the ipynb, go to the section where you read the images and convert them into numpy arrays (the for loop at the end of the ipynb).
Change the code from:
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
To this:
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    if image_np.shape[2] != 3:
        image_np = np.broadcast_to(image_np, (image_np.shape[0], image_np.shape[1], 3)).copy()  # duplicating the content
        ## adding zeros to the other channels instead
        ## (this adds red-colored stuff in the background -- not recommended)
        # z = np.zeros(image_np.shape[:-1] + (2,), dtype=image_np.dtype)
        # image_np = np.concatenate((image_np, z), axis=-1)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
That's it. Run the file and you should see the results.

How to use Tensorflow's tf.cond() with two different Dataset iterators without iterating both?

I want to feed a CNN with the tensor "images". I want this tensor to contain images from the training set (which have a FIXED size) when the placeholder is_training is True; otherwise I want it to contain images from the test set (which are of NON-FIXED size).
This is needed because in training I take a random fixed-size crop from the training images, while in testing I want to perform a dense evaluation and feed the entire images into the network (it is fully convolutional, so it will accept them).
The current NOT WORKING way is to create two different iterators and try to select the training/test input with tf.cond at session.run(images, {is_training: True/False}).
The problem is that BOTH iterators are evaluated. The training and test datasets are also of different sizes, so I cannot iterate over both of them until the end. Is there a way to make this work? Or to rewrite this in a smarter way?
I've seen some questions/answers about this, but they always used tf.assign, which takes a numpy array and assigns it to a tensor. In this case I cannot use tf.assign because I already have a tensor coming from the iterators.
The code I currently have is this; it simply checks the shape of the tensor "images":
train_filenames, train_labels = list_images(args.train_dir)
val_filenames, val_labels = list_images(args.val_dir)

graph = tf.Graph()
with graph.as_default():
    # Preprocessing (for both training and validation):
    def _parse_function(filename, label):
        image_string = tf.read_file(filename)
        image_decoded = tf.image.decode_jpeg(image_string, channels=3)
        image = tf.cast(image_decoded, tf.float32)
        return image, label

    # Preprocessing (for training)
    def training_preprocess(image, label):
        # Random flip and crop
        image = tf.image.random_flip_left_right(image)
        image = tf.random_crop(image, [args.crop, args.crop, 3])
        return image, label

    # Preprocessing (for validation)
    def val_preprocess(image, label):
        flipped_image = tf.image.flip_left_right(image)
        batch = tf.stack([image, flipped_image], axis=0)
        return batch, label

    # Training dataset
    train_filenames = tf.constant(train_filenames)
    train_labels = tf.constant(train_labels)
    train_dataset = tf.contrib.data.Dataset.from_tensor_slices((train_filenames, train_labels))
    train_dataset = train_dataset.map(_parse_function, num_threads=args.num_workers, output_buffer_size=args.batch_size)
    train_dataset = train_dataset.map(training_preprocess, num_threads=args.num_workers, output_buffer_size=args.batch_size)
    train_dataset = train_dataset.shuffle(buffer_size=10000)
    batched_train_dataset = train_dataset.batch(args.batch_size)

    # Validation dataset
    val_filenames = tf.constant(val_filenames)
    val_labels = tf.constant(val_labels)
    val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_filenames, val_labels))
    val_dataset = val_dataset.map(_parse_function, num_threads=1, output_buffer_size=1)
    val_dataset = val_dataset.map(val_preprocess, num_threads=1, output_buffer_size=1)

    train_iterator = tf.contrib.data.Iterator.from_structure(batched_train_dataset.output_types, batched_train_dataset.output_shapes)
    val_iterator = tf.contrib.data.Iterator.from_structure(val_dataset.output_types, val_dataset.output_shapes)

    train_images, train_labels = train_iterator.get_next()
    val_images, val_labels = val_iterator.get_next()

    train_init_op = train_iterator.make_initializer(batched_train_dataset)
    val_init_op = val_iterator.make_initializer(val_dataset)

    # Indicates whether we are in training or in test mode
    is_training = tf.placeholder(tf.bool)

    def f_true():
        with tf.control_dependencies([tf.identity(train_images)]):
            return tf.identity(train_images)

    def f_false():
        return val_images

    images = tf.cond(is_training, f_true, f_false)
    num_images = images.shape

with tf.Session(graph=graph) as sess:
    sess.run(train_init_op)
    # sess.run(val_init_op)
    img = sess.run(images, {is_training: True})
    print(img.shape)
The problem is that when I want to use only the training iterator, I comment out the line that initializes val_init_op, but then I get the following error:
FailedPreconditionError (see above for traceback): GetNext() failed because the iterator has not been initialized. Ensure that you have run the initializer operation for this iterator before getting the next element.
[[Node: IteratorGetNext_1 = IteratorGetNext[output_shapes=[[2,?,?,3], []], output_types=[DT_FLOAT, DT_INT32], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator_1)]]
If I do not comment out that line, everything works as expected: when is_training is True I get training images, and when is_training is False I get validation images. The issue is that both iterators need to be initialized, and when I evaluate one of them the other is advanced too. Since, as I said, they are of different sizes, this causes a problem.
I hope there is a way to solve it! Thanks in advance.
The trick is to call iterator.get_next() inside the f_true() and f_false() functions:
def f_true():
    train_images, _ = train_iterator.get_next()
    return train_images

def f_false():
    val_images, _ = val_iterator.get_next()
    return val_images

images = tf.cond(is_training, f_true, f_false)
The same advice applies to any TensorFlow op that has a side effect, like assigning to a variable: if you want that side effect to happen conditionally, the op must be created inside the appropriate branch function passed to tf.cond().
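For instance, here is a minimal sketch of my own (not from the original answer) of conditionally assigning to a variable; the assignment runs only when pred is True, because the assign op is created inside the true branch:
v = tf.Variable(0)
pred = tf.placeholder(tf.bool)

def assign_then_read():
    assign_op = tf.assign(v, v + 1)
    # force the assignment to run before the value is read
    with tf.control_dependencies([assign_op]):
        return tf.identity(v)

out = tf.cond(pred, assign_then_read, lambda: tf.identity(v))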

How to use feed_dict in slim.learning.train of tensorflow

I read an example in tf-slim-mnist, and read one or two answers on Google, but all of them feed data to an 'images' tensor and a 'labels' tensor from an already filled-up tensor of data. For example, in tf-slim-mnist:
# load batch of dataset
images, labels = load_batch(
    dataset,
    FLAGS.batch_size,
    is_training=True)

def load_batch(dataset, batch_size=32, height=28, width=28, is_training=False):
    data_provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
    image, label = data_provider.get(['image', 'label'])
    image = lenet_preprocessing.preprocess_image(
        image,
        height,
        width,
        is_training)
    images, labels = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        allow_smaller_final_batch=True)
    return images, labels
Another example, from TensorFlow GitHub issue #5987:
graph = tf.Graph()
with graph.as_default():
    image, label = input('train', FLAGS.dataset_dir)
    images, labels = tf.train.shuffle_batch([image, label], batch_size=FLAGS.batch_size, capacity=1000 + 3 * FLAGS.batch_size, min_after_dequeue=1000)
    images_validation, labels_validation = inputs('validation', FLAGS.dataset_dir, 5000)
    images_test, labels_test = inputs('test', FLAGS.dataset_dir, 10000)
Because my data is of variable size, it is hard to fill up a tensor of data beforehand.
Is there any way to use feed_dict with slim.learning.train()? Would adding feed_dict as an argument to train_step_fn() be a proper way? If yes, how? Thanks.
I think feed_dict is not a good approach when the input data varies in size and is hard to fit in memory.
Converting your data into TFRecords is a more proper way. Here is an example of converting data. You can then read the data with TFRecordReader and parse_example to process the output file.
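Since the answer only names the APIs, here is a minimal sketch of my own (the feature names and helper functions are assumptions, not taken from the linked example) of writing variable-size images to a TFRecord file and reading them back with TFRecordReader and parse_single_example in TF 1.x:
def write_example(writer, image_bytes, label):
    # writer is a tf.python_io.TFRecordWriter; storing the raw encoded JPEG
    # means images of any size fit in one record file
    example = tf.train.Example(features=tf.train.Features(feature={
        'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
    }))
    writer.write(example.SerializeToString())

def read_example(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })
    image = tf.image.decode_jpeg(features['image_raw'], channels=3)  # dynamic h and w
    return image, features['label']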

Online oversampling in Tensorflow input pipeline

I have an input pipeline similar to the one in the Convolutional Neural Network tutorial. My dataset is imbalanced and I want to use minority oversampling to try to deal with this. Ideally, I want to do this "online", i.e. I don't want to duplicate data samples on disk.
Essentially, what I want to do is duplicate individual examples (with some probability) based on the label. I have been reading a bit on control flow in TensorFlow, and it seems tf.cond(pred, fn1, fn2) is the way to go. I am just struggling to find the right parameterisation, since fn1 and fn2 would need to output lists of tensors of the same size.
This is roughly what I have so far:
image = image_preprocessing(image_buffer, bbox, False, thread_id)
pred = tf.reshape(tf.equal(label, tf.convert_to_tensor([2])), [])
r_image = tf.cond(pred, lambda: [tf.identity(image), tf.identity(image)], lambda: [tf.identity(image),])
r_label = tf.cond(pred, lambda: [tf.identity(label), tf.identity(label)], lambda: [tf.identity(label),])
However, this raises an error as I mentioned before:
ValueError: fn1 and fn2 must return the same number of results.
Any ideas?
P.S.: this is my first Stack Overflow question. Any feedback on my question is appreciated.
After doing a bit more research, I found a solution for what I wanted to do. What I forgot to mention is that the code mentioned in my question is followed by a batch method, such as batch() or batch_join().
These functions take an argument that allows you to group tensors of various batch sizes rather than just tensors of a single example. The argument is enqueue_many and it should be set to True.
The following piece of code does the trick for me:
for thread_id in range(num_preprocess_threads):
# Parse a serialized Example proto to extract the image and metadata.
image_buffer, label_index = parse_example_proto(
example_serialized)
image = image_preprocessing(image_buffer, bbox, False, thread_id)
# Convert 3D tensor of shape [height, width, channels] to
# a 4D tensor of shape [batch_size, height, width, channels]
image = tf.expand_dims(image, 0)
# Define the boolean predicate to be true when the class label is 1
pred = tf.equal(label_index, tf.convert_to_tensor([1]))
pred = tf.reshape(pred, [])
oversample_factor = 2
r_image = tf.cond(pred, lambda: tf.concat(0, [image]*oversample_factor), lambda: image)
r_label = tf.cond(pred, lambda: tf.concat(0, [label_index]*oversample_factor), lambda: label_index)
images_and_labels.append([r_image, r_label])
images, label_batch = tf.train.shuffle_batch_join(
images_and_labels,
batch_size=batch_size,
capacity=2 * num_preprocess_threads * batch_size,
min_after_dequeue=1 * num_preprocess_threads * batch_size,
enqueue_many=True)