How to convert a TFRecordDataset item to an image? - tensorflow

I have an issue with converting TFRecords back to images:
def _parse_test_image_function(img):
    """Parse one serialized tf.Example into its file name and encoded image.

    Args:
        img: Scalar string tensor holding a serialized ``tf.Example``.

    Returns:
        Dict with the keys ``'image/file_name'`` and ``'image/encoded_image'``,
        each mapped to a scalar ``tf.string`` tensor.
    """
    image_feature_description = {
        'image/file_name': tf.io.FixedLenFeature([], tf.string),
        'image/encoded_image': tf.io.FixedLenFeature([], tf.string),
    }
    return tf.io.parse_single_example(img, image_feature_description)
test_dataset = tf.data.TFRecordDataset(temp_path)
test_dataset = test_dataset.map(_parse_test_image_function)
print(tf.__version__)
images = test_dataset.take(1)
print(images)
2.5.0
<TakeDataset shapes: {image/encoded_image: (), image/file_name: ()}, types: {image/encoded_image: tf.string, image/file_name: tf.string}>
Fields in image_feature_description are correct
also I saw this
Converting TFRecords back into JPEG Images
But this is not very helpful for me because some of the functions used in the answers are outdated.

You can get the image as numpy array by using the below code.
import numpy as np
import PIL.Image as Image
gold_fish=Image.open('/content/gold.jpeg')
gold_fish=np.array(gold_fish)
Thank You.

Related

Using tfrec files in Keras

I feel like this should be simple but cannot for the life of me work it out.
I have this melanoma dataset(https://www.kaggle.com/datasets/cdeotte/melanoma-512x512/code) (in tfrec format) downloaded to my local machine.
import os
import cv2
import numpy as np
import pandas as pd
import albumentations
import tensorflow as tf
from tensorflow import keras
features = {'image': tf.io.FixedLenFeature([], tf.string),
'image_name': tf.io.FixedLenFeature([], tf.string),
'patient_id': tf.io.FixedLenFeature([], tf.int64),
'sex': tf.io.FixedLenFeature([], tf.int64),
'age_approx': tf.io.FixedLenFeature([], tf.int64),
'anatom_site_general_challenge': tf.io.FixedLenFeature([], tf.int64),
'diagnosis': tf.io.FixedLenFeature([], tf.int64),
'target': tf.io.FixedLenFeature([], tf.int64),
'width': tf.io.FixedLenFeature([], tf.int64),
'height': tf.io.FixedLenFeature([], tf.int64)}
train_filepaths=tf.io.gfile.glob(path+'/train*.tfrec')
train_filepaths
this lists all the files:
['\Users\adban\Dissertation\Moles\512\train00-2182.tfrec',
'\Users\adban\Dissertation\Moles\512\train01-2185.tfrec',
'\Users\adban\Dissertation\Moles\512\train02-2193.tfrec', ...]
But I cannot seem to decode them. (Tried 'tf.io.parse_single_example' and 'tf.data.TFRecordDataset' but either get a parse error or an empty array returned.)
I figured it out.
This will add all images to a list as 3-D arrays.
def _parse_image_function(example_proto):
    """Parse one serialized tf.Example using the module-level ``features`` spec.

    Args:
        example_proto: Scalar string tensor holding a serialized ``tf.Example``.

    Returns:
        Dict mapping each key of ``features`` to its parsed tensor.
    """
    return tf.io.parse_single_example(example_proto, features)
def preprocess_image(image):
    """Decode encoded image bytes into a 3-channel image tensor.

    Args:
        image: Scalar string tensor holding the encoded image.

    Returns:
        The tensor produced by ``tf.io.decode_image`` with ``channels=3``.
    """
    return tf.io.decode_image(image, channels=3)
path = '/Users/adban/Dissertation/Moles/512'
tfimage_set = []
for filename in os.listdir(path):
    # os.listdir returns every directory entry; only the .tfrec shards can be
    # read by TFRecordDataset, so anything else is skipped.
    if not filename.endswith('.tfrec'):
        continue
    train_image_dataset = tf.data.TFRecordDataset(os.path.join(path, filename))
    train_images = train_image_dataset.map(_parse_image_function)
    for image_feature in train_images:
        # Decode the stored bytes and keep the result as a NumPy array.
        image_raw = preprocess_image(image_feature['image'])
        tfimage_set.append(image_raw.numpy())

How can I wrap tf.io.parse_single_example with tf.py_function?

First, I was wondering if I should wrap tf.io.parse_single_example with tf.py_function when reading TFRecord data from dataset.map,
N = config.get_num_listings_per_search()
features={
'qf': tf.io.FixedLenFeature([len(config.get_query_continuous_features())], tf.float32),
'qi': tf.io.FixedLenFeature([len(config.get_query_categorical_features())], tf.int64),
}
def _parse_function(example_proto):
    """Parse one serialized tf.Example into its ``qf`` and ``qi`` tensors.

    Args:
        example_proto: Scalar string tensor holding a serialized ``tf.Example``.

    Returns:
        Tuple ``(qf, qi)``: the ``tf.float32`` and ``tf.int64`` features
        declared in the module-level ``features`` spec.
    """
    parsed_features = tf.io.parse_single_example(example_proto, features)
    return parsed_features['qf'], parsed_features['qi']
dataset = tf.data.TFRecordDataset(training_files)
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
dataset = dataset.shuffle(buffer_size=1000000)
dataset = dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(config.get_batch_size())
because the tf.data guide mentions that
For performance reasons, we encourage you to use TensorFlow operations for preprocessing your data whenever possible. However, it is sometimes useful to call external Python libraries when parsing your input data. You can use the tf.py_function() operation in a Dataset.map() transformation.
I tried to wrap with
parsed_features = tf.py_function(tf.io.parse_single_example, (example_proto, features),
(tf.float32, tf.int64))
However, running the code gave me the following error:
TypeError: Tensors in list passed to 'input' of 'EagerPyFunc' Op have types [string, <NOT CONVERTIBLE TO TENSOR>] that are invalid.
It seems to me tf.py_function(tf.io.parse_single_example(example_proto, features)) is not supported because example_proto is of type tf.string ?
The primary reason I might want to do this is because the current input data pipeline is slow. Will I get some performance improvement if I wrap tf.io.parse_single_example with tf.py_function?
The above code is run in tensorflow-gpu==2.0
Thank you!
tf.py_function is meant to wrap external Python libraries like PIL or scipy, not TensorFlow operations like tf.io.parse_single_example. Adding tf.py_function here will probably make performance worse by forcing TensorFlow to call into Python instead of doing the parsing in C++.
The TFRecord guide gives an example of using tf.io.parse_single_example:
raw_image_dataset = tf.data.TFRecordDataset('images.tfrecords')
# Create a dictionary describing the features.
image_feature_description = {
'height': tf.io.FixedLenFeature([], tf.int64),
'width': tf.io.FixedLenFeature([], tf.int64),
'depth': tf.io.FixedLenFeature([], tf.int64),
'label': tf.io.FixedLenFeature([], tf.int64),
'image_raw': tf.io.FixedLenFeature([], tf.string),
}
def _parse_image_function(example_proto):
    """Parse one serialized tf.Example with ``image_feature_description``.

    Args:
        example_proto: Scalar string tensor holding a serialized ``tf.Example``.

    Returns:
        Dict mapping each key of ``image_feature_description`` to its tensor.
    """
    # Parse the input tf.Example proto using the dictionary above.
    return tf.io.parse_single_example(example_proto, image_feature_description)
parsed_image_dataset = raw_image_dataset.map(_parse_image_function)

Reading Images from TFrecord using Dataset API and showing them on Jupyter notebook

I created a tfrecord from a folder of images, now I want to iterate over entries in TFrecord file using Dataset API and show them on Jupyter notebook. However I'm facing problems with reading tfrecord file.
Code I used to create TFRecord
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` (as a BytesList)."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
    """Wrap a single integer value in a ``tf.train.Feature`` (as an Int64List)."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def generate_tfr(image_list):
    """Write every image in ``image_list`` to the TFRecord file ``output_path``.

    Each record stores the untouched file contents under ``'image/bytes'`` and
    the three dimensions of the decoded array under ``'image/x'``,
    ``'image/y'`` and ``'image/z'``.

    Args:
        image_list: Iterable of image file paths.

    Note:
        ``output_path`` and ``imread`` are names defined outside this function.
    """
    with tf.python_io.TFRecordWriter(output_path) as writer:
        # Iterate the parameter; the original looped over an undefined `images`.
        for image in image_list:
            # Close the file handle deterministically instead of leaking it.
            with open(image, 'rb') as image_file:
                image_bytes = image_file.read()
            # The image is decoded only to record its shape.
            image_shape = imread(image).shape
            image_x, image_y, image_z = image_shape[0], image_shape[1], image_shape[2]
            data = {
                'image/bytes': _bytes_feature(image_bytes),
                'image/x': _int64_feature(image_x),
                'image/y': _int64_feature(image_y),
                'image/z': _int64_feature(image_z),
            }
            features = tf.train.Features(feature=data)
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
Code to read TFRecord
#This code is incomplete and has many flaws.
#Please give some suggestions in correcting this code if you can
def parse(serialized):
    """Parse one serialized tf.Example and decode its image.

    Args:
        serialized: Scalar string tensor holding a serialized ``tf.Example``.

    Returns:
        Dict with ``'image'`` (decoded image cast to ``tf.float32``) and the
        stored dimensions ``'x'``, ``'y'`` and ``'z'`` (``tf.int64`` scalars).
    """
    features = {
        'image/bytes': tf.FixedLenFeature([], tf.string),
        'image/x': tf.FixedLenFeature([], tf.int64),
        'image/y': tf.FixedLenFeature([], tf.int64),
        'image/z': tf.FixedLenFeature([], tf.int64),
    }
    parsed_example = tf.parse_single_example(serialized=serialized, features=features)
    # x, y and z are the first, second and third entries of the array shape
    # that the writer recorded for this image.
    x = parsed_example['image/x']
    y = parsed_example['image/y']
    z = parsed_example['image/z']
    # 'image/bytes' holds the encoded file contents (the writer stored
    # open(image, 'rb').read()), so it has to be decoded as an image.
    # tf.decode_raw would only reinterpret the compressed byte stream, which is
    # why reshaping it to (x, y, z) fails. The decoded tensor already has its
    # height/width/channel layout, so no reshape is needed.
    # NOTE(review): decode_image covers BMP, GIF, JPEG and PNG -- confirm the
    # source images use one of these formats.
    image = tf.image.decode_image(parsed_example['image/bytes'])
    image = tf.cast(image, tf.float32)
    return {'image': image, 'x': x, 'y': y, 'z': z}
dataset = tf.data.TFRecordDataset([output_path])
# Dataset.map returns a new dataset; without keeping the result the records
# are never parsed and the iterator yields the raw serialized strings.
dataset = dataset.map(parse)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
epoch = 1
with tf.Session() as sess:
    for _ in range(epoch):
        # next_element is a dict of tensors, which has no .eval(); sess.run
        # evaluates the whole structure and returns NumPy values.
        img = sess.run(next_element)
        print(img)
        # img['image'] is a NumPy array here and can be displayed in a
        # notebook with an image library of your choice.
I've never worked with any of this before and I'm stuck at reading TFRecords. Please answer how to iterate over the contents of TFrecords and show them on Jupyter notebook. Feel free to correct/optimize both pieces of code. That would help me a lot.
Is this what you are looking for? I think once you convert to a numpy array you can show it in a Jupyter notebook using PIL.Image.
convert tf records to numpy => How can I convert TFRecords into numpy arrays?
show numpy array as image
https://gist.github.com/kylemcdonald/2f1b9a255993bf9b2629

Multi Layer Tiff labelled dataset conversion to format that tensor flow can use for model optimisation

I'm a Python and Tensor Flow newbie, and was wondering...
How best to convert a labelled dataset of Multi-Layer Tiffs into a format that Tensor Flow can use for model optimisation / fine tuning ?
I currently have this code that puts each layer of a folder of Multi-Tiffs into a 3D Array, but i need to preserve the label or filename of the Multi-Tiffs. I have seen some tensor flow scripts to convert to TFRecords, however, I'm not sure if these preserve the file name ? How best would you go about this ? It will be quite a big dataset.
Any help much appreciated
import os # For file handling
from PIL import Image# Import Pillow image processing library
import numpy
CroppedMultiTiffs = "MultiTiffs/"
# The folder variable is CroppedMultiTiffs; the original loop referenced an
# undefined name `MultiTiffs`, which raised NameError.
for filename in os.listdir(CroppedMultiTiffs):
    # Import the multi-layer TIFF into a 3-D NumPy array, one slice per frame.
    img = Image.open(CroppedMultiTiffs + filename)
    imgArray = numpy.zeros((img.n_frames, img.size[1], img.size[0]), numpy.uint8)
    try:
        # NOTE(review): the range starts at 2, so slices 0 and 1 stay
        # zero-filled -- confirm that skipping the first two frames is intended.
        for frame in range(2, img.n_frames):
            img.seek(frame)
            imgArray[frame, :, :] = img
    except EOFError:
        # Ran past the last frame; rewind to the first one.
        img.seek(0)
    print(imgArray.shape)  # imgArray is now 3D
    print(imgArray.size)
best wishes
TWP
Okay, so I figured it out using the thread from Daniil's blog
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
However, my current implementation creates multiple TFRecords, and I think it needs to be a single TFRecord, so I am trying to figure out how to make it a single TFRecord. How do I do that?
Then I can validate it using a TFRecord Reading script to read it back and check it is in the right format for Tensor Flow. I currently get errors using the reading script.
from PIL import Image
import numpy as np
import tensorflow as tf
import os
def _bytes_feature(value):
    """Return a ``tf.train.Feature`` holding ``value`` as a one-element BytesList."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
    """Return a ``tf.train.Feature`` holding ``value`` as a one-element Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
path = 'test/'
output = 'output/'
# Collect every .tif file below `path`, recursively.
fileList = [
    os.path.join(dirpath, f)
    for dirpath, dirnames, files in os.walk(path)
    for f in files
    if f.endswith('.tif')
]
print (fileList)
for filename in fileList:
    basename = os.path.basename(filename)
    file_name = basename[:-4]  # drop the 4-character '.tif' suffix
    print ("processing file: " , filename)
    print (file_name)
    if not os.path.exists(output):
        os.mkdir(output)
    img = Image.open(filename)
    imgArray = np.zeros((img.n_frames, img.size[1], img.size[0]), np.uint8)
    ## Imports Multi-Layer file into 3D Numpy Array.
    try:
        for frame in range(0, img.n_frames):
            img.seek(frame)
            imgArray[frame, :, :] = img
    except EOFError:
        # Ran past the last frame; rewind to the first one.
        img.seek(0)
    print ("print img size:" , img.size)
    print ("print image shape: " , imgArray.shape)
    print ("print image size: " , imgArray.size)
    annotation = np.array(Image.open(filename))
    # NOTE(review): imgArray is laid out as (n_frames, size[1], size[0]), so
    # 'height' below actually stores the frame count -- confirm the intended
    # mapping before relying on these fields when reading the records back.
    height = imgArray.shape[0]
    width = imgArray.shape[1]
    depth = imgArray.shape[2]
    # tobytes() replaces the deprecated tostring() alias (same bytes).
    img_raw = imgArray.tobytes()
    annotation_raw = annotation.tobytes()
    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'depth': _int64_feature(depth),  # for 3rd dimension
        'image_raw': _bytes_feature(img_raw),
        'mask_raw': _bytes_feature(annotation_raw)}))
    # The context manager closes the writer, so each record file is flushed to
    # disk; the original never closed it.
    with tf.python_io.TFRecordWriter(output + file_name + '.tfrecord') as writer:
        writer.write(example.SerializeToString())
My current TFRecords Reading script
import tensorflow as tf
import os
def read_and_decode(filename_queue):
    """Read one record from ``filename_queue`` and decode its fields.

    Args:
        filename_queue: Queue of TFRecord file names to read from.

    Returns:
        Tuple ``(image, label, height, width, depth)``: ``image`` is the
        ``uint8`` tensor produced by ``tf.decode_raw`` (not yet reshaped); the
        remaining values are cast to ``tf.int32``.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # No defaults are given, so every key listed here is required:
        # parsing fails for any record that lacks one of them.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    label = tf.cast(features['label'], tf.int32)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    return image, label, height, width, depth
with tf.Session() as sess:
    filename_queue = tf.train.string_input_producer(["output/A.3.1.tfrecord"])
    image, label, height, width, depth = read_and_decode(filename_queue)
    # NOTE(review): the channel count is hard-coded to 3 and the static shape
    # to 32x32x3 while `depth` is read from the record -- confirm these match
    # the stored data.
    image = tf.reshape(image, tf.stack([height, width, 3]))
    image.set_shape([32,32,3])
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for i in range(1000):
            example, l = sess.run([image, label])
            print (example,l)
    finally:
        # Always stop and join the queue-runner threads, even when sess.run
        # raises, so they do not keep running after the failure.
        coord.request_stop()
        coord.join(threads)
receiving the error:-
InvalidArgumentError (see above for traceback): Name: , Feature: label (data type: int64) is required but could not be found.
Images are grayscale multi-page

Using height, width information stored in a TFRecords file to set shape of a Tensor

I have converted a directory of images and their labels into a TFRecords file, the feature maps include image_raw, label, height, width and depth. The function is as follows:
def convert_to_tfrecords(data_samples, filename):
    """Write images and their labels to a TFRecords file.

    Each record stores the raw pixel bytes (``image_raw``), the integer
    ``label`` and the array dimensions ``height``, ``width`` and ``depth``.

    Args:
        data_samples: Iterable of ``(image_path, label)`` pairs; ``label`` must
            be convertible with ``int()``.
        filename: Path of the TFRecords file to create.

    Raises:
        IOError: If an image cannot be read by ``cv2.imread``.
    """
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # The context manager closes the writer even if a sample fails part-way.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for fname, lb in data_samples:
            im = cv2.imread(fname, cv2.IMREAD_UNCHANGED)
            if im is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: %s' % fname)
            # A single-channel image loads as a 2-D array; record depth 1
            # instead of failing on the missing third dimension.
            depth = im.shape[2] if im.ndim == 3 else 1
            feats = tf.train.Features(
                feature={
                    # tobytes() replaces the deprecated tostring() alias.
                    'image_raw': _bytes_feature(im.tobytes()),
                    'label': _int64_feature(int(lb)),
                    'height': _int64_feature(im.shape[0]),
                    'width': _int64_feature(im.shape[1]),
                    'depth': _int64_feature(depth),
                }
            )
            example = tf.train.Example(features=feats)
            writer.write(example.SerializeToString())
Now, I would like to read this TFRecords file to feed an input pipeline. However, since image_raw has been flattened, we need to reshape it into the original [height, width, depth] size. So how can I get the values of height, width and depth from the TFRecords file? It seems the following code cannot work because height is a Tensor without values.
def read_and_decode(filename_queue):
    """Read one record and return its normalized image and label.

    Args:
        filename_queue: Queue of TFRecords file names to read from.

    Returns:
        Tuple ``(image, label)``: ``image`` is a ``tf.float32`` tensor reshaped
        to the stored ``[height, width, depth]`` and scaled to
        ``[-0.5, 0.5]``; ``label`` is a ``tf.int32`` scalar.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    feats = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
    }
    features = tf.parse_single_example(serialized_example, features=feats)
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    label = tf.cast(features['label'], tf.int32)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    depth = tf.cast(features['depth'], tf.int32)
    # The target shape must be one tensor, so the three scalar tensors are
    # stacked into a 1-D shape tensor instead of being passed as a Python list.
    image = tf.reshape(image, tf.stack([height, width, depth]))
    # Map uint8 pixel values 0..255 to floats in [-0.5, 0.5].
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    return image, label
When I read TensorFlow's official documentation, I found that it usually passes in a known size, say [224,224,3]. However, I don't like this, because the size information is already stored in the TFRecords file, and manually passing in a fixed size cannot ensure that it is consistent with the data stored in the file.
So any ideas?
The height returned by tf.parse_single_example is a Tensor, and the only way to get its value is to call session.run() on it, or similar. However, I think that's overkill.
Since the Tensorflow example is just a protocol buffer (see the documentation), you don't necessarily have to use tf.parse_single_example to read it. You could instead parse it yourself and read the shapes you want out directly.
You might also consider filing a feature request on Tensorflow's github issues tracker --- I agree this API seems a bit awkward for this use case.
The function 'tf.reshape' only accepts a tensor, not a list of tensors, so you can use the following code:
image = tf.reshape(image, tf.stack([height, width, depth]))
You can also get the numpy array out of the tensor and reshape it using np.resize(), passing the dimensions as the argument.