Flattening tuple of images in tensorflow dataset - tensorflow

I have a dataset of triplet images that I'm reading from tfrecords, that I've converted to a dataset using the following code
def parse_dataset(record):
    """Parse one serialized triplet example into three decoded image tensors.

    Args:
        record: A serialized example holding the string features
            'n', 'p' and 'q'.

    Returns:
        A tuple ``(neg, pos, query)`` of float32 tensors, each reshaped to
        (299, 299, 3) and divided by 255.
    """

    def convert_raw_to_image_tensor(raw):
        """Base64-decode ``raw``, decode it as a 3-channel image and rescale it."""
        image_bytes = tf.io.decode_base64(raw)
        pixels = tf.io.decode_image(
            image_bytes, channels=3, dtype=tf.uint8, expand_animations=False
        )
        pixels = tf.cast(pixels, tf.float32)
        pixels = tf.reshape(pixels, tf.stack([299, 299, 3]))
        return tf.math.divide(pixels, 255.)

    # All three features are scalar strings, so the spec is uniform.
    feature_spec = {
        key: tf.io.FixedLenFeature([], tf.string) for key in ('n', 'p', 'q')
    }
    sample = tf.io.parse_single_example(record, feature_spec)

    neg_decoded = convert_raw_to_image_tensor(sample['n'])
    pos_decoded = convert_raw_to_image_tensor(sample['p'])
    query_decoded = convert_raw_to_image_tensor(sample['q'])
    return (neg_decoded, pos_decoded, query_decoded)


# Read the TFRecord files (4 parallel readers) and parse every record.
record_dataset = tf.data.TFRecordDataset(
    filenames=path_dataset, num_parallel_reads=4
)
record_dataset = record_dataset.map(parse_dataset)
The shape of this resulting dataset is
<MapDataset shapes: ((299, 299, 3), (299, 299, 3), (299, 299, 3)), types: (tf.float32, tf.float32, tf.float32)>
which I think means that each entry contains 3 images (which I confirmed by iterating through the dataset and printing the 1st, 2nd, and 3rd elements). I want to flatten this, so that I get a dataset that doesn't contain any tuples but just a flat sequence of images. I've tried using flat_map, but that just converts the images to (299, 3), and I've tried iterating through the dataset, appending each image to a list, and then calling from_tensor_slices, but that's really inefficient.
I've read this question but it didn't seem to help.
Btw this is the flat_map code I tried
# Attempted flattening: from_tensor_slices receives the whole tuple, so each of
# the three images is sliced along its own first axis (see the shapes reported below).
record_dataset = record_dataset.flat_map(lambda *x: tf.data.Dataset.from_tensor_slices(x))
and the resulting dataset has this shape
<FlatMapDataset shapes: ((299, 3), (299, 3), (299, 3)), types: (tf.float32, tf.float32, tf.float32)>

I think you are just unpacking the tuple incorrectly.
This ought to do it:
def flatten(*x):
    """Turn one tuple-structured element into a dataset of its components.

    Args:
        *x: The tensors of a single dataset element (here the three images
            of a triplet). They must share shape and dtype so they can be
            packed together.

    Returns:
        A ``tf.data.Dataset`` that yields each tensor in ``x`` as its own
        element.
    """
    # A list (unlike a tuple) is packed into one stacked tensor, which
    # from_tensor_slices then slices along axis 0 -- one slice per image.
    return tf.data.Dataset.from_tensor_slices(list(x))


flattened = record_dataset.flat_map(flatten)
so that:
# Every element of the flattened dataset is now a single image.
for image in flattened:
    print(image.shape)
gives:
(299, 299, 3)
(299, 299, 3)
(299, 299, 3)
(299, 299, 3)
...
as expected

Related

Can't resolve ValueError: as_list() is not defined on an unknown TensorShape

I'm busy creating a pre-processing pipeline in a tensorflow dataset that takes in a list of the relative paths to files, decodes the file name from a bytes string to a regular string, loads the numpy array (which contains mel-frequency cepstral coefficients), reshapes it to have one channel, i.e. adds a dimension with size 1 on the end, extracts the corresponding label by using the parent directory name (the parent directory name indicates the class), and then returns the array and label.
I've read up about this problem but nothing seems to work. I tried setting the shape in the function, but it was to no avail.
Would appreciate any help.
Here's the relevant code:
def get_mfccs_and_label(file_path):
    """Load one combined-MFCC array and build its one-hot emotion label.

    Args:
        file_path: Encoded path (an object with ``.decode()``) of a ``.npy``
            file; the name of its parent directory is used as the class.

    Returns:
        ``(mfccs, label)``: the loaded array reshaped to (36, 125, 1) and the
        7-way one-hot encoding of the parent directory name.

    Raises:
        ValueError: If the parent directory name is not in the emotion list.
    """
    target_shape = (36, 125, 1)
    emotion_names = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sadness', 'surprise']

    path = file_path.decode()
    # The class is the second-to-last component of the '/'-separated path.
    emotion = path.split("/")[-2]

    mfccs = tf.convert_to_tensor(np.load(path))
    mfccs = tf.reshape(mfccs, target_shape)
    mfccs.set_shape(target_shape)

    encoder = tf.keras.layers.CategoryEncoding(num_tokens=7,
                                               output_mode="one_hot")
    label = encoder(emotion_names.index(emotion))
    label.set_shape(7)
    return mfccs, label
# Collect every .npy file; the parent directory of each file names its class.
combined_mfcc_files = glob.glob("challengeA_data/combined_mfccs/*/*.npy")
files_ds = tf.data.Dataset.from_tensor_slices(combined_mfcc_files)

# NOTE(review): tensors returned by tf.numpy_function carry no static shape,
# so the set_shape calls made inside get_mfccs_and_label presumably do not
# reach the dataset elements -- confirm against the reported as_list() error.
ds = files_ds.map(lambda file: tf.numpy_function(get_mfccs_and_label, [file], [tf.float32, tf.float32]),
                  num_parallel_calls=tf.data.AUTOTUNE)

# Shuffle once and keep that order. With the default
# reshuffle_each_iteration=True every pass over the dataset is reshuffled, so
# take()/skip() would pick different elements each time and the training and
# validation splits would overlap.
ds = ds.shuffle(buffer_size=100, reshuffle_each_iteration=False)

# 80/20 split: the first num_train elements train, the remainder validates.
num_instances = len(ds)
num_train = int(num_instances * 0.8)
num_val = int(num_instances * 0.2)
train_ds = ds.take(num_train)
val_ds = ds.skip(num_train)

batch_size = 64
train_ds = train_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
# Three identical Conv2D -> MaxPool2D -> Dropout stages that differ only in
# the number of filters.
conv_blocks = []
for filters in (8, 16, 200):
    conv_blocks.extend([
        layers.Conv2D(filters, 5, activation="relu"),
        layers.MaxPool2D(2),
        layers.Dropout(0.2),
    ])

# Input -> conv stages -> two dense layers -> 7-way softmax classifier.
model = models.Sequential([
    layers.Input(shape=(36, 125, 1)),
    *conv_blocks,
    layers.Flatten(),
    layers.Dense(1024, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(7, activation="softmax"),
])
model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)

EPOCHS = 10
# ----> "as_list()..." error raised when calling model.fit()
cnn_with_combined_mfcc_history = model.fit(
    train_ds, validation_data=val_ds, epochs=EPOCHS
)

Create timeseries dataset for TensorFlow v2

I'm trying to feed a model CNN+LSTM with data from a csv. What I'm missing is setting a correct dataset to start training my model.
This is my test model:
def test_model():
    """Build and compile the CNN+LSTM test classifier.

    The convolution, pooling and flatten layers are each wrapped in
    ``TimeDistributed`` with ``input_shape=[None, 6, 20]``; an LSTM and two
    dense layers follow, ending in a 5-unit softmax.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 1e-3,
        binary cross-entropy loss, accuracy metric).
    """

    def per_step(layer):
        # Every wrapped layer is declared with the same input shape.
        return TimeDistributed(layer, input_shape=[None, 6, 20])

    stack = (
        per_step(Conv1D(32, 4, strides=1, activation='relu', padding="valid")),
        per_step(MaxPooling1D(pool_size=2)),
        per_step(Conv1D(64, 4, strides=1, activation='relu', padding="valid")),
        per_step(MaxPooling1D(pool_size=2)),
        per_step(Flatten()),
        LSTM(100, activation='relu'),
        Dropout(0.2),
        Dense(100, activation="relu"),
        Dropout(0.4),
        Dense(5, activation="softmax"),
    )

    model = models.Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(optimizer=keras.optimizers.Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
    return model
Here you can download test.csv
The CSV dataset is made of 6 features + 1 multilabel. I need to create a time series from every 20 rows, so I think my shape should be [None, 6, 20], ordered by ascending timestamp value.
I'm new in TensorFlow and I don't know how to create an appropriate dataset from scratch, I was able to load a dataset from directory with images (for CNN) but in this scenario I really don't know how to do it.
This is what I tried to generate my dataset:
with open('test.csv') as csvfile:
    dataset = []
    reader = csv.reader(csvfile, delimiter=',')
    next(reader)  # discard the first row (presumably the header)
    count = 0
    timeseries = []
    labels = []
    for row in reader:
        count += 1
        if count > 20:
            # 21st row since the last reset: close the current window and take
            # the label from this row. The row's six feature values are not
            # added to any window.
            dataset.append(timeseries)
            labels.append(int(row[6].split("L")[-1]))
            timeseries = []
            count = 0
            continue
        # Rows 1-20 of the window: keep the six feature columns as floats.
        timeseries.append([float(row[col]) for col in range(6)])
After that I transformed it in a tf.DataSet like this:
# Wrap the window list and the label list in two separate tf.data datasets.
dataset = tf.data.Dataset.from_tensor_slices(dataset)
labels = tf.data.Dataset.from_tensor_slices(labels)
Here I got a <TensorSliceDataset shapes: (20, 6), types: tf.float32> like I want. Then I fed a K-Fold with it:
# Score test_model with 10-fold shuffled cross-validation.
# NOTE(review): `dataset` and `labels` are tf.data.Dataset objects at this
# point; the TypeError quoted below is raised by this cross_val_score call.
estimator = KerasClassifier(build_fn=test_model, epochs=60, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, dataset, labels, cv=kfold)
When running cross_val_score I got this error:
TypeError: Singleton array array(<TensorSliceDataset shapes: (20, 6), types: tf.float32>, dtype=object) cannot be considered a valid collection.
What am I missing?

How to create a bi-input TPU model for images?

I want to convert my GPU model to a TPU model. My GPU model takes two input images and has the same output for both images. I use a custom data generator for this. There are two parallel networks, one for each input.
From this StackOverflow question, I tried to solve this but I failed.
Here is what I tried
# Pair the two path lists, then zip the pairs with the labels so each element
# is ((left_path, right_path), label); batch by 2 and repeat.
dataset_12 = tf.data.Dataset.from_tensor_slices((left_train_paths, right_train_paths))
dataset_label = tf.data.Dataset.from_tensor_slices(train_labels)
dataset = tf.data.Dataset.zip((dataset_12, dataset_label)).batch(2).repeat()
The problem I am facing is that I am unable to decode the two input images.
Here is the decoder function
def decode_image(filename, label=None, image_size=(IMG_SIZE_h, IMG_SIZE_w)):
    """Read a JPEG file and return it as a resized float32 image.

    Args:
        filename: Path of the JPEG file to read.
        label: Optional label passed through unchanged.
        image_size: Target size handed to ``tf.image.resize``.

    Returns:
        The image alone when ``label`` is None, otherwise ``(image, label)``.
    """
    raw = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.image.resize(pixels, image_size)
    # TODO: maybe convert to numpy and apply some cv2 processing here?
    if label is not None:
        return pixels, label
    return pixels
The issue is that I am unable to pass both images to the decoder function at the same time. How can I resolve this?
I also tried to decode the images in the following way:
def decode(img,image_size=(IMG_SIZE_h, IMG_SIZE_w)):
    """Read the JPEG at ``img`` and return it as a float32 image resized to ``image_size``."""
    contents = tf.io.read_file(img)
    decoded = tf.image.decode_jpeg(contents, channels=3)
    # Cast before dividing so the division by 255 is done in float32.
    scaled = tf.cast(decoded, tf.float32) / 255.0
    return tf.image.resize(scaled, image_size)
def decode_image(left, right,labels=None ):
    """Decode a pair of image paths, optionally carrying the labels along.

    Args:
        left: Path of the first image.
        right: Path of the second image.
        labels: Optional labels passed through unchanged.

    Returns:
        ``(left_image, right_image)`` when ``labels`` is None, otherwise
        ``(left_image, right_image, labels)``.
    """
    images = (decode(left), decode(right))
    if labels is None:
        return images
    return images + (labels,)
# One element per (left path, right path, labels) triple; decode_image is
# mapped over it, then the result is repeated, shuffled (buffer 512), batched
# and prefetched.
image=tf.data.Dataset.from_tensor_slices((left_train_paths,right_train_paths,train_labels ))
dataset=image.map(decode_image, num_parallel_calls=AUTO).repeat().shuffle(512).batch(BATCH_SIZE).prefetch(AUTO)
# Bare expression: shows the dataset's repr when run in a notebook cell.
dataset
The output of the dataset variable is now:
<PrefetchDataset shapes: ((None, 760, 760, 3), (None, 760, 760, 3), (None, 8)), types: (tf.float32, tf.float32, tf.int64)>
How can I pass it to the model now?
Model
def get_model():
    """Build the two-branch EfficientNetB3 model for a pair of input images.

    Each input goes through its own ImageNet-initialised EfficientNetB3
    (without the classification top). The two feature maps are concatenated,
    globally average-pooled and fed to an 8-unit sigmoid layer.

    NOTE(review): ``IMG_SIZE_h``, ``IMG_SIZE_w`` and ``img_shape`` are not
    defined in this snippet; presumably they are module-level constants.

    Returns:
        A ``Model`` taking ``[left_image, right_image]`` and producing the
        8-unit sigmoid output.
    """
    left_tensor = Input(shape=(IMG_SIZE_h, IMG_SIZE_w, 3))
    right_tensor = Input(shape=(IMG_SIZE_h, IMG_SIZE_w, 3))

    left_model = EfficientNetB3(input_shape=(img_shape, img_shape, 3), include_top=False,
                                weights='imagenet', input_tensor=left_tensor)
    right_model = EfficientNetB3(input_shape=(img_shape, img_shape, 3), include_top=False,
                                 weights='imagenet', input_tensor=right_tensor)

    con = concatenate([left_model.output, right_model.output])
    GAP = GlobalAveragePooling2D()(con)
    out = Dense(8, activation='sigmoid')(GAP)

    # The model inputs are the Input tensors created above; the previous
    # names left_input / right_input were never defined (NameError).
    model = Model(inputs=[left_tensor, right_tensor], outputs=out)
    return model
I found a pretty elegant solution. I will explain it step by step, since it may be a bit different from what you had in mind:
When decoding the images stack both images in a single tensor so the input tensor will be of shape [2, IMAGE_H, IMAGE_W, 3]
def decode_single(im_path, image_size):
    """Read the JPEG at ``im_path`` and return it as a float32 image resized to ``image_size``."""
    contents = tf.io.read_file(im_path)
    decoded = tf.image.decode_jpeg(contents, channels=3)
    scaled = tf.cast(decoded, tf.float32) / 255.0
    return tf.image.resize(scaled, image_size)
# The two image paths arrive packed in one tuple and are unpacked inside the
# function.
def decode(paths, label=None, image_size=(128, 128)):
    """Decode a pair of image paths into one stacked tensor.

    Args:
        paths: A pair ``(first_path, second_path)`` of image file paths.
        label: Optional label passed through unchanged.
        image_size: Target size handed to ``decode_single``.

    Returns:
        The two decoded images stacked along a new leading axis, alone when
        ``label`` is None and otherwise as ``(images, label)``.
    """
    first_path, second_path = paths
    first_image = decode_single(first_path, image_size)
    second_image = decode_single(second_path, image_size)
    images = tf.stack([first_image, second_image])
    if label is None:
        return images
    return images, label
I declare the data pipeline so the paths are packed in a tuple.
# Placeholder: the label dataset is left unspecified in this example.
label_ds = ...
ds = tf.data.Dataset.from_tensor_slices((left_paths, right_paths))
ds = tf.data.Dataset.zip((ds, label_ds)) # yields ((im_path1, im_path2), label), not (im_path1, im_path2, label)
ds = ds.map(decode).batch(4)
print(ds)
# Out: <BatchDataset shapes: ((None, 2, 128, 128, 3), ((None,),)), types: (tf.float32, (tf.int32,))>
Since we are feeding batches of image pairs with shape (None, 2, 128, 128, 3), declare the model with a single input of shape (2, HEIGHT, WIDTH, 3) and then split the input into the two images:
def get_model():
    """Build a two-branch model that takes both images as one stacked input.

    The input has shape (2, IMAGE_H, IMAGE_W, 3). It is split along axis 1
    into the two images, each image goes through its own branch, and the
    branch outputs are concatenated, globally average-pooled and fed to an
    8-unit sigmoid layer.

    Returns:
        A ``tf.keras.Model`` with the single stacked input.
    """
    input_layer = Input(shape=(2, IMAGE_H, IMAGE_W, 3))

    # Split into two images. The pipeline stacks the pair in
    # (left, right) order, so slice 0 is the left image and slice 1 the right.
    left_image, right_image = Lambda(lambda x: tf.split(x, 2, axis=1))(input_layer)
    # Each split still has a size-1 pair axis; reshape it away.
    left_image = Reshape([IMAGE_H, IMAGE_W, 3])(left_image)
    right_image = Reshape([IMAGE_H, IMAGE_W, 3])(right_image)

    # Replace by EfficientNets
    left_model = Conv2D(64, 3)(left_image)
    right_model = Conv2D(64, 3)(right_image)

    con = Concatenate(-1)([left_model, right_model])
    GAP = GlobalAveragePooling2D()(con)
    out = Dense(8, activation='sigmoid')(GAP)
    model = tf.keras.Model(inputs=input_layer, outputs=out)
    return model
Finally compile and train the model as usual:
# Build the model, compile it (arguments elided in this example) and train it
# on the paired-image dataset.
model = get_model()
model.compile(...)
model.fit(ds, epochs=10)

Transfer Learning with MobileNetV2

I am trying to implement transfer learning with MobileNetV2, following https://www.tensorflow.org/tutorials/images/transfer_learning .
The above tutorial uses the MobileNetV2 model as the base model and uses the "cats_vs_dogs" dataset, which has the type tensorflow.python.data.ops.dataset_ops._OptionsDataset.
In my case I want to use MobileNetV2 as the base model, freeze all the weights of the convolutional layers, add a fully connected layer and fine-tune it. The dataset I am using is tiny_imagenet. The following is my code:
##After pre-processing the data :
# `data` holds the pre-processed (images, labels) pairs for the three splits.
(x_train, y_train), (x_valid, y_valid),(x_test, y_test) = data
# type(x_train) = numpy.ndarray
# len(x_train) = 1750
# Convert the arrays so the tf.data.Dataset pipeline can be used.
raw_train = tf.data.Dataset.from_tensor_slices((x_train,y_train))
raw_validation = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
raw_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
#print(raw_train) gives
<DatasetV1Adapter shapes: ((64, 64, 3), ()), types: (tf.float64, tf.int64)>
## Now i follow everything from the link (given above in problem statement) :
IMG_SIZE = 160 # All images will be resized to 160x160


def format_example(image, label):
    """Convert one (image, label) pair to the format the base model is fed.

    Args:
        image: Image tensor; assumed to hold pixel values in [0, 255], so
            that ``image / 127.5 - 1`` lands in [-1, 1] -- TODO confirm.
        label: Label, returned unchanged.

    Returns:
        ``(image, label)`` with the image cast to float32, rescaled and
        resized to (IMG_SIZE, IMG_SIZE).
    """
    image = tf.cast(image, tf.float32)
    image = (image/127.5) - 1
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
# The closing parenthesis was missing on this call (SyntaxError).
test = raw_test.map(format_example)
#print(train) gives
#<DatasetV1Adapter shapes: ((160, 160, 3), ()), types: (tf.float32, tf.int64)>
# Only the training split is shuffled; all three splits are batched.
# SHUFFLE_BUFFER_SIZE and BATCH_SIZE are defined outside this snippet.
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)
#print(train_batches) gives :
<DatasetV1Adapter shapes: ((?, 160, 160, 3), (?,)), types: (tf.float32, tf.int64)>
##The corresponding command in the tutorial (which works on cats vs dogs dataset gives) :
<BatchDataset shapes: ((None, 160, 160, 3), (None,)), types: (tf.float32, tf.int64)>
I also tried using padded_batch() instead of batch(), but the loop below still appears to run forever.
##Goes to infinite loop
# Pull a single batch; image_batch and label_batch stay bound after the loop.
for image_batch, label_batch in train_batches.take(1):
    print("hello")
    pass

image_batch.shape  # Execution reportedly never reaches this line.
##The same command in the tutorial gives :
hello
TensorShape([32, 160, 160, 3])
##Further in my case :
#print(train_batches.take(1)) gives
<DatasetV1Adapter shapes: ((?, 160, 160, 3), (?,)), types: (tf.float32, tf.int64)>
##In tutorial it gives :
<TakeDataset shapes: ((None, 160, 160, 3), (None,)), types: (tf.float32, tf.int64)>
The image_batch is used later in the code.
# Load the pre-trained model (ImageNet weights, without the classification top).
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
# This feature extractor converts each 160x160x3 image to a 5x5x1280 block of
# features. See what it does to the example batch of images:
feature_batch = base_model(image_batch)
print(feature_batch.shape) # expected: (32, 5, 5, 1280)
# Freeze the convolutional base.
base_model.trainable = False
# Add a classification head: global average pooling followed by one Dense unit.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape) # expected: (32, 1280)
prediction_layer = keras.layers.Dense(1)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape) # expected: (32, 1)
# Stack the frozen base, the pooling layer and the prediction layer.
model = tf.keras.Sequential([
base_model,
global_average_layer,
prediction_layer
])
I have not worked much with TensorFlow; any ideas on how to make it work?
Padded batch vs batch: padded batch is used if the elements inside your dataset are of different shapes whereas batch has a requirement that elements in it should have same shape.
Your code is not actually hitting an infinite loop, as you describe. The dataset you are using, tiny imagenet, comprises 100,000 images, and it takes time to iterate through all of them once. If you don't want to wait that long, you can change the pass to break inside your for loop, and it will exit the loop after the first iteration.
There is another operation known as repeat. It is used to repeat your dataset the number of times you specify in its count parameter. If you set it to -1, however, the dataset will keep looping, and in that case your loop really would run forever.

Using TF Estimator with TFRecord generator

I am trying to create a simple NN that reads in a folder of tfrecords. Each record has a 1024-value 'mean_rgb' vector, and a category label. I am trying to create a simple feed-forward NN that learns the categories based on this feature vector.
def generate(dir, shuffle, batch_size):
    """Build a one-shot input pipeline over TFRecord files.

    Args:
        dir: TFRecord file name(s) passed to ``tf.data.TFRecordDataset``.
        shuffle: If truthy, shuffle the parsed examples with a buffer of 8000.
        batch_size: Number of examples per batch.

    Returns:
        The ``get_next()`` result of a one-shot iterator over the batched
        dataset: ``({'mean_rgb': features}, labels)``.
    """

    def parse(serialized):
        """Parse one serialized example into ``({'mean_rgb': vector}, category)``."""
        features = {
            'mean_rgb': tf.FixedLenFeature([1024], tf.float32),
            'category': tf.FixedLenFeature([], tf.int64)
        }
        parsed_example = tf.parse_single_example(serialized=serialized, features=features)
        return {'mean_rgb': parsed_example['mean_rgb']}, parsed_example['category']

    # repeat(1) makes a single pass over the records.
    dataset = tf.data.TFRecordDataset(dir).repeat(1)
    dataset = dataset.map(parse)
    if shuffle:
        dataset = dataset.shuffle(8000)
    dataset = dataset.batch(batch_size)

    iterator = dataset.make_one_shot_iterator()
    # Named next_batch so the builtin next() is not shadowed.
    next_batch = iterator.get_next()
    print(next_batch)
    return next_batch
def batch_generator(dir, shuffle=False, batch_size=64):
    """Yield evaluated batches from ``generate`` forever.

    Args:
        dir: TFRecord file name(s) forwarded to ``generate``.
        shuffle: Forwarded to ``generate``.
        batch_size: Forwarded to ``generate``.

    Yields:
        The result of running ``generate(dir, shuffle, batch_size)`` in the
        Keras backend session.
    """
    session = K.get_session()
    while True:
        # generate() is called again on every iteration, so a new pipeline is
        # built for each yielded batch.
        batch_op = generate(dir, shuffle, batch_size)
        yield session.run(batch_op)
num_classes = 29
batch_size = 64
# Collect the .tfrecord files of the training and test directories
# (read_all_file_names and the yt8m_dir_* paths are defined elsewhere).
yt8m_train = [os.path.join(yt8m_dir_train, x) for x in read_all_file_names(yt8m_dir_train) if '.tfrecord' in x]
yt8m_test = [os.path.join(yt8m_dir_test, x) for x in read_all_file_names(yt8m_dir_test) if '.tfrecord' in x]
# One numeric column for 'mean_rgb'; no shape argument is passed here.
feature_columns = [tf.feature_column.numeric_column(k) for k in ['mean_rgb']]
#batch_generator(yt8m_test).__next__()
# DNN classifier with two hidden layers of 1024 units each.
classifier = tf.estimator.DNNClassifier(
feature_columns=feature_columns,
hidden_units=[1024, 1024],
n_classes=num_classes,
model_dir=model_dir)
# Train from the shuffled training pipeline built by generate().
classifier.train(
input_fn=lambda: generate(yt8m_train, True, batch_size))
However, I get the following error:
InvalidArgumentError (see above for traceback): Input to reshape is a
tensor with 65536 values, but the requested shape has 64
I am not sure why it sees the input as a 64x1024=65536 vector instead of a (64, 1024) vector. When I print the next item in the generator, I get
({'mean_rgb': <tf.Tensor: id=23, shape=(64, 1024), dtype=float32, numpy=
array([[ 0.9243997 , 0.28990048, -0.4130672 , ..., -0.096692 ,
0.27225342, 0.13346168],
[ 0.5853526 , 0.67050666, -0.24683481, ..., -0.6999033 ,
-0.4100128 , -0.00349384],
[ 0.49572858, 0.5231492 , -0.53445834, ..., 0.0449002 ,
0.10582132, -0.37333965],
...,
[ 0.5776026 , -0.07128889, -0.61762846, ..., 0.22194198,
0.61441416, -0.27355513],
[-0.01848815, 0.20132884, 1.1023484 , ..., 0.06496283,
0.29560333, 0.09157721],
[-0.25877073, -1.9552246 , 0.10309827, ..., 0.22032814,
-0.6812989 , -0.23649289]], dtype=float32)>}
which has the correct (64, 1024) shape
The problem is in how feature columns work. I had a similar problem and solved it by doing a reshape; here is part of my code that should help you understand:
defining the features_column:
# The numeric column is declared with an explicit shape (self.shape is set
# elsewhere in the class -- not shown in this excerpt).
feature_columns = {
'images': tf.feature_column.numeric_column('images', self.shape),
}
then to create the input for the model:
with tf.name_scope('input'):
    # Build the dense input from the declared feature columns.
    feature_columns = list(self._features_columns().values())
    input_layer = tf.feature_column.input_layer(
        features=features,
        feature_columns=feature_columns,
    )
    # Restore the image layout; -1 leaves the batch dimension to be inferred.
    input_layer = tf.reshape(
        input_layer,
        shape=(
            -1,
            self.parameters.size,
            self.parameters.size,
            self.parameters.channels,
        ),
    )
If you pay attention to the last part, I had to reshape the tensor; the -1 lets TensorFlow figure out the batch size.
I believe the issue was that feature_columns = [tf.feature_column.numeric_column(k) for k in ['mean_rgb']] assumes that the column is a scalar, when it is actually a 1024-element vector. I had to add shape=1024 to the numeric_column call. I also had to remove the existing saved model checkpoint.