Can't preprocess data before training for image segmentation - tensorflow

I'm trying to do some image segmentation for ocr, my mask image is a 3 classes image, like this
and my original image is a gray image like this
but when I try to fit the model I get this error
could not broadcast input array from shape (128,128,3) into shape (128,128)
here is the code I'm using to create the datasets
img_size = (128, 128)
batch_size = 32
input_img_paths = sorted(
[ os.path.join(input_dir, fname)
for fname in os.listdir(input_dir)
if fname.endswith(".jpg") ] )
target_img_paths = sorted(
[ os.path.join(target_dir, fname)
for fname in os.listdir(target_dir)
if fname.endswith(".jpg") and not fname.startswith(".") ])
class OxfordPets(keras.utils.Sequence):
"""Helper to iterate over the data (as Numpy arrays)."""
def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
self.batch_size = batch_size
self.img_size = img_size
self.input_img_paths = input_img_paths
self.target_img_paths = target_img_paths
def __len__(self):
return len(self.target_img_paths) // self.batch_size
def __getitem__(self, idx):
"""Returns tuple (input, target) correspond to batch #idx."""
i = idx * self.batch_size
batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
x = np.zeros((batch_size,) + self.img_size, dtype="float32")
for j, path in enumerate(batch_input_img_paths):
img = load_img(path, target_size=self.img_size)
x[j] = img
y = np.zeros((batch_size,) + self.img_size, dtype="float32")
for j, path in enumerate(batch_target_img_paths):
img = load_img(path, target_size=self.img_size, color_mode="rgb")
y[j] = img
return x, y
val_samples = 150
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_img_paths)
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]
# Instantiate data Sequences for each split
train_gen = OxfordPets(
batch_size, img_size, train_input_img_paths, train_target_img_paths
)
val_gen = OxfordPets(batch_size, img_size, val_input_img_paths, val_target_img_paths)
but when i try to fit whit this
model_history = model.fit(train_gen, epochs=30,
steps_per_epoch=50,
validation_steps=25,
validation_data=val_gen)
I get the error, I am trying to adapt this solution
https://keras.io/examples/vision/oxford_pets_image_segmentation/?fbclid=IwAR2wFYju-N0X7FUaWkhvOVaAAaVqLdOryBwg7xDC0Rji9LQ5F2jYOkeNnns
from keras
into the example of the tensorflow page
https://www.tensorflow.org/tutorials/images/segmentation
and I have the impression that the problem has something to do whit the fact that the original image is on gray scale, how can I solve this error? any advice would be great!

You should convert your image to RGB first. Your image is gray-scaled and has only 1 channel. Its shape is (128,128,1). Them apply sth like opencv: backtorgb = cv2.cvtColor(gray,cv2.COLOR_GRAY2RGB) to every image in your data and everything will be ok

Your mask is RGB and has 3 channels. but your image is grayscale and has one channel. See This question for converting RGB image to grayscale image

Related

Keras use augmentation with custom image generator

I am using a custom image generator to read my data off disk in batches as described here, https://keras.io/examples/vision/oxford_pets_image_segmentation/
The exact generator looks like this:
from tensorflow import keras
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
import tensorflow
class OxfordPets(keras.utils.Sequence):
"""Helper to iterate over the data (as Numpy arrays)."""
def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
self.batch_size = batch_size
self.img_size = img_size
self.input_img_paths = input_img_paths
self.target_img_paths = target_img_paths
def __len__(self):
return len(self.target_img_paths) // self.batch_size
def __getitem__(self, idx):
"""Returns tuple (input, target) correspond to batch #idx."""
i = idx * self.batch_size
batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
for j, path in enumerate(batch_input_img_paths):
img = load_img(path, target_size=self.img_size)
x[j] = img
y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
for j, path in enumerate(batch_target_img_paths):
img = load_img(path, target_size=self.img_size, color_mode="grayscale")
y[j] = np.expand_dims(img, 2)
# Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
y[j] -= 1
return x, y
This works great and is helpful as my data is too large to read into ram. It takes two paths which are pathways to the input images (batch_input_img_paths) and the masks (batch_target_img_paths).
I would like to modify this generator to use data augmentation. I am trying this:
class OxfordPets(keras.utils.Sequence):
"""Helper to iterate over the data (as Numpy arrays)."""
def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
self.batch_size = batch_size
self.img_size = img_size
self.input_img_paths = input_img_paths
self.target_img_paths = target_img_paths
def __len__(self):
return len(self.target_img_paths) // self.batch_size
def __data_augmentation(self, img):
''' function for apply some data augmentation '''
img = tensorflow.keras.preprocessing.image.random_shift(img, 0.2, 0.2)
img = tensorflow.keras.preprocessing.image.random_zoom(img, 0.2)
img = tensorflow.keras.preprocessing.image.random_shear(img, 0.2)
img = tensorflow.keras.preprocessing.image.random_rotation(img, 40)
img = tensorflow.image.random_flip_left_right(img)
img = tensorflow.image.random_flip_up_down(img)
return img
def __getitem__(self, idx):
"""Returns tuple (input, target) correspond to batch #idx."""
i = idx * self.batch_size
batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
for j, path in enumerate(batch_input_img_paths):
img = load_img(path, target_size=self.img_size)
#apply augmentation
img = self.__data_augmentation(img)
x[j] = img
y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
for j, path in enumerate(batch_target_img_paths):
img = load_img(path, target_size=self.img_size, color_mode="grayscale")
y[j] = np.expand_dims(img, 2)
#apply augmentation
img = self.__data_augmentation(img)
# Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
y[j] -= 1
return x, y
but this returns:
AttributeError: 'Image' object has no attribute 'shape'
Another thing I am not positive about is if the exact same augmentation will be applied to the data and to the label. Perhaps a random seed is needed for this? Another thing I would like to do is not only feed in augmented images into the final network but to increase the training size with the augmentation, so that some real images and some augmented images are being fed in, for instance only augment 30% of the input images.
EDIT:
I think I got the augmentation working like this:
class OxfordPets(keras.utils.Sequence):
"""Helper to iterate over the data (as Numpy arrays)."""
def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
self.batch_size = batch_size
self.img_size = img_size
#augmentation
self.augmentor = tensorflow.keras.preprocessing.image.ImageDataGenerator(
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
self.input_img_paths = input_img_paths
self.target_img_paths = target_img_paths
def __len__(self):
return len(self.target_img_paths) // self.batch_size
def __getitem__(self, idx):
"""Returns tuple (input, target) correspond to batch #idx."""
i = idx * self.batch_size
batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
for j, path in enumerate(batch_input_img_paths):
img = load_img(path, target_size=self.img_size)
x[j] = img
y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
for j, path in enumerate(batch_target_img_paths):
img = load_img(path, target_size=self.img_size, color_mode="grayscale")
y[j] = np.expand_dims(img, 2)
# Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
y[j] -= 1
return x, y
but this does not solve my problem of actually increasing the dataset size, I believe it is only sending augmented images in now, and no real data.

tf.data or tf.keras.utils.Sequence. Improving efficiency of tf.data?

I am trying to develop an image colorizer using autoencoders. There are 13000 training images. Each epoch takes about 45 minutes if I use tf.data and about 25 minutes if I use tf.utils.keras.Sequence. However with the use of Sequence there is a risk of deadlocks. How do I improve tf.data? I tried a couple of things but they don't seem to improve anything.
tf.data 1
image_path_list = glob.glob('datasets/imagenette/*')
data = tf.data.Dataset.list_files(image_path_list)
def tf_rgb2lab(image):
im_shape = image.shape
[image,] = tf.py_function(color.rgb2lab, [image], [tf.float32])
image.set_shape(im_shape)
return image
def preprocess(path):
image = tf.io.read_file(path)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [224, 224])
image = tf_rgb2lab(image)
L = image[:,:,0]/100.
ab = image[:,:,1:]/128.
input = tf.stack([L,L,L], axis=2)
return input, ab
train_ds = data.repeat().map(preprocess, AUTOTUNE).batch(32).prefetch(AUTOTUNE)
tf.data 2
AUTOTUNE = tf.data.experimental.AUTOTUNE
def tf_rgb2lab(image):
im_shape = image.shape
[image,] = tf.py_function(color.rgb2lab, [image], [tf.float32])
image.set_shape(im_shape)
return image
def split_for_feed(image):
L = image[:,:,:,0]/100.
ab = image[:,:,:,1:]/128.
input = tf.stack([L,L,L], axis=-1)
return input, ab
def read_images(path):
image = tf.io.read_file(path)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [224, 224])
image = tf_rgb2lab(image)
return image
data2 = data.repeat().map(read_images, AUTOTUNE).batch(32)
train_ds = data2.map(split_for_feed, AUTOTUNE).prefetch(AUTOTUNE)
Sequence
class ImageGenerator(tf.keras.utils.Sequence):
def __init__(self, image_filenames, batch_size):
self.image_filenames = image_filenames
self.batch_size = batch_size
def __len__(self):
return math.ceil(len(self.image_filenames) / self.batch_size)
def __getitem__(self, idx):
batch = self.image_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
X_batch = []
y_batch = []
for file_name in batch:
file_name = 'datasets/imagenette/' + file_name
try:
color_image = transform.resize(io.imread(file_name),(224,224))
lab_image = color.rgb2lab(color_image)
L = lab_image[:,:,0]/100.
ab = lab_image[:,:,1:]/128.
X_batch.append(np.stack((L,L,L), axis=2))
y_batch.append(ab)
except:
pass
return np.array(X_batch), np.array(y_batch)
If your data fits in memory, try caching the preprocessing. Instead of
train_ds = data.repeat().map(preprocess, AUTOTUNE).batch(32).prefetch(AUTOTUNE)
Do
train_ds = data.map(preprocess, AUTOTUNE).batch(32).cache().repeat().prefetch(AUTOTUNE)
That way you parse each file just once, instead of repeatedly.
If you're looking to optimize the pipeline further, consider using the TF Profiler, which can tell you exactly how much time is being spent in each part of the dataset, so that you can find the bottleneck and solve it.

How to create a bi-input TPU model for images?

I want to convert my GPU model to TPU model. My GPU model takes two input image and has the same output for both images. I use custom data generator for this. There are two parallel networks; one for each input.
From this StackOverflow question, I tried to solve this but I failed.
Here is what I tried
dataset_12 = tf.data.Dataset.from_tensor_slices((left_train_paths, right_train_paths))
dataset_label = tf.data.Dataset.from_tensor_slices(train_labels)
dataset = tf.data.Dataset.zip((dataset_12, dataset_label)).batch(2).repeat()
Problem I am facing is that I am unable to decode the bi-input images.
Here is the decoder function
def decode_image(filename, label=None, image_size=(IMG_SIZE_h, IMG_SIZE_w)):
bits = tf.io.read_file(filename)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
#convert to numpy and do some cv2 staff mb?
if label is None:
return image
else:
return image, label
The issue is that I am unable to pass both images to the decoder function at the same time. How can I resolve this?
I also try to decode the image in following way
def decode(img,image_size=(IMG_SIZE_h, IMG_SIZE_w)):
bits = tf.io.read_file(img)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
return image
def decode_image(left, right,labels=None ):
if labels is None:
return decode(left),decode(right)
else:
return decode(left),decode(right),labels
image=tf.data.Dataset.from_tensor_slices((left_train_paths,right_train_paths,train_labels ))
dataset=image.map(decode_image, num_parallel_calls=AUTO).repeat().shuffle(512).batch(BATCH_SIZE).prefetch(AUTO)
dataset
The output is of dataset variable is now as
<PrefetchDataset shapes: ((None, 760, 760, 3), (None, 760, 760, 3), (None, 8)), types: (tf.float32, tf.float32, tf.int64)>
How can I pass it to the model now?
Model
def get_model():
left_tensor = Input(shape=(IMG_SIZE_h,IMG_SIZE_w,3))
right_tensor = Input(shape=(IMG_SIZE_h,IMG_SIZE_w,3))
left_model = EfficientNetB3(input_shape = (img_shape,img_shape,3), include_top = False, weights = 'imagenet',input_tensor=left_tensor)
right_model = EfficientNetB3(input_shape = (img_shape,img_shape,3), include_top = False, weights = 'imagenet',input_tensor=right_tensor)
con = concatenate([left_model.output, right_model.output])
GAP= GlobalAveragePooling2D()(con)
out = Dense(8, activation = 'sigmoid')(GAP)
model =Model(inputs=[left_input, right_input], outputs=out)
return model
I found a pretty elegant solution. I will explain step by step since may be a bit different of what you thought:
When decoding the images stack both images in a single tensor so the input tensor will be of shape [2, IMAGE_H, IMAGE_W, 3]
def decode_single(im_path, image_size):
bits = tf.io.read_file(im_path)
image = tf.image.decode_jpeg(bits, channels=3)
image = tf.cast(image, tf.float32) / 255.0
image = tf.image.resize(image, image_size)
return image
# Note that the image paths are packed in a tuple, and we unpack them inside the function
def decode(paths, label=None, image_size=(128, 128)):
image_path1, image_path2 = paths
im1 = decode_single(image_path1, image_size)
im2 = decode_single(image_path2, image_size)
images = tf.stack([im1, im2])
if label is not None:
return images, label
return images
I declare the data pipeline so the paths are packed in a tuple.
label_ds = ...
ds = tf.data.Dataset.from_tensor_slices((left_paths, right_paths))
ds = tf.data.Dataset.zip((ds, label_ds)) # returns as ((im_path1, im_path2), label)) not (im_path1, im_path2, label)
ds = ds.map(decode).batch(4)
print(ds)
# Out: <BatchDataset shapes: ((None, 2, 128, 128, 3), ((None,),)), types: (tf.float32, (tf.int32,))>
Since we are feeding batches of two images (None, 2, 128, 128, 3). Declare the model with a single input of shape (2, HEIGHT, WIDTH, 3) and then we split the input in the two images:
def get_model():
input_layer = Input(shape=(2, IMAGE_H,IMAGE_W,3))
# Split into two images
right_image, left_image = Lambda(lambda x: tf.split(x, 2, axis=1))(input_layer)
right_image = Reshape([IMAGE_H, IMAGE_W, 3])(right_image)
left_image = Reshape([IMAGE_H, IMAGE_W, 3])(left_image)
# Replace by EfficientNets
left_model = Conv2D(64, 3)(left_image)
right_model = Conv2D(64, 3)(right_image)
con = Concatenate(-1)([left_model, right_model])
GAP = GlobalAveragePooling2D()(con)
out = Dense(8, activation = 'sigmoid')(GAP)
model = tf.keras.Model(inputs=input_layer, outputs=out)
return model
Finally compile and train the model as usual:
model = get_model()
model.compile(...)
model.fit(ds, epochs=10)

In Pytorch, how to test simple image with my loaded model?

I made a alphabet classification CNN model using Pytorch, and then use that model to test it with a single image that I've never seen before. I extracted a bounding box in my handwriting image with opencv, but I don't know how to apply it to the model.
bounded my_image
this is custom dataset
class CustomDatasetFromCSV(Dataset):
def __init__(self, csv_path, height, width, transforms=None):
"""
Args:
csv_path (string): path to csv file
height (int): image height
width (int): image width
transform: pytorch transforms for transforms and tensor conversion
"""
self.data = pd.read_csv(csv_path)
self.labels = np.asarray(self.data.iloc[:, 0])
self.height = height
self.width = width
self.transforms = transforms
def __getitem__(self, index):
single_image_label = self.labels[index]
# Read each 784 pixels and reshape the 1D array ([784]) to 2D array ([28,28])
img_as_np = np.asarray(self.data.iloc[index][1:]).reshape(28,28).astype('uint8')
# Convert image from numpy array to PIL image, mode 'L' is for grayscale
img_as_img = Image.fromarray(img_as_np)
img_as_img = img_as_img.convert('L')
# Transform image to tensor
if self.transforms is not None:
img_as_tensor = self.transforms(img_as_img)
# Return image and the label
return (img_as_tensor, single_image_label)
def __len__(self):
return len(self.data.index)
transformations = transforms.Compose([
transforms.ToTensor()
])
alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
28, 28, transformations)
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
if True:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)
train_loader = torch.utils.data.DataLoader(dataset = alphabet_from_csv,
batch_size = batch_size,
sampler = train_dataset)
test_loader = torch.utils.data.DataLoader(dataset = alphabet_from_csv,
batch_size = batch_size,
sampler = test_dataset)
this is my model
class ConvNet3(nn.Module):
def __init__(self, num_classes=26):
super().__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(28),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(56),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc = nn.Sequential(
nn.Dropout(p = 0.5),
nn.Linear(56 * 7 * 7, 512),
nn.BatchNorm1d(512),
nn.ReLU(),
nn.Dropout(p = 0.5),
nn.Linear(512, 26),
)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.fc(out)
return out
model = ConvNet3(num_classes).to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
# train phase
model.train()
# create a progress bar
batch_loss_list = []
progress = ProgressMonitor(length=len(train_dataset))
for batch, target in train_loader:
# Move the training data to the GPU
batch, target = batch.to(device), target.to(device)
# forward propagation
output = model( batch )
# calculate the loss
loss = loss_func( output, target )
# clear previous gradient computation
optimizer.zero_grad()
# backpropagate to compute gradients
loss.backward()
# update model weights
optimizer.step()
# update progress bar
batch_loss_list.append(loss.item())
progress.update(batch.shape[0], sum(batch_loss_list)/len(batch_loss_list) )
def test():
# test phase
model.eval()
correct = 0
# We don't need gradients for test, so wrap in
# no_grad to save memory
with torch.no_grad():
for batch, target in test_loader:
# Move the training batch to the GPU
batch, target = batch.to(device), target.to(device)
# forward propagation
output = model( batch )
# get prediction
output = torch.argmax(output, 1)
# accumulate correct number
correct += (output == target).sum().item()
# Calculate test accuracy
acc = 100 * float(correct) / len(test_dataset)
print( 'Test accuracy: {}/{} ({:.2f}%)'.format( correct, len(test_dataset), acc ) )
for epoch in range(num_epochs):
print("{}'s try".format(int(epoch)+1))
train()
test()
print("-----------------------------------------------------------------------------")
this is my image to bound
import cv2
import matplotlib.image as mpimg
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[1]
rects=[]
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h < 20: continue
red = (0, 0, 255)
cv2.rectangle(im, (x, y), (x+w, y+h), red, 2)
rects.append((x,y,w,h))
cv2.imwrite('my_handwritten_bounding.png', im)
img_result = []
img_for_class = im.copy()
margin_pixel = 60
for rect in rects:
#[y:y+h, x:x+w]
img_result.append(
img_for_class[rect[1]-margin_pixel : rect[1]+rect[3]+margin_pixel,
rect[0]-margin_pixel : rect[0]+rect[2]+margin_pixel])
# Draw the rectangles
cv2.rectangle(im, (rect[0], rect[1]),
(rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2)
count = 0
nrows = 4
ncols = 7
plt.figure(figsize=(12,8))
for n in img_result:
count += 1
plt.subplot(nrows, ncols, count)
plt.imshow(cv2.resize(n,(28,28)), cmap='Greys', interpolation='nearest')
plt.tight_layout()
plt.show()
You have already written the function test to test your net. The only thing you should do — create batch with one image with same preprocessing as images in your dataset.
def test_one_image(I, model):
'''
I - 28x28 uint8 numpy array
'''
# test phase
model.eval()
# convert image to torch tensor and add batch dim
batch = torch.tensor(I / 255).unsqueeze(0)
# We don't need gradients for test, so wrap in
# no_grad to save memory
with torch.no_grad():
batch = batch.to(device)
# forward propagation
output = model( batch )
# get prediction
output = torch.argmax(output, 1)
return output

Passing two queues to Tensorflow training

I'm trying to create a train operation based on CIFAR10 example from Tensorflow that uses tf.RandomShuffleQueue and my labels comes from the name of the files as mentioned in (Accessing filename from file queue in Tensor Flow). How can I use this code with that?
When I try to run the following code, where path is a directory with many files:
filenames = [path, f) for f in os.listdir(path)][1:]
file_fifo = tf.train.string_input_producer(filenames,
shuffle=False,
capacity=len(filenames))
reader = tf.WholeFileReader()
key, value = reader.read(file_fifo)
image = tf.image.decode_png(value, channels=3, dtype=tf.uint8)
image.set_shape([config.image_height, config.image_width, config.image_depth])
image = tf.cast(image, tf.float32)
image = tf.divide(image, 255.0)
labels = [int(os.path.basename(f).split('_')[-1].split('.')[0]) for f in filenames]
label_fifo = tf.FIFOQueue(len(filenames), tf.int32, shapes=[[]])
label_enqueue = label_fifo.enqueue_many([tf.constant(labels)])
label = label_fifo.dequeue()
bq = tf.RandomShuffleQueue(capacity=16 * batch_size,
min_after_dequeue=8 * batch,
dtypes=[tf.float32, tf.int32])
batch_enqueue_op = bq.enqueue([image, label_enqueue])
runner = tf.train.queue_runner.QueueRunner(bq, [batch_enqueue_op] * num_threads)
tf.train.add_queue_runner(runner)
# Read 'batch' labels + images from the example queue.
images, labels = batch_queue.dequeue_many(FLAGS.batch_size)
labels = tf.reshape(labels, [FLAGS.batch_size, 1])
I get obvious erros, because I know my code doesn't make much sense. I have two FIFO queues file_fifo and label_fifo, but I don't know how to make my label_fifo input of my tf.RandomShuffleQueue.
Can someone help? Thank you :-)
I changed my code to:
filenames = [os.path.join(FLAGS.data_path, f) for f in os.listdir(FLAGS.data_path)][1:]
np.random.shuffle(filenames)
file_fifo = tf.train.string_input_producer(filenames, shuffle=False, capacity=len(filenames))
reader = tf.WholeFileReader()
key, value = reader.read(file_fifo)
image = tf.image.decode_png(value, channels=3, dtype=tf.uint8)
image.set_shape([config.image_height, config.image_width, config.image_depth])
image = tf.cast(image, tf.float32)
image = tf.divide(image, 255.0)
labels = [int(os.path.basename(f).split('_')[-1].split('.')[0]) for f in filenames]
label_fifo = tf.FIFOQueue(len(filenames), tf.int32, shapes=[[]])
label_enqueue = label_fifo.enqueue_many([tf.constant(labels)])
label = label_fifo.dequeue()
if is_train:
images, label_batch = tf.train.shuffle_batch([image, label],
batch_size=FLAGS.batch_size,
num_threads=FLAGS.num_threads,
capacity=16 * FLAGS.batch_size,
min_after_dequeue=8 * FLAGS.batch_size)
labels = tf.reshape(label_batch, [FLAGS.batch_size, 1])
For training I have:
class _LoggerHook(tf.train.SessionRunHook):
"""Logs loss and runtime."""
def begin(self):
self._step = -1
def before_run(self, run_context):
self._step += 1
self._start_time = time.time()
if self._step % int(config.train_examples / FLAGS.batch_size) == 0 or self._step == 0:
run_context.session.run(label_enqueue_op)
return tf.train.SessionRunArgs({'loss': loss, 'net': net})
and I run training as:
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_path,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook()],
config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) as mon_sess:
while not mon_sess.should_stop():
mon_sess.run(train_op)
The training starts, but it runs only for the very first step and hangs - maybe because it's waiting for some queue command