I want to convert my GPU model to a TPU model. My GPU model takes two input images and has the same output for both images. I use a custom data generator for this. There are two parallel networks, one for each input.
From this StackOverflow question, I tried to solve this but I failed.
Here is what I tried
# Pair the left/right image paths so each element is a (left_path, right_path) tuple.
dataset_12 = tf.data.Dataset.from_tensor_slices((left_train_paths, right_train_paths))
# Labels live in their own dataset and are zipped onto the path pairs below.
dataset_label = tf.data.Dataset.from_tensor_slices(train_labels)
# Elements are ((left_path, right_path), label); the paths are not decoded into images here.
dataset = tf.data.Dataset.zip((dataset_12, dataset_label)).batch(2).repeat()
Problem I am facing is that I am unable to decode the bi-input images.
Here is the decoder function
def decode_image(filename, label=None, image_size=(IMG_SIZE_h, IMG_SIZE_w)):
    """Read a JPEG file and return it as a resized float image in [0, 1].

    Args:
        filename: path of the JPEG file to read.
        label: optional label that is passed through unchanged.
        image_size: target size handed to ``tf.image.resize``.

    Returns:
        The image alone when ``label`` is None, otherwise ``(image, label)``.
    """
    raw = tf.io.read_file(filename)
    pixels = tf.cast(tf.image.decode_jpeg(raw, channels=3), tf.float32) / 255.0
    resized = tf.image.resize(pixels, image_size)
    # TODO: optionally convert to numpy and apply cv2 processing here.
    return resized if label is None else (resized, label)
The issue is that I am unable to pass both images to the decoder function at the same time. How can I resolve this?
I also tried to decode the images in the following way:
def decode(img,image_size=(IMG_SIZE_h, IMG_SIZE_w)):
    """Load one JPEG from path ``img`` as a float32 image scaled to [0, 1] and resized."""
    raw = tf.io.read_file(img)
    decoded = tf.image.decode_jpeg(raw, channels=3)
    scaled = tf.cast(decoded, tf.float32) / 255.0
    return tf.image.resize(scaled, image_size)
def decode_image(left, right,labels=None ):
    """Decode the left and right image paths of one sample.

    Returns ``(left_image, right_image)`` when ``labels`` is None, otherwise the
    flat 3-tuple ``(left_image, right_image, labels)``.
    """
    left_image = decode(left)
    right_image = decode(right)
    if labels is not None:
        return left_image, right_image, labels
    return left_image, right_image
# Build a dataset of (left_path, right_path, label) triples.
image=tf.data.Dataset.from_tensor_slices((left_train_paths,right_train_paths,train_labels ))
# Decode both images of every element, then repeat, shuffle, batch and prefetch.
# NOTE(review): elements stay flat 3-tuples; Keras `fit` documents dataset elements as
# (inputs, targets) or (inputs, targets, sample_weights), so a two-input model would
# presumably need ((left, right), label) instead — confirm.
dataset=image.map(decode_image, num_parallel_calls=AUTO).repeat().shuffle(512).batch(BATCH_SIZE).prefetch(AUTO)
dataset
The output of the dataset variable is now:
<PrefetchDataset shapes: ((None, 760, 760, 3), (None, 760, 760, 3), (None, 8)), types: (tf.float32, tf.float32, tf.int64)>
How can I pass it to the model now?
Model
def get_model():
    """Build a two-branch EfficientNetB3 classifier over a left and a right image.

    Each branch is an ImageNet-initialised EfficientNetB3 without its top. The two
    feature maps are concatenated, globally average-pooled and mapped to 8 sigmoid
    outputs.

    Returns:
        A Keras ``Model`` taking ``[left_image, right_image]`` as inputs.
    """
    left_tensor = Input(shape=(IMG_SIZE_h, IMG_SIZE_w, 3))
    right_tensor = Input(shape=(IMG_SIZE_h, IMG_SIZE_w, 3))
    # input_shape is kept consistent with the Input tensors that are actually fed in.
    left_model = EfficientNetB3(input_shape=(IMG_SIZE_h, IMG_SIZE_w, 3), include_top=False,
                                weights='imagenet', input_tensor=left_tensor)
    right_model = EfficientNetB3(input_shape=(IMG_SIZE_h, IMG_SIZE_w, 3), include_top=False,
                                 weights='imagenet', input_tensor=right_tensor)
    # NOTE(review): both backbones create layers with identical names; Keras may reject
    # duplicate layer names in one Model — confirm, and rename one branch's layers if so.
    con = concatenate([left_model.output, right_model.output])
    GAP = GlobalAveragePooling2D()(con)
    out = Dense(8, activation='sigmoid')(GAP)
    # The model inputs are the tensors created above (previously undefined names were used).
    model = Model(inputs=[left_tensor, right_tensor], outputs=out)
    return model
I found a pretty elegant solution. I will explain it step by step, since it may be a bit different from what you had in mind:
When decoding the images, stack both images into a single tensor so that the input tensor has shape [2, IMAGE_H, IMAGE_W, 3].
def decode_single(im_path, image_size):
    """Read the JPEG at ``im_path`` and return it scaled to [0, 1] and resized to ``image_size``."""
    raw = tf.io.read_file(im_path)
    decoded = tf.image.decode_jpeg(raw, channels=3)
    scaled = tf.cast(decoded, tf.float32) / 255.0
    return tf.image.resize(scaled, image_size)
# Note that the image paths are packed in a tuple, and we unpack them inside the function
def decode(paths, label=None, image_size=(128, 128)):
    """Decode a pair of image paths into one stacked tensor.

    Args:
        paths: tuple ``(first_path, second_path)``; it is unpacked here.
        label: optional label passed through unchanged.
        image_size: size both images are resized to.

    Returns:
        The stacked images, or ``(stacked_images, label)`` when a label is given.
    """
    first_path, second_path = paths
    images = tf.stack([
        decode_single(first_path, image_size),
        decode_single(second_path, image_size),
    ])
    if label is None:
        return images
    return images, label
I declare the data pipeline so the paths are packed in a tuple.
# Placeholder: the dataset of labels has to be supplied here.
label_ds = ...
# Each element is a (left_path, right_path) tuple.
ds = tf.data.Dataset.from_tensor_slices((left_paths, right_paths))
ds = tf.data.Dataset.zip((ds, label_ds)) # returns as ((im_path1, im_path2), label)) not (im_path1, im_path2, label)
# `decode` receives (paths, label) and yields (stacked_images, label); batches hold 4 elements.
ds = ds.map(decode).batch(4)
print(ds)
# Out: <BatchDataset shapes: ((None, 2, 128, 128, 3), ((None,),)), types: (tf.float32, (tf.int32,))>
Since we are feeding batches of image pairs with shape (None, 2, 128, 128, 3), declare the model with a single input of shape (2, HEIGHT, WIDTH, 3) and then split that input into the two images:
def get_model():
    """Build a two-branch model fed by a single stacked-pair input.

    The input has shape ``(2, IMAGE_H, IMAGE_W, 3)``. It is split along axis 1 into
    the two images, each goes through its own branch, and the branch outputs are
    concatenated, average-pooled and mapped to 8 sigmoid outputs.

    Returns:
        A ``tf.keras.Model`` with one input and one output.
    """
    input_layer = Input(shape=(2, IMAGE_H, IMAGE_W, 3))
    # Split into two images. tf.split keeps the stacking order, so the first slice is the
    # first image that was stacked (the left one) and the second slice is the right one.
    left_image, right_image = Lambda(lambda x: tf.split(x, 2, axis=1))(input_layer)
    # Each slice still has a singleton axis of size 1; drop it.
    left_image = Reshape([IMAGE_H, IMAGE_W, 3])(left_image)
    right_image = Reshape([IMAGE_H, IMAGE_W, 3])(right_image)
    # Replace by EfficientNets
    left_model = Conv2D(64, 3)(left_image)
    right_model = Conv2D(64, 3)(right_image)
    con = Concatenate(-1)([left_model, right_model])
    GAP = GlobalAveragePooling2D()(con)
    out = Dense(8, activation='sigmoid')(GAP)
    model = tf.keras.Model(inputs=input_layer, outputs=out)
    return model
Finally compile and train the model as usual:
# Build the model returned by get_model().
model = get_model()
# Placeholder: supply optimizer, loss and metrics here.
model.compile(...)
# The dataset `ds` is passed to fit directly.
model.fit(ds, epochs=10)
Related
I've got a working CNN model that classifies images from a custom dataset that is loaded with a csv file. The dataset is split up into training, validation and test dataset after being shuffled. Now I want to expand the image input by four extra input classes containing info / metadata about the images.
I've already learnt that I should split up my cnn model into two branches, one for the images and one for the extra input. My question is, how must I modify my data input so that the model can correctly process both images and additional input?
I'm very new to creating neural networks in tensorflow. My entire code is basically from this website. However, none of the topics could solve the problem for my code.
This is my code: (additional metadata are called usages, completions, heights, constructions)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from keras.callbacks import History
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io
# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
# Shuffle all rows once (sample with frac=1) before the split sizes below are applied.
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values
# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
# int() truncates, so the three sizes can add up to slightly less than dataset_size.
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)
img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.experimental.AUTOTUNE
# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
    """Load one JPEG and cast its four metadata values to float32.

    Returns:
        ``(image, label, usg, com, hei, con)`` with the image resized and scaled by
        1/255, the label unchanged, and each metadata value cast to float32.
    """
    raw = tf.io.read_file(image_file)
    decoded = tf.image.decode_jpeg(raw, channels=3)
    # NOTE(review): the size is passed as (img_width, img_height); both are 350 here.
    resized = tf.image.resize(decoded, (img_width, img_height))
    scaled = tf.cast(resized, tf.float32) / 255.0
    metadata = [tf.cast(value, tf.float32) for value in (usg, com, hei, con)]
    return (scaled, label, *metadata)
# FUNCTION FOR DATA AUGMENTATION
def augment(image, labeL, usg, com, hei, con):
    """Randomly augment the image of one sample; all other fields pass through.

    With probability 0.1 the image is converted to grayscale (replicated to three
    channels). Brightness, contrast, saturation and horizontal flip are then
    randomised on every call.

    Args:
        image: image tensor with three channels.
        labeL: sample label (parameter spelling kept so existing callers still work).
        usg, com, hei, con: metadata values, returned unchanged.

    Returns:
        ``(image, labeL, usg, com, hei, con)`` with only the image modified.
    """
    if tf.random.uniform((), minval=0, maxval=1) < 0.1:
        image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
    image = tf.image.random_brightness(image, max_delta=0.25)
    image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
    image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
    image = tf.image.random_flip_left_right(image)
    # Return the parameter itself; the previous `label` was not defined in this function.
    return image, labeL, usg, com, hei, con
# SETUP FOR TRAINING, VALIDATION & TEST DATASET
def pack_inputs(image, label, usg, com, hei, con):
    """Regroup one batch as ((image, metadata), label) for the two-input model.

    The four metadata values are stacked on the last axis, so a batch becomes a
    (batch, 4) tensor next to the image batch.
    """
    metadata = tf.stack([usg, com, hei, con], axis=-1)
    return (image, metadata), label


# Slice the (already shuffled) columns into per-sample elements, then split them
# with the train/val/test sizes computed above.
ds_all = tf.data.Dataset.from_tensor_slices(
    (file_paths, labels, usages, completions, heights, constructions))
ds_train = ds_all.take(train_size)
ds_val = ds_all.skip(train_size).take(val_size)
ds_test = ds_all.skip(train_size + val_size).take(test_size)

ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
# Cache the decoded images before augmentation so augmentation stays random per epoch.
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.map(pack_inputs, num_parallel_calls=autotune)
ds_train = ds_train.prefetch(autotune)

ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.map(pack_inputs, num_parallel_calls=autotune)
ds_val = ds_val.prefetch(autotune)

ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.map(pack_inputs, num_parallel_calls=autotune)
ds_test = ds_test.prefetch(autotune)
## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##
# DEFINING FUNCTIONAL MODEL
# Image branch input.
input_img = keras.Input(shape=(img_width, img_height, 3))
# Metadata branch input: one value per extra field (usage, completion, height, construction).
input_dat = keras.Input(shape=(4,)) # how is this shape supposed to be?
# Four conv -> batch-norm -> max-pool stages with 16, 32, 64 and 128 filters.
x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
# Flattened image features and the dense metadata features are merged below.
out1 = layers.Flatten()(x)
out2 = layers.Dense(128, activation='relu')(input_dat)
merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
# NOTE(review): sigmoid outputs are combined with SparseCategoricalCrossentropy below;
# softmax is the usual pairing for single-label classification — confirm intent.
output = layers.Dense(8, activation='sigmoid')(x)
# The model expects its inputs in the order [image, metadata].
model = keras.Model(inputs=[input_img, input_dat], outputs=output)
history = History()
no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss', # stop training when overfitting occurs
min_delta=0.015, patience=1,
verbose=2, mode='auto')
# TRAINING STEP
model.compile(
optimizer=keras.optimizers.Adam(3e-5),
loss=[keras.losses.SparseCategoricalCrossentropy()],
metrics=["accuracy"])
model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
verbose=1, validation_data=ds_val)
So far I've only added the extra inputs to the dataset tensor and changed the model structure. How exactly do I split my dataset into input_img and input_dat so that each model branch will receive their proper input?
Also I have a custom test step in order to plot a confusion matrix. How is this supposed to be modified? Here the working code, for just the image input:
# Collect the true labels and the predicted classes batch by batch.
y_true, y_pred = [], []
for x, y in ds_test:
    y_true.append(y)
    predicts = model.predict(x)  # model predictions for this test batch
    y_pred.append(np.argmax(predicts, axis=-1))
true = tf.concat(y_true, axis=0)
pred = tf.concat(y_pred, axis=0)

# Confusion matrix (sklearn), overall accuracy, then row-wise normalisation.
cm = confusion_matrix(true, pred)
testacc = np.trace(cm) / float(np.sum(cm))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

# Heatmap of the normalised matrix.
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(
    cm,
    annot=True,
    square=True,
    cmap=color,
    fmt=".3f",
    linewidths=0.6,
    linecolor='k',
    cbar_kws={"shrink": 0.8},
)
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
# Border lines at position 8; adjust to the number of classes.
ax.axhline(y=8, color='k', linewidth=1.5)
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))
Any help is greatly appreciated!!
I'd like to try image segmentation with my grayscale tif images (the shape of original images are (512,512) and the value of each pixel is between 0-2 or NaN which is in float32 type and the mask images have 0, 1, or NaN also in float32 type). I followed Google Colab and tensorflow tutorial to create the following code:
from glob import glob
from PIL import Image
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
from tensorflow.python.keras import backend as K
#get the path of my data
img = sorted(glob('train_sub_5/*.tif'))
mask = sorted(glob('train_mask_sub_5/*.tif'))
#split into train and test data
img, img_val, mask, mask_val = train_test_split(img, mask, test_size=0.2, random_state=42)


def _load_arrays(paths):
    """Load every image file in ``paths`` as an (H, W, 1) array with NaN replaced by 0.

    NaN pixels would otherwise propagate through the network and the loss, so they
    are mapped to 0 here. NOTE(review): confirm 0 is the right fill value for the
    NaN (sea) region.
    """
    arrays = []
    for path in paths:
        with Image.open(path) as handle:
            pixels = np.array(handle)
        # Add the trailing channel axis the model input expects.
        arrays.append(np.nan_to_num(pixels, nan=0.0)[..., np.newaxis])
    return arrays


#load image as array and append to a list
# The path lists keep their names; the loader no longer rebinds `img`/`mask`.
train_image = _load_arrays(img)
train_mask = _load_arrays(mask)
test_img = _load_arrays(img_val)
test_mask = _load_arrays(mask_val)
#create TensorSliceDataset
# Stack all arrays and slice along the first axis so that every image/mask pair becomes
# one dataset element. Rebuilding the dataset inside a loop kept only the last pair.
train = tf.data.Dataset.from_tensor_slices((np.stack(train_image), np.stack(train_mask)))
test = tf.data.Dataset.from_tensor_slices((np.stack(test_img), np.stack(test_mask)))
#for visualization
def display(display_list):
    """Show the given images side by side, titled input / true mask / predicted mask."""
    titles = ['Input Image', 'True Mask', 'Predicted Mask']
    plt.figure(figsize=(15, 15))
    panel_count = len(display_list)
    for idx, panel in enumerate(display_list):
        plt.subplot(1, panel_count, idx + 1)
        plt.title(titles[idx])
        plt.imshow(panel)
        plt.axis('off')
    plt.show()
# Take one element and keep channel 0 of the image and of the mask for plotting.
for img, mask in train.take(1):
    sample_image, sample_mask = [tensor.numpy()[:, :, 0] for tensor in (img, mask)]
display([sample_image, sample_mask])
The output of the visualization looks normal like below:
out put of the visualization
#build the model
train_length = len(train_image)
img_shape = (512,512,1)
batch_size = 8
buffer_size = 5
epochs = 5
# Training pipeline: cache, shuffle with a buffer of train_length, batch, repeat indefinitely.
train_dataset = train.cache().shuffle(train_length).batch(batch_size).repeat()
train_dataset = train_dataset.prefetch(buffer_size)
# Validation pipeline: batched and repeated, not shuffled.
test_dataset = test.batch(batch_size).repeat()
def conv_block(input_tensor, num_filters):
    """Apply two rounds of 3x3 'same' convolution, batch normalisation and ReLU."""
    x = input_tensor
    for _ in range(2):
        x = layers.Conv2D(num_filters, (3, 3), padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
    return x
def encoder_block(input_tensor, num_filters):
    """Run a conv block, then 2x2 max pooling; return ``(pooled, features)``."""
    features = conv_block(input_tensor, num_filters)
    pooled = layers.MaxPooling2D((2, 2), strides=(2, 2))(features)
    return pooled, features
def decoder_block(input_tensor, concat_tensor, num_filters):
    """Upsample ``input_tensor`` by 2, merge it with ``concat_tensor`` and refine.

    A stride-2 transposed convolution is concatenated with the skip tensor on the
    channel axis, normalised and activated, followed by two conv/BN/ReLU rounds.
    """
    upsampled = layers.Conv2DTranspose(
        num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
    x = layers.concatenate([concat_tensor, upsampled], axis=-1)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    for _ in range(2):
        x = layers.Conv2D(num_filters, (3, 3), padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
    return x
# U-Net assembly. Size comments give the spatial size of the tensor produced by the
# line below them, for the 512x512 input declared in img_shape.
inputs = layers.Input(shape=img_shape)
# pooled: 256
encoder0_pool, encoder0 = encoder_block(inputs, 32)
# pooled: 128
encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)
# pooled: 64
encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)
# pooled: 32
encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)
# pooled: 16
encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)
# center: 16
center = conv_block(encoder4_pool, 1024)
# 32
decoder4 = decoder_block(center, encoder4, 512)
# 64
decoder3 = decoder_block(decoder4, encoder3, 256)
# 128
decoder2 = decoder_block(decoder3, encoder2, 128)
# 256
decoder1 = decoder_block(decoder2, encoder1, 64)
# 512
decoder0 = decoder_block(decoder1, encoder0, 32)
# One sigmoid channel per pixel.
outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)
model = models.Model(inputs=[inputs], outputs=[outputs])
def dice_coeff(y_true, y_pred):
    """Return the smoothed Dice coefficient of the flattened inputs (smoothing term 1)."""
    smooth = 1.
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])
    overlap = tf.reduce_sum(flat_true * flat_pred)
    numerator = 2. * overlap + smooth
    denominator = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) + smooth
    return numerator / denominator
def dice_loss(y_true, y_pred):
    """Return one minus the Dice coefficient."""
    return 1 - dice_coeff(y_true, y_pred)
def bce_dice_loss(y_true, y_pred):
    """Return binary cross-entropy plus Dice loss."""
    bce_term = losses.binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term
model.compile(optimizer='adam', loss=bce_dice_loss, metrics=[dice_loss])
model.summary()
#save model
save_model_path = 'tmp/weights.hdf5'
# val_dice_loss is a loss (lower is better), so the best checkpoint is the minimum;
# mode='max' would have kept the worst epoch.
cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path, monitor='val_dice_loss', mode='min', save_best_only=True)
#start training
# Both datasets repeat indefinitely, so the step counts define the epoch length.
history = model.fit(train_dataset,
                    steps_per_epoch=int(np.ceil(train_length / float(batch_size))),
                    epochs=epochs,
                    validation_data=test_dataset,
                    validation_steps=int(np.ceil(len(test_img) / float(batch_size))),
                    callbacks=[cp])
#training process visualization
dice = history.history['dice_loss']
val_dice = history.history['val_dice_loss']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(epochs)

# One panel per metric: (training curve, its label, validation curve, its label, title).
panels = [
    (dice, 'Training Dice Loss', val_dice, 'Validation Dice Loss',
     'Training and Validation Dice Loss'),
    (loss, 'Training Loss', val_loss, 'Validation Loss',
     'Training and Validation Loss'),
]
plt.figure(figsize=(16, 8))
for position, (train_curve, train_label, val_curve, val_label, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc='upper right')
    plt.title(heading)
plt.show()
The output of the training process visualization looks like below:
The output of the training process visualization
The model seems functioning.
#make prediction
def show_predictions(dataset=None, num=1):
    """Predict on ``num`` batches of ``dataset`` and display the first sample of each."""
    for image, mask in dataset.take(num):
        pred_mask = model.predict(image)
        first_image = image[0,:,:,0]
        first_mask = mask[0,:,:,0]
        display([first_image, first_mask, create_mask(pred_mask)])
def create_mask(pred_mask):
    """Turn a batch of predictions into the 2-D class mask of its first sample.

    With a single output channel (sigmoid probability) the mask is the 0.5
    threshold of that channel; argmax over one channel is always 0 and would give
    a blank mask. With several channels the mask is the argmax over channels.

    Args:
        pred_mask: predictions of shape (batch, height, width, channels).

    Returns:
        An int64 tensor of shape (height, width) for the first sample.
    """
    if pred_mask.shape[-1] == 1:
        labels = tf.cast(pred_mask > 0.5, tf.int64)
    else:
        labels = tf.argmax(pred_mask, axis=-1)[..., tf.newaxis]
    return labels[0,:,:,0]
show_predictions(test_dataset, 3)
The output of the prediction is below:
The output of predictions
I tried to inspect the variables test and test_dataset using:
for img, mask in test:
print(img,mask)
But I only got one image array and one mask array. Does it mean that there's only one image array and one mask array in the dataset? What's wrong with my code creating train and test TensorSliceDataset?
The second question is: why is the predicted mask blank? Is it because some of my patches contain NaN? As you can see in the output, the white part of the input image and the true mask (the sea) is represented by NaN. If this is the problem, how should I set the NaN values so that the model ignores the sea?
Thank you for your help.
def display(display_list):
    """Show the given arrays side by side after converting each with ``array_to_img``."""
    titles = ['Input Image', 'True Mask', 'Predicted Mask']
    plt.figure(figsize=(15, 15))
    panel_count = len(display_list)
    for idx, panel in enumerate(display_list):
        plt.subplot(1, panel_count, idx + 1)
        plt.title(titles[idx])
        as_image = tf.keras.preprocessing.image.array_to_img(panel)
        plt.imshow(as_image)
        plt.axis('off')
    plt.show()
def show_predictions(dataset=None, num=1):
    """Predict on ``num`` batches, print value statistics and display the first sample.

    Predictions are scaled by 255 before the minimum, maximum and unique-value
    counts are printed.
    """
    for image, mask in dataset.take(num):
        pred_mask = model.predict(image)
        pred_mask *= 255.0
        for summary in (pred_mask.min(), pred_mask.max(),
                        np.unique(pred_mask, return_counts=True)):
            print(summary)
        display([image[0], mask[0], pred_mask[0]])
show_predictions(test_dataset, 3)
I made an alphabet classification CNN model using PyTorch, and now I want to test that model with a single image that it has never seen before. I extracted bounding boxes from my handwriting image with OpenCV, but I don't know how to apply them to the model.
bounded my_image
this is custom dataset
class CustomDatasetFromCSV(Dataset):
    """Dataset of grayscale images stored one per CSV row.

    Column 0 of each row is the label and the remaining columns are the pixel
    values of a ``height`` x ``width`` image.
    """

    def __init__(self, csv_path, height, width, transforms=None):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transforms: pytorch transforms for transforms and tensor conversion
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for row ``index``.

        The image is the transformed result when ``transforms`` is set, otherwise
        the untransformed PIL image.
        """
        single_image_label = self.labels[index]
        # Reshape the flat pixel row to (height, width) using the configured size.
        img_as_np = (np.asarray(self.data.iloc[index][1:])
                     .reshape(self.height, self.width)
                     .astype('uint8'))
        # Convert image from numpy array to PIL image, mode 'L' is for grayscale
        img_as_img = Image.fromarray(img_as_np).convert('L')
        if self.transforms is None:
            # Without transforms there is no tensor to return; hand back the PIL image
            # instead of failing on an unbound variable.
            return (img_as_img, single_image_label)
        img_as_tensor = self.transforms(img_as_img)
        return (img_as_tensor, single_image_label)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data.index)
# The only preprocessing step is ToTensor.
transformations = transforms.Compose([
transforms.ToTensor()
])
# Load the CSV as a dataset of 28x28 images.
alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
28, 28, transformations)
# Seeded 80/20 split of the sample indices.
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
np.random.seed(random_seed)
np.random.shuffle(indices)
test_indices = indices[:split]
train_indices = indices[split:]

# Despite their names these are samplers over the shared dataset, not datasets.
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv, batch_size=batch_size, sampler=train_dataset)
test_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv, batch_size=batch_size, sampler=test_dataset)
this is my model
class ConvNet3(nn.Module):
    """Two conv stages followed by a two-layer classifier for 1x28x28 images."""

    def __init__(self, num_classes=26):
        """
        Args:
            num_classes (int): size of the output layer (default 26).
        """
        super().__init__()
        # 1x28x28 -> 28x14x14
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(28),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 28x14x14 -> 56x7x7
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(56),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(56 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # Output size follows num_classes (it was hard-coded to 26).
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (N, 1, 28, 28)."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
# Instantiate the network on the target device; num_classes, device and learning_rate
# are defined elsewhere.
model = ConvNet3(num_classes).to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
    """Run one training pass over ``train_loader`` and report the running mean loss."""
    model.train()
    batch_loss_list = []
    progress = ProgressMonitor(length=len(train_dataset))
    for batch, target in train_loader:
        # Move the batch and its labels to the compute device.
        batch = batch.to(device)
        target = target.to(device)

        # Forward pass and loss.
        loss = loss_func(model(batch), target)

        # Reset gradients, backpropagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the mean loss over the batches seen so far.
        batch_loss_list.append(loss.item())
        mean_loss = sum(batch_loss_list) / len(batch_loss_list)
        progress.update(batch.shape[0], mean_loss)
def test():
    """Evaluate the model on ``test_loader`` and print the accuracy."""
    model.eval()
    correct = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch, target in test_loader:
            batch = batch.to(device)
            target = target.to(device)
            predicted = torch.argmax(model(batch), 1)
            correct += (predicted == target).sum().item()
    total = len(test_dataset)
    acc = 100 * float(correct) / total
    print( 'Test accuracy: {}/{} ({:.2f}%)'.format( correct, total, acc ) )
# One training pass and one evaluation pass per epoch, with a separator line after each.
for epoch in range(num_epochs):
    attempt = int(epoch) + 1
    print("{}'s try".format(attempt))
    train()
    test()
    print("-----------------------------------------------------------------------------")
this is my image to bound
import cv2
import matplotlib.image as mpimg
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
# findContours returns (image, contours, hierarchy) in OpenCV 3 but (contours, hierarchy)
# in OpenCV 4; index -2 is the contour list in both.
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
rects = []
red = (0, 0, 255)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    # Skip boxes lower than 20 px.
    if h < 20:
        continue
    cv2.rectangle(im, (x, y), (x + w, y + h), red, 2)
    rects.append((x, y, w, h))
cv2.imwrite('my_handwritten_bounding.png', im)
img_result = []
img_for_class = im.copy()
margin_pixel = 60
for rect in rects:
    x, y, w, h = rect
    # Clamp the crop start at 0: a negative start would wrap around to the other side of
    # the image. An end past the border is clipped by the slice itself.
    top = max(y - margin_pixel, 0)
    left = max(x - margin_pixel, 0)
    #[y:y+h, x:x+w]
    img_result.append(
        img_for_class[top : y + h + margin_pixel,
                      left : x + w + margin_pixel])
    # Draw the rectangles
    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 0, 255), 2)
# Show every crop, resized to 28x28, in a 4x7 grid.
count = 0
nrows = 4
ncols = 7
plt.figure(figsize=(12,8))
for count, n in enumerate(img_result, start=1):
    thumbnail = cv2.resize(n,(28,28))
    plt.subplot(nrows, ncols, count)
    plt.imshow(thumbnail, cmap='Greys', interpolation='nearest')
plt.tight_layout()
plt.show()
You have already written the function test to test your net. The only thing you should do — create batch with one image with same preprocessing as images in your dataset.
def test_one_image(I, model):
'''
I - 28x28 uint8 numpy array
'''
# test phase
model.eval()
# convert image to torch tensor and add batch dim
batch = torch.tensor(I / 255).unsqueeze(0)
# We don't need gradients for test, so wrap in
# no_grad to save memory
with torch.no_grad():
batch = batch.to(device)
# forward propagation
output = model( batch )
# get prediction
output = torch.argmax(output, 1)
return output
I'm trying to get my first net running. The following error occurs:
ValueError: Error when checking input: expected dense_125_input to have 2 dimensions, but got array with shape (192, 192, 1)
# ... images 300 px width/height
def preprocess_image(image):
    """Decode JPEG bytes as one channel, resize to 192x192 and scale by 1/255."""
    decoded = tf.image.decode_jpeg(image, channels=1)
    resized = tf.image.resize(decoded, [192, 192])
    return resized / 255.0  # normalize to [0,1] range
# creating the dataset
def prepare_data_train(path, label_from_filename, show=False):
    """Build an (image, label) dataset from the ``*.jpg`` files under ``path``.

    Args:
        path: prefix that ``'*.jpg'`` is appended to for globbing.
        label_from_filename: callable returning the label for a file name, or
            ``False`` when the file should be skipped.
        show: unused.

    Returns:
        The zipped ``(image, label)`` dataset, unshuffled and unbatched.
    """
    images = []
    labels = []
    for file in glob.glob(path + '*.jpg'):
        label = label_from_filename(file)
        # Skip only the explicit False sentinel; `!= False` would also drop label 0.
        if label is not False:
            images.append(file)
            labels.append(label)
    path_ds = tf.data.Dataset.from_tensor_slices(images)
    image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(labels, tf.int32))
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
    # shuffling, batch size
    BATCH_SIZE = 20
    image_count = len(images)
    # Setting a shuffle buffer size as large as the dataset ensures that the data is
    # completely shuffled.
    ds = image_label_ds.shuffle(buffer_size=image_count)
    ds = ds.repeat()
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches, in the background while the model is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    keras_ds = ds.map(change_range)
    image_batch, label_batch = next(iter(keras_ds))
    # NOTE(review): the shuffled/batched `keras_ds` is built but the unbatched dataset is
    # returned; the reported input shape (192, 192, 1) has no batch axis — confirm which
    # dataset callers should receive.
    return image_label_ds
# running ...
model = Sequential()
# NOTE(review): input_shape=(192,) declares flat 192-vectors, while the reported error
# shows inputs of shape (192, 192, 1).
model.add(Dense(100, activation='relu', input_shape=(192,)))
model.add(Dense(2, activation='softmax'))
model.summary()
# NOTE(review): categorical_crossentropy expects one-hot targets; the labels are built
# as int32 values in prepare_data_train — confirm the loss/label encoding.
model.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
train_ds = prepare_data_train(path_train, label_from_filename, False)
validation_ds = prepare_data_test(path_test, label_from_filename, False)
# error when fitting
history = model.fit(train_ds,
batch_size=20,
epochs=10,
verbose=2,
validation_steps=2,
steps_per_epoch=2,
validation_data=validation_ds)
How to resolve it? Is reshaping needed, how?
Based on the images the net should predict 1 or 2.
The error comes from this line in your code:
model.add(Dense(100, activation='relu', input_shape=(192,)))
Namely, the shape of your input is 3 dimensional [width, height, channels] or [192, 192, 1]. So, if you really want to have that dense layer at the start, change the model definition to:
model = Sequential()
# Flatten each (192, 192, 1) image into a single vector before the dense layers.
model.add(Flatten(input_shape=[192, 192, 1]))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
The line model.add(Flatten(input_shape=[192, 192, 1])) will flatten your input to be a single vector for each element in the batch. Then, you can proceed as you want.
This error occurs when you pass the wrong training data to model.fit. Check your train_ds with print(train_ds) before passing it to model.fit. It should return something like:
<SkipDataset shapes: ((192, 192, 1), ()), types: (tf.float32, tf.int64)>
Can I get a full example somewhere that feeds a tf.data.Dataset iterator to a model? I'm trying to feed this data into a model without the help of tf.Estimators.
def preprocess_image(image):
    """Decode, resize to 224x224, randomly flip, scale by 1/255 and batch one image.

    NOTE(review): the function ends with ``tf.train.shuffle_batch`` and is mapped
    over a ``tf.data`` dataset elsewhere in this snippet — confirm that batching
    belongs here and not in the dataset pipeline.
    """
    decoded = tf.image.decode_jpeg(image, channels=1)
    resized = tf.image.resize_images(decoded, [224, 224])
    flipped = tf.image.random_flip_left_right(resized)
    scaled = tf.cast(flipped / 255.0, tf.float32)
    return tf.train.shuffle_batch(
        [scaled],
        batch_size=16,
        num_threads=10,
        capacity=100000,
        min_after_dequeue=15,
    )
def load_and_preprocess_image(path):
    """Read the file at ``path`` and hand its bytes to ``preprocess_image``."""
    raw_bytes = tf.read_file(path)
    return preprocess_image(raw_bytes)
# Training images: dataset of file paths, mapped through the loader with 8 parallel calls.
train_data_dx = tf.data.Dataset.from_tensor_slices(xray_data_train['full_path'].values)
train_data_dx = train_data_dx.map(load_and_preprocess_image, num_parallel_calls=8)
# Training labels are kept as the 'Finding_strings' column; they are not part of the dataset.
train_data_dy = xray_data_train['Finding_strings']
print(train_data_dx.output_shapes)
print(train_data_dx.output_types)
# Test images and labels, built the same way.
test_data_dx = tf.data.Dataset.from_tensor_slices(xray_data_test['full_path'].values)
test_data_dx = test_data_dx.map(load_and_preprocess_image, num_parallel_calls=8)
test_data_dy = xray_data_test['Finding_strings']
Here's a full example.
Note
The iterator must be initialized at the beginning.
We can set the number of epochs with the repeat() method and the batch size with the batch() method. Note that I call repeat() first and then batch().
At each iteration we use the tf.Session() interface to fetch the next batch.
We use try-except because tf.errors.OutOfRangeError is raised once the repeated data runs out.
import tensorflow as tf
from sklearn.datasets import make_blobs
# generate dummy data for illustration
# 25 samples with 2 features, drawn around the two centers (1, 1) and (-1, -1).
x_train, y_train = make_blobs(n_samples=25,
n_features=2,
centers=[[1, 1], [-1, -1]],
cluster_std=0.5)
n_epochs = 2
batch_size = 10
# Placeholders that are fed with one batch at a time.
with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, 2])
    y = tf.placeholder(tf.int32, shape=[None])

# A single dense layer producing two logits.
with tf.name_scope('logits'):
    logits = tf.layers.dense(x, units=2, name='logits')

# Mean sparse softmax cross-entropy over the batch.
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss_tensor = tf.reduce_mean(xentropy)

# Gradient descent with learning rate 0.01.
with tf.name_scope('optimizer'):
    sgd = tf.train.GradientDescentOptimizer(0.01)
    train_op = sgd.minimize(loss_tensor)
# create dataset `from_tensor_slices` and create iterator
dataset = tf.data.Dataset.from_tensor_slices({'x':x_train, 'y':y_train})
# Repeat first, then batch, using the configured batch_size (it was hard-coded to 10).
dataset = dataset.repeat(n_epochs).batch(batch_size)
iterator = dataset.make_initializable_iterator()
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              iterator.initializer]) # <-- must be initialized!
    next_batch = iterator.get_next()
    while True:
        try:
            batch = sess.run(next_batch) # <-- extract next batch
            loss_val, _ = sess.run([loss_tensor, train_op],
                                   feed_dict={x:batch['x'], y:batch['y']})
            print(loss_val)
        except tf.errors.OutOfRangeError:
            # Raised once all repeated data has been consumed.
            break