Tensorflow meets an error when rotating images - tensorflow

I want to implement data augmentation by rotating images in TensorFlow. After searching the related material on Stack Overflow, I found a good answer by zimmermc:
def rotate_image_tensor(image, angle, mode='black'):
    """
    Rotates a 3D tensor (HWD), which represents an image by given radian angle.

    New image has the same size as the input image.

    mode controls what happens to border pixels.
    mode = 'black' results in black bars (value 0 in unknown areas)
    mode = 'white' results in value 255 in unknown areas
    mode = 'ones' results in value 1 in unknown areas
    mode = 'repeat' keeps repeating the closest pixel known

    The image sizes are read from the static shape (get_shape()), so every
    dimension must be known when the graph is built. Sampling is nearest
    neighbour: each output pixel is mapped back into the source image.
    """
    s = image.get_shape().as_list()
    assert len(s) == 3, "Input needs to be 3D."
    assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
    image_center = [np.floor(x/2) for x in s]

    # Coordinates of new image
    coord1 = tf.range(s[0])
    coord2 = tf.range(s[1])

    # Create vectors of those coordinates in order to vectorize the image
    coord1_vec = tf.tile(coord1, [s[1]])

    coord2_vec_unordered = tf.tile(coord2, [s[0]])
    coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
    coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])

    # center coordinates since rotation center is supposed to be in the image center
    coord1_vec_centered = coord1_vec - image_center[0]
    coord2_vec_centered = coord2_vec - image_center[1]

    coord_new_centered = tf.cast(tf.pack([coord1_vec_centered, coord2_vec_centered]), tf.float32)

    # Perform backward transformation of the image coordinates
    rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
    rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
    coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)

    # Find nearest neighbor in old image
    coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
    coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)

    # Clip values to stay inside image coordinates
    if mode == 'repeat':
        # Every output pixel is kept; out-of-range source coordinates are
        # clamped to the nearest border pixel.
        coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
        coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
    else:
        # Output pixels whose source falls outside the image are dropped here
        # and later filled with the background colour by sparse_to_dense.
        outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
        outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
        outside_ind = tf.logical_or(outside_ind1, outside_ind2)

        coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
        coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))

        coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
        coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))

    coord_old_clipped = tf.cast(tf.transpose(tf.pack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)

    # Coordinates of the new image
    coord_new = tf.transpose(tf.cast(tf.pack([coord1_vec, coord2_vec]), tf.int32), [1, 0])

    # The fill value does not depend on the channel, so pick it once.
    if (mode == 'black') or (mode == 'repeat'):
        background_color = 0
    elif mode == 'ones':
        background_color = 1
    elif mode == 'white':
        background_color = 255

    # Rotate each channel separately, then stack them back to H x W x D.
    image_channel_list = tf.split(2, s[2], image)

    image_rotated_channel_list = list()
    for image_channel in image_channel_list:
        image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)

        image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
                                                             background_color, validate_indices=False))

    image_rotated = tf.transpose(tf.pack(image_rotated_channel_list), [1, 2, 0])

    return image_rotated
When I run the code above, I get the error shown below.
How to solve it? Thanks very much!
image_center = [np.floor(x/2) for x in s] TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
I feed data to the graph using the input pipeline method. When debugging the code, I see that s = [None, None, 3]. The source code comes from the question "tensorflow: how to rotate an image for data augmentation?"

Your input image is most likely a tf.placeholder with variable dimensions.
For example, an image with undefined height:
image = tf.placeholder(tf.float32, shape=[None, 365, 3])
When you evaluate your graph, you can get the actual dimensions:
s = tf.shape(image) # Returns a Tensor, not a list
image_center = tf.floor(s / 2)
You can't use numpy, as this calculation needs to occur as part of the Graph.
As an aside, you should use tf.contrib.image.rotate now.


How to access tensor shape inside map function

I need to access image shapes to build an augmentation pipeline. However, when I access them through image.shape[0] and image.shape[1] inside the mapped function, I cannot perform the augmentations because the tensor dimensions are reported as None.
Related issues: How to access Tensor shape in .map?
Appreciate if anyone could help.
parsed_dataset = tf.data.TFRecordDataset(filenames=train_records_paths).map(parsing_fn) # Returns [image,label]
augmented_dataset = parsed_dataset.map(augment_pipeline)
augmented_dataset = augmented_dataset.unbatch()
Mapped function
5 Versions of the original image: 4 corner crops + a central crop and the respective labels.
def augment_pipeline(original_image,label):
central_crop = lambda image: tf.image.central_crop(image,0.5)
corner_crops = lambda image: tf.image.extract_patches(images=tf.expand_dims(image,0), # Transform image in a batch of single sample
sizes=[1, int(0.5 * image.shape[0]), int(0.5 * image.shape[1]), 1], # 50% of the image's height and width
rates=[1, 1, 1, 1],
strides=[1, int(0.5 * image.shape[0]), int(0.5 * image.shape[1]), 1],
reshaped_patches = tf.reshape(corner_crops(original_image), [-1,int(0.5*original_image.shape[0]),int(0.5*original_image.shape[1]),3])
images = tf.concat([reshaped_patches,tf.expand_dims(central_crop(original_image),axis=0)],axis=0)
label = tf.reshape(label,[1,1])
labels = tf.tile(label,[5,1])
return images,labels
After further research I was able to solve this by using py_func, as suggested here, together with tf.shape(image)[0], as suggested here.
5 Versions of the original image: 4 corner crops + a central crop and the respective labels.
def augment_pipeline(original_image,label):
height = int(tf.shape(original_image)[0].numpy() * 0.5) # 50% of the image's height and width
width = int(tf.shape(original_image)[1].numpy() * 0.5)
central_crop = lambda image: tf.image.central_crop(image,0.5)
corner_crops = lambda image: tf.image.extract_patches(images=tf.expand_dims(image,0), # Transform image in a batch of single sample
sizes=[1, height, width, 1],
rates=[1, 1, 1, 1],
strides=[1, height, width, 1],
Then we use py_func to allow accessing numpy values inside map function:
parsed_dataset = tf.data.TFRecordDataset(filenames=train_records_paths).map(parsing_fn) # Returns [image,label]
augmented_dataset = parsed_dataset.map(lambda image,label: tf.py_function(func=augment_pipeline,
augmented_dataset = augmented_dataset.unbatch()
Every Dataset object is iterable. Now the Dataset object can either be in the batched form or the unbatched form. I will tell you how to get their elements shapes in both the cases.
Case 1. Dataset object is in unbatched form.
Method 1. Consuming its elements using iter
it = iter(dataset)
element = next(it)
image,label = element
## element is a tuple
Method 2. using take
# take(1) returns a Dataset holding one element, not the element itself,
# so pull that element out before unpacking it.
element = next(iter(dataset.take(1)))
image, label = element
# element is a tuple
Case 2. When the dataset is batched. Now I assume that the dataset contains (image,label) tuples
Method 1. Using iter
it = iter(dataset)
batch = next(it)
images,labels = batch
## batch is a tuple check it using type(batch)
Method 2. Using take
batch = dataset.take(1)
## Note here each element of the dataset is a batch and each batch contains some number of
## (image,label) tuples
batch = next(iter(batch))
images,labels = batch
## batch is again a tuple

How do I make the bounding boxes in yolo v3 tighter (closer to the objects)?

I'm following this repo on creating a YOLO v3 model from scratch in PyTorch. The only problem is that the bounding boxes are not as tight (close to the objects) in most of the images I tried. I compared them to the tutorial on creating a YOLO v3 model using TensorFlow. The TensorFlow model produces excellent bounding boxes that are as tight as possible to the objects.
I tried to understand how the calculations are different between the two, but I'm finding myself getting stuck with the differences between torch and tf.
I believe the code for the bounding boxes in the tf tutorial comes from here:
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
"""Creates Yolo final detection layer.
Detects boxes with respect to anchors.
inputs: Tensor input.
n_classes: Number of labels.
anchors: A list of anchor sizes.
img_size: The input size of the model.
data_format: The input format.
Tensor output.
n_anchors = len(anchors)
inputs = tf.layers.conv2d(inputs, filters=n_anchors * (5 + n_classes),
kernel_size=1, strides=1, use_bias=True,
shape = inputs.get_shape().as_list()
grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
if data_format == 'channels_first':
inputs = tf.transpose(inputs, [0, 2, 3, 1])
inputs = tf.reshape(inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1],
5 + n_classes])
strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])
box_centers, box_shapes, confidence, classes = \
tf.split(inputs, [2, 2, 1, n_classes], axis=-1)
x = tf.range(grid_shape[0], dtype=tf.float32)
y = tf.range(grid_shape[1], dtype=tf.float32)
x_offset, y_offset = tf.meshgrid(x, y)
x_offset = tf.reshape(x_offset, (-1, 1))
y_offset = tf.reshape(y_offset, (-1, 1))
x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])
box_centers = tf.nn.sigmoid(box_centers)
box_centers = (box_centers + x_y_offset) * strides
anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)
confidence = tf.nn.sigmoid(confidence)
classes = tf.nn.sigmoid(classes)
inputs = tf.concat([box_centers, box_shapes,
confidence, classes], axis=-1)
return inputs
While the code for the bounding boxes for the pytorch model comes from here, and the explanation:
def bbox_iou(box1, box2):
    """Returns the IoU of two bounding boxes.

    Both arguments are 2-D tensors whose first four columns are the corner
    coordinates (x1, y1, x2, y2). Rows are compared element-wise (with the
    usual broadcasting), giving one IoU value per row. Widths and heights
    are computed as ``x2 - x1 + 1``, i.e. the corner coordinates are
    treated as inclusive.
    """
    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Intersection area; clamping each side at 0 makes disjoint boxes
    # contribute no overlap instead of a negative extent.
    inter_width = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0)
    inter_height = torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
    inter_area = inter_width * inter_height

    # Union area = sum of both areas minus the part counted twice
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area)

    return iou
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
    """Turn a raw YOLO detection feature map into box predictions.

    Args:
        prediction: tensor of shape
            (batch, num_anchors * (5 + num_classes), grid, grid).
        inp_dim: side length of the (square) network input, in pixels.
        anchors: sequence of (width, height) anchor sizes in input pixels.
        num_classes: number of class scores per box.
        CUDA: kept for backward compatibility. Helper tensors are now
            created on ``prediction.device``, so the flag no longer has to
            match where ``prediction`` lives (the old ``.cuda()`` calls
            crashed on CPU-only machines with the default ``True``).

    Returns:
        A new tensor of shape (batch, grid * grid * num_anchors,
        5 + num_classes) whose last axis holds
        (center_x, center_y, width, height, objectness, class scores...),
        with the box values expressed in input pixels. The input tensor is
        not modified.
    """
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    device, dtype = prediction.device, prediction.dtype

    # Flatten the grid and move the attribute axis last: one row per
    # (grid cell, anchor) pair.
    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1,2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs)

    # Per-cell offsets in row-major order: x cycles fastest, y changes once
    # per grid row. Each cell's offset is repeated for every anchor.
    grid = torch.arange(grid_size, device=device, dtype=dtype)
    x_offset = grid.repeat(grid_size).view(-1,1)
    y_offset = grid.view(-1,1).repeat(1,grid_size).view(-1,1)
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)

    # Anchors are expressed in grid units first; the final multiplication
    # by stride brings everything back to input pixels.
    scaled_anchors = torch.tensor([(a[0]/stride, a[1]/stride) for a in anchors],
                                  device=device, dtype=dtype)
    scaled_anchors = scaled_anchors.repeat(grid_size*grid_size, 1).unsqueeze(0)

    # Sigmoid the centre_X, centre_Y and add the center offsets
    box_centers = (torch.sigmoid(prediction[:,:,:2]) + x_y_offset) * stride
    # Log space transform of the height and the width
    box_sizes = torch.exp(prediction[:,:,2:4]) * scaled_anchors * stride
    # Object confidence and class scores
    objectness = torch.sigmoid(prediction[:,:,4:5])
    class_scores = torch.sigmoid(prediction[:,:, 5 : 5 + num_classes])

    return torch.cat((box_centers, box_sizes, objectness, class_scores), 2)

High Eigen values always for Edge detection

I am trying to understand Harris detector, using the explanation here. As per explanation, I understand, if we calculate the eigen values, then,
However, when I calculate them, the eigenvalues are always high. Below is my main image, from which I extract parts to calculate the eigenvalues.
For a flat area with no visible features, I get this distribution (rightmost plot), which looks right, but the eigenvalues are large.
For a linear edge, I also get high eigenvalues: 16290305.45393251 567780.54606749
For a corner, high values are expected, but given the cases above I now doubt whether these high values are correct:
8958127.80563239 10986758.19436761
Here is my method, translated from matlab code here. Its the vals value I directly get from numpy's linear algebra library.
def plot_derivatives_1(img_rgb, mode=1):
img_rgb = image in rgb color space (3 channeled)
img_1c = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
if mode == 1: # method 1 derivative
Ix = cv2.Sobel(img_1c, cv2.CV_64F, 1, 0, ksize=3)
Iy = cv2.Sobel(img_1c, cv2.CV_64F, 0, 1, ksize=3)
# another method of derivatives
dx = np.array([
[-1, 0, 1],
[-1, 0, 1],
[-1, 0, 1]
dy = np.transpose(dx)
Ix = signal.convolve2d(img_1c, dx, mode='valid')
Iy = signal.convolve2d(img_1c, dy, mode='valid')
Ix, Iy = Ix.astype(np.float64), Iy.astype(np.float64) # else gaussian blur later is failing
# yet to solve why we need A and eigen outputs
A = np.array([
[ np.sum(Ix*Ix), np.sum(Ix*Iy) ],
[ np.sum(Ix*Iy), np.sum(Iy*Iy) ]
vals, V = linalg.eig(A)
lamb = vals/np.max(vals)
print('lambda values:{}'.format(vals))
fig, ax = plt.subplots(1,4, figsize=(20,5))
ax[0].imshow(img_rgb);ax[0].set_title('Input Image')
ax[1].imshow(Ix, cmap='gray');ax[1].set_title('$I_x = \dfrac{\partial I}{\partial x}$')
ax[2].imshow(Iy, cmap='gray');ax[2].set_title('$I_y = \dfrac{\partial I}{\partial y}$')
ax[3].scatter(Ix, Iy);ax[3].set_xlim([-200,200]);ax[3].set_ylim([-200,200]);
ax[3].set_aspect('equal');ax[3].set_title('Derivatives Distribution');
ax[3].axvline(x=0, color = 'r');ax[3].axhline(y=0, color ='r')
return Ix, Iy
A sample call for a case (here shown for corner).
img = cv2.imread(SRC_FOLDER + 'checkersandbooksmall_sample_6.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
Ix, Iy = plot_derivatives_1(img_rgb, mode=1)
I use jupyter notebook and the code is just built as I try to understand the concept.
What am I doing wrong to get high eigen values always for all cases?
The sample images used for above cases could be found here

Tensorflow : Predict in Recurrent Neural Networks for Drawing Classification tutorial

I used the tutorial code from https://www.tensorflow.org/tutorials/recurrent_quickdraw and all works fine until I tried to make a prediction instead of just evaluate it.
I wrote a new input function for prediction, based on the code in create_dataset.py
def predict_input_fn():
def parse_line(stroke_points):
"""Parse an ndjson line and return ink (as np array) and classname."""
inkarray = json.loads(stroke_points)
stroke_lengths = [len(stroke[0]) for stroke in inkarray]
total_points = sum(stroke_lengths)
np_ink = np.zeros((total_points, 3), dtype=np.float32)
current_t = 0
for stroke in inkarray:
for i in [0, 1]:
np_ink[current_t:(current_t + len(stroke[0])), i] = stroke[i]
current_t += len(stroke[0])
np_ink[current_t - 1, 2] = 1 # stroke_end
# Preprocessing.
# 1. Size normalization.
lower = np.min(np_ink[:, 0:2], axis=0)
upper = np.max(np_ink[:, 0:2], axis=0)
scale = upper - lower
scale[scale == 0] = 1
np_ink[:, 0:2] = (np_ink[:, 0:2] - lower) / scale
# 2. Compute deltas.
np_ink = np_ink[1:, 0:2] - np_ink[0:-1, 0:2]
np_ink = np_ink[1:, :]
features = {}
features["ink"] = tf.train.Feature(float_list=tf.train.FloatList(value=np_ink.flatten()))
features["shape"] = tf.train.Feature(int64_list=tf.train.Int64List(value=np_ink.shape))
f = tf.train.Features(feature=features)
example = tf.train.Example(features=f)
#t = tf.constant(np_ink)
return example
def parse_example(example):
"""Parse a single record which is expected to be a tensorflow.Example."""
# feature_to_type = {
# "ink": tf.VarLenFeature(dtype=tf.float32),
# "shape": tf.FixedLenFeature((0,2), dtype=tf.int64)
# }
feature_to_type = {
"ink": tf.VarLenFeature(dtype=tf.float32),
"shape": tf.FixedLenFeature([2], dtype=tf.int64)
example_proto = example.SerializeToString()
parsed_features = tf.parse_single_example(example_proto, feature_to_type)
parsed_features["ink"] = tf.sparse_tensor_to_dense(parsed_features["ink"])
return parsed_features
example = parse_line(FLAGS.predict_input_stroke_data)
features = parse_example(example)
dataset = tf.data.Dataset.from_tensor_slices(features)
# Our inputs are variable length, so pad them.
dataset = dataset.padded_batch(FLAGS.batch_size, padded_shapes=dataset.output_shapes)
iterator = dataset.make_one_shot_iterator()
next_feature_batch = iterator.get_next()
return next_feature_batch, None # In prediction, we have no labels
I modified the existing model_fn() function and added below at appropirate place
predictions = tf.argmax(logits, axis=1)
if mode == tf.estimator.ModeKeys.PREDICT:
preds = {
"class_index": predictions,
"probabilities": tf.nn.softmax(logits),
'logits': logits
return tf.estimator.EstimatorSpec(mode, predictions=preds)
However when i call the following the code
if FLAGS.predict_input_stroke_data is not None:
    # prepare_input_tfrecord_for_prediction()
    # predict_results = estimator.predict(input_fn=get_input_fn(
    #     mode=tf.estimator.ModeKeys.PREDICT,
    #     tfrecord_pattern=FLAGS.predict_input_temp_file,
    #     batch_size=FLAGS.batch_size))
    predict_results = estimator.predict(input_fn=predict_input_fn)
    for prediction in predict_results:
        # model_fn publishes the arg-max class under the key "class_index".
        # predict() yields one example at a time, so the value is already
        # the class index for that example.
        predicted_class = prediction["class_index"]
        print("Prediction Type: {}\n".format(predicted_class))
I get the following error. What is wrong in my code? I have tried quite a few things to get the shape right, but I am unable to. I also tried first writing my stroke data as a TFRecord and then using the existing input_fn to read from it, but that gives me similar, slightly different errors.
File "/Users/farooq/.virtualenvs/tensor1.0/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
File "/Users/farooq/.virtualenvs/tensor1.0/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Shape must be rank 2 but is rank 1 for 'Slice' (op: 'Slice') with input shapes: [?], [2], [2].
I finally solved the problem by taking my input strokes and writing them to disk as a TFRecord. I also had to write the same input strokes batch_size times to the same TFRecord; otherwise I got shape-mismatch errors. After that, invoking predict worked.
The main addition for prediction was the following function
def create_tfrecord_for_prediction(batch_size, stoke_data, tfrecord_file):
def parse_line(stoke_data):
"""Parse provided stroke data and ink (as np array) and classname."""
inkarray = json.loads(stoke_data)
stroke_lengths = [len(stroke[0]) for stroke in inkarray]
total_points = sum(stroke_lengths)
np_ink = np.zeros((total_points, 3), dtype=np.float32)
current_t = 0
for stroke in inkarray:
if len(stroke[0]) != len(stroke[1]):
print("Inconsistent number of x and y coordinates.")
return None
for i in [0, 1]:
np_ink[current_t:(current_t + len(stroke[0])), i] = stroke[i]
current_t += len(stroke[0])
np_ink[current_t - 1, 2] = 1 # stroke_end
# Preprocessing.
# 1. Size normalization.
lower = np.min(np_ink[:, 0:2], axis=0)
upper = np.max(np_ink[:, 0:2], axis=0)
scale = upper - lower
scale[scale == 0] = 1
np_ink[:, 0:2] = (np_ink[:, 0:2] - lower) / scale
# 2. Compute deltas.
#np_ink = np_ink[1:, 0:2] - np_ink[0:-1, 0:2]
#np_ink = np_ink[1:, :]
np_ink[1:, 0:2] -= np_ink[0:-1, 0:2]
np_ink = np_ink[1:, :]
features = {}
features["ink"] = tf.train.Feature(float_list=tf.train.FloatList(value=np_ink.flatten()))
features["shape"] = tf.train.Feature(int64_list=tf.train.Int64List(value=np_ink.shape))
f = tf.train.Features(feature=features)
ex = tf.train.Example(features=f)
return ex
if stoke_data is None:
print("Error: Stroke data cannot be none")
example = parse_line(stoke_data)
#Remove the file if it already exists
if tf.gfile.Exists(tfrecord_file):
writer = tf.python_io.TFRecordWriter(tfrecord_file)
for i in range(batch_size):
Then in the main function you just have to invoke estimator.predict() reusing the same input_fn=get_input_fn(...)argument except point it to the temporary created tfrecord_file
Hope this helps

Tensorflow - apply function over 1D Tensor

I have a function dice
def dice(yPred,yTruth,thresh):
    """Smoothed Dice score of a thresholded prediction against the truth.

    yPred is binarised with ``yPred >= thresh``; the score is
    (2 * sum(binarised * yTruth) + 1) / (sum(binarised) + sum(yTruth) + 1),
    with every sum taken over all elements.

    Returns the scalar score tensor and the binarised prediction.
    """
    smoothing = tf.constant(1.0)
    cutoff = tf.constant(thresh)
    binarised = tf.to_float(tf.greater_equal(yPred, cutoff))
    overlap = tf.reduce_sum(tf.mul(binarised, yTruth))
    numerator = 2*overlap + smoothing
    # Sum of both mask sizes (not a set union), plus the smoothing term.
    denominator = tf.reduce_sum(binarised) + tf.reduce_sum(yTruth) + smoothing
    return numerator/denominator, binarised
which works. An example is given here
with tf.Session() as sess:
thresh = 0.5
print("Dice example")
yPred = tf.constant([0.1,0.9,0.7,0.3,0.1,0.1,0.9,0.9,0.1],shape=[3,3])
yTruth = tf.constant([0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0],shape=[3,3])
diceScore, yPredThresh= dice(yPred=yPred,yTruth=yTruth,thresh= thresh)
diceScore_ , yPredThresh_ , yPred_, yTruth_ = sess.run([diceScore,yPredThresh,yPred, yTruth])
print("\nScore = {0}".format(diceScore_))
>>> Score = 0.899999976158
I would like to be able to loop over the third argument of dice, thresh. I do not know the best way to do this so that I can extract the result from the graph. Something along the lines of the following...
def diceROC(yPred,yTruth,thresholds=np.linspace(0.1,0.9,20)):
thresholds = thresholds.astype(np.float32)
nThreshs = thresholds.size
diceScores = tf.zeros(shape=nThreshs)
for i in xrange(nThreshs):
score,_ = dice(yPred,yTruth,thresholds[i])
diceScores[i] = score
return diceScores
Evaluating diceROC yields the error 'Tensor' object does not support item assignment, since apparently I can't assign into a slice of a tf tensor inside a loop.
Instead of the loop, I would encourage you to use broadcasting abilities of tensorflow. If you redefine dice to:
def dice(yPred,yTruth,thresh):
    """Smoothed Dice score, reduced over the first two axes only.

    ``thresh`` is compared against yPred directly, so it may be any value
    that broadcasts against it (for example a 1-D list matching the last
    axis). Because the sums run over axes 0 and 1, one score is produced
    per entry of the remaining axis.

    Returns the score tensor and the binarised prediction.
    """
    smoothing = tf.constant(1.0)
    reduce_axes = [0, 1]
    binarised = tf.to_float(tf.greater_equal(yPred, thresh))
    overlap = tf.reduce_sum(tf.mul(binarised, yTruth), reduce_axes)
    numerator = 2*overlap + smoothing
    # Sum of both mask sizes (not a set union), plus the smoothing term.
    denominator = tf.reduce_sum(binarised, reduce_axes) + tf.reduce_sum(yTruth, reduce_axes) + smoothing
    return numerator/denominator, binarised
You will be able to pass 3-dimensional yPred and yTruth (assuming the tensors will be just repeated along the last dimension) and 1-dimensional thresh:
with tf.Session() as sess:
thresh = [0.1,0.9,20, 0.5]
print("Dice example")
yPred = tf.constant([0.1,0.9,0.7,0.3,0.1,0.1,0.9,0.9,0.1],shape=[3,3,1])
ypred_tiled = tf.tile(yPred, [1,1,4])
yTruth = tf.constant([0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0],shape=[3,3,1])
ytruth_tiled = tf.tile(yTruth, [1,1,4])
diceScore, yPredThresh= dice(yPred=ypred_tiled,yTruth=ytruth_tiled,thresh= thresh)
diceScore_ = sess.run(diceScore)
print("\nScore = {0}".format(diceScore_))
You'll get:
Score = [ 0.73333335 0.77777779 0.16666667 0.89999998]