Caffe always returns one label - testing

I have trained a model with caffe tools under bin and now I am trying to do testing using python script, I read in an image and preprocess it myself (as I did for my training dataset) and I load the pretrained weights to the net, but I am almost always (99.99% of the time) receiving the same result -0- for every test image. I did consider that my model might be overfitting but after training a few models, I have come to realize the labels I get from predictions are most likely the cause. I have also increased dropout and took random crops to overcome overfitting and I have about 60K for training. The dataset is also roughly balanced. I get between 77 to 87 accuracy during evaluation step of training (depending on how I process data, what architecture I use etc)
Excuse my super hacky code. I have been away from Caffe testing for some time, so I suspect the problem is in how I pass the input data to the network, but I can't put my finger on it:
import h5py, os
import sys
sys.path.append("/home/X/Desktop/caffe-caffe-0.16/python")
from caffe.io import oversample
from caffe.io import resize_image
import caffe
from random import randint
import numpy as np
import cv2
import matplotlib.pyplot as plt
from collections import Counter as Cnt
# Evaluate a trained Caffe model on the images listed in test_random.txt.
# Every image is resized, expanded into 10 crops (5 crops x 2 mirror flips),
# each crop is classified separately and the majority label is the prediction.

# NOTE(review): the mean image is loaded but never applied to the crops below.
# If the network was trained on mean-subtracted inputs, the same subtraction is
# needed here -- confirm against the training pipeline.
meanImg = cv2.imread('/home/caffe/data/Ch/Final_meanImg.png')
model_def = '/home/X/Desktop/caffe-caffe-0.16/models/bvlc_googlenet/deploy.prototxt'
model_weights = '/media/X/DATA/SDet/Google__iter_140000.caffemodel'

# load the model
#caffe.set_mode_gpu()
#caffe.set_device(0)
net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

with open('/home/caffe/examples/sdet/SDet/test_random.txt', 'r') as T, \
        open('/media/X/DATA/SDet/results/testResults.txt', 'w') as testResultsFile:
    readImgCounter = 0
    runningCorrect = 0
    runningAcc = 0.0
    #testResultsFile.write('filename'+' '+'prediction'+' '+'GT')
    for l in T:
        # split() (instead of split(' ')) drops the trailing newline, so the
        # ground-truth column is written back without an extra blank line.
        sp = l.split()
        if len(sp) < 2:
            continue  # blank or malformed line
        video = sp[0].split('_')[0]
        impath = '/home/caffe/data/Ch/images/' + video + '/' + sp[0] + '.jpg'
        img = cv2.imread(impath)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('could not read image: ' + impath)
        resized_img = resize_image(img, (255, 255))
        # 5 crops x 2 mirror flips = 10 images
        oversampledImages = oversample([resized_img], (224, 224))

        predictedLabels = []
        # Iterate over ALL crops; the previous range(0, n-1) skipped the last one.
        for crop in oversampledImages:
            # HxWxC -> 1xCxHxW, float32
            tp = crop.transpose((2, 0, 1))[np.newaxis].astype('f4')
            net.blobs['data'].data[0] = tp
            pred = net.forward(data=tp)
            predictedLabels.append(int(pred['prob'].argmax()))
        print(predictedLabels)

        # Majority vote over the per-crop labels.
        prediction, num_most_common = Cnt(predictedLabels).most_common(1)[0]
        print(prediction)

        readImgCounter = readImgCounter + 1
        if prediction == int(sp[1]):
            runningCorrect = runningCorrect + 1
        # float() keeps this a true division under Python 2 as well.
        runningAcc = float(runningCorrect) / readImgCounter
        print('runningAcc:')
        print(runningAcc)
        print('-----------')
        print('runningCorrect:')
        print(runningCorrect)
        print('-----------')
        print('totalImgRead:')
        print(readImgCounter)
        print('-----------')
        testResultsFile.write(sp[0] + ' ' + str(prediction) + ' ' + sp[1])
        testResultsFile.write('\n')

I have fixed this problem eventually. I am not 100% sure what worked but it was most likely changing the bias to 0 while learning.

Related

embedding layer for several categories and regression

I found [this][1] and created this running POC code:
import tensorflow as tf
from tensorflow import keras
import numpy as np
def get_embedding_size(cat_data, max_size=50):
    """Return an embedding width for a categorical column.

    The width is half the number of distinct values in ``cat_data`` (rounded
    up), limited to the range ``[1, max_size]``.

    Args:
        cat_data: array-like of category values.
        max_size: upper bound for the returned width (default 50).

    Returns:
        The embedding width as a plain ``int``; never smaller than 1, so the
        result is always usable as an output dimension even for empty input.
    """
    no_of_unique_cat = len(np.unique(cat_data))
    half = np.ceil(no_of_unique_cat / 2.0)
    return int(min(max(half, 1), max_size))
# Proof of concept: one embedded categorical input plus three numeric inputs
# feeding a small regression network, trained on random data.

# 3 numerical variables
num_data = np.random.random(size=(10,3))
# 2 categorical variables
# (cat_data_2 is generated here but is not wired into the model below.)
cat_data_1 = np.random.randint(0,4,10)
cat_data_2 = np.random.randint(0,5,10)
target = np.random.random(size=(10,1))
no_unique_categories_category_1 = len(np.unique(cat_data_1))
embedding_size_category_1 = get_embedding_size(cat_data_1)
# NOTE(review): the input width is the number of distinct *sampled* values,
# while the one-hot matrix passed to fit() below is cat_data_1.max()+1 wide.
# The two differ whenever the random draw misses a category -- confirm.
inp_cat_data = keras.layers.Input(shape=(no_unique_categories_category_1,))
# 3 columns
inp_num_data = keras.layers.Input(shape=(num_data.shape[1],))
# NOTE(review): the Embedding layer receives one-hot rows here; Keras documents
# Embedding as taking integer indices, so presumably the raw category codes
# (input shape (1,)) were intended -- verify.
emb = keras.layers.Embedding(input_dim=no_unique_categories_category_1, output_dim=embedding_size_category_1)(inp_cat_data)
flatten = keras.layers.Flatten()(emb)
# Concatenate two layers
conc = keras.layers.Concatenate()([flatten, inp_num_data])
dense1 = keras.layers.Dense(3, activation=tf.nn.relu,)(conc)
# Creating output layer
out = keras.layers.Dense(1, activation=None)(dense1)
model = keras.Model(inputs=[inp_cat_data, inp_num_data], outputs=out)
# Mean squared error is used both as the loss and as the reported metric.
model.compile(optimizer='adam',
loss=keras.losses.mean_squared_error,
metrics=[keras.metrics.mean_squared_error])
# Row i of the identity matrix is the one-hot vector for category i.
one_hot_encoded_cat_data_1 = np.eye(cat_data_1.max()+1)[cat_data_1]
model.fit([one_hot_encoded_cat_data_1, num_data], target)
I wonder how one could add the additional categorical variable cat_data_2. I am also wondering why one-hot encoding is still used: isn't the whole point of an embedding to make it unnecessary? Thanks!
# Inspect the first weight array of the model's second layer
# (presumably the Embedding matrix -- confirm the layer order with model.summary()).
model.layers[1].get_weights()[0]
[1]: https://mmuratarat.github.io/2019-06-12/embeddings-with-numeric-variables-Keras

What is the correct way to implement a basic GLCM-Layer in Tensorflow/Keras?

I am trying to get a GLCM implementation running in a custom Keras Layer in a reasonable fast time. So far I took the _glcm_loop from skimage-implementation, reduced it to what I needed and put it into a basic layer, like this:
import numpy as np
import tensorflow as tf
from time import time
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers
from skimage.feature import *
from numpy import array
from math import sin, cos
from time import time
import matplotlib.pyplot as plt
class GLCMLayer(keras.layers.Layer):
    """Grey-level co-occurrence matrix (GLCM) of a quantised 2-D image.

    For every (distance, angle) pair the layer counts how often grey level
    ``i`` has grey level ``j`` as its neighbour at that offset.

    The computation is done with NumPy, so the layer only works on concrete
    (eager) values and is meant for preprocessing, not for use inside a traced
    graph.

    Args:
        greylevels: number of grey levels; the image values index the result,
            so they must lie in ``[0, greylevels)``.
        angles: iterable of angles in radians.
        distances: iterable of pixel distances.
        name: optional layer name.
    """

    def __init__(self, greylevels=32, angles=(0,), distances=(1,), name=None, **kwargs):
        super(GLCMLayer, self).__init__(name=name, **kwargs)
        self.greylevels = greylevels
        # Copy into fresh lists so the defaults are never shared between instances.
        self.angles = list(angles)
        self.distances = list(distances)

    def _glcm_loop(self, image, distances, angles, levels, out):
        """Accumulate co-occurrence counts into ``out`` in place.

        Args:
            image: 2-D array-like of integer grey levels.
            distances: sequence of pixel distances.
            angles: sequence of angles in radians.
            levels: number of grey levels (kept for signature compatibility;
                not used by the computation).
            out: array of shape (levels, levels, len(distances), len(angles))
                that receives the counts.
        """
        # One conversion up front; indexing a tensor pixel by pixel is what
        # made the original nested Python loop take seconds.
        image = np.asarray(image).astype(np.intp)
        rows = image.shape[0]
        cols = image.shape[1]
        for a_idx, angle in enumerate(angles):
            for d_idx, distance in enumerate(distances):
                offset_row = round(sin(angle) * distance)
                offset_col = round(cos(angle) * distance)
                # Keep only reference pixels whose neighbour is inside the image.
                start_row = max(0, -offset_row)
                end_row = min(rows, rows - offset_row)
                start_col = max(0, -offset_col)
                end_col = min(cols, cols - offset_col)
                reference = image[start_row:end_row, start_col:end_col]
                neighbour = image[start_row + offset_row:end_row + offset_row,
                                  start_col + offset_col:end_col + offset_col]
                # np.add.at accumulates repeated (i, j) pairs correctly, unlike
                # plain fancy-index "+=" which would count each pair only once.
                np.add.at(out[:, :, d_idx, a_idx],
                          (reference.ravel(), neighbour.ravel()), 1)

    def call(self, inputs):
        """Return the GLCM of ``inputs`` as a uint32 NumPy array of shape
        (greylevels, greylevels, len(distances), len(angles))."""
        P = np.zeros((self.greylevels, self.greylevels, len(self.distances), len(self.angles)),
                     dtype=np.uint32, order='C')
        self._glcm_loop(inputs, self.distances, self.angles, self.greylevels, P)
        return P

    def get_config(self):
        """Return the layer config; keys match the ``__init__`` arguments."""
        config = {
            'greylevels': self.greylevels,
            'angles': list(self.angles),
            'distances': list(self.distances),
        }
        base_config = super(GLCMLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
My execution code looks like this:
def quant(img, greylevels):
    """Quantise 8-bit pixel values (0..255) into ``greylevels`` bins.

    Each value ``v`` is mapped to ``v * greylevels // 256``, so the result
    always lies in ``[0, greylevels)``. For grey-level counts that divide 256
    this equals ``v // (256 // greylevels)``.

    Args:
        img: array-like image (e.g. a NumPy array or PIL image).
        greylevels: number of bins, between 1 and 256.

    Returns:
        NumPy array with the same shape and dtype as ``array(img)``.

    Raises:
        ValueError: if ``greylevels`` is outside 1..256.
    """
    if not 1 <= greylevels <= 256:
        raise ValueError("greylevels must be between 1 and 256")
    pixels = array(img)
    # Widen before multiplying so uint8 input cannot overflow.
    binned = pixels.astype("uint32") * greylevels // 256
    return binned.astype(pixels.dtype)
if __name__ == "__main__":
    # Placeholder path of the picture to analyse.
    source_file = "<some sour file>"
    # Load as a 150x150 greyscale picture, then reduce it to 32 grey levels.
    img_raw = image.load_img(source_file, color_mode="grayscale", target_size=(150, 150))
    img = quant(img_raw, greylevels=32)
    layer = GLCMLayer()
    # Time a single call of the layer.
    started_at = time()
    aug = layer(img)
    elapsed = time() - started_at
    tf.print(elapsed)
This is my first step to create it as a preprocessing layer. The second step then will be to modify it to run it also as hidden layer inside a model. That is why I didn't put it to a complete model yet, but I feel like there will be additional changes required when doing so.
For some reason the execution time is about 15-20 seconds long. Executing the code on the CPU without the layer takes about 0.0009 seconds. Obviously, something is going wrong here.
I am fairly new to tf and keras, so I fear I am missing something in the way on how to use the framework. In order to resolve it, I read about (which doesn't mean I understood):
do not use np-functions inside tensorflow, but tf-functions instead,
use tf.Variable,
use tf.Data,
unfolding is not possible in some way (whatever that means)
I tried a little here and there, but couldn't get them running, instead finding various different exceptions. So my questions are:
What is the correct way to use tf-functions in a GLCM to get the best performance on the GPU?
What do I need to take care of when using the layer in a complete model?
From that point on, I should hopefully be able to then implement the GLCM properties.
Any help is greatly appreciated.
(Disclaimer: I assume that there is a lot of other stuff not optimal yet, if anything comes to your mind just add it.)

How to calculate the confidence intervals for prediction in Regression? and also how to plot it in python

Fig 7.1, An Introduction To Statistical Learning
I am currently studying a book named Introduction to Statistical Learning with applications in R, and also converting the solutions to python language.
I cannot work out how to compute the confidence intervals and plot them as the dashed lines shown in the image above.
I have plotted the line. Here's my code for that -
(I am using polynomial regression with predictiors - 'age' and response - 'wage',degree is 4)
# Fit a degree-4 polynomial of 'age' to 'wage' with OLS and plot the fitted curve.
poly = PolynomialFeatures(4)
X = poly.fit_transform(data['age'].to_frame())
y = data['wage']
# X.shape
model = sm.OLS(y,X).fit()
print(model.summary())
# What we want here is not only the fitted line but also the standard error associated with it.
# To find that, we need to calculate the predictions for a range of age values.
# NOTE(review): statsmodels results objects presumably expose
# get_prediction(X_test).summary_frame(), which reports confidence bounds for
# the mean prediction -- confirm against the statsmodels documentation.
test_ages = np.linspace(data['age'].min(),data['age'].max(),100)
X_test = poly.transform(test_ages.reshape(-1,1))
pred = model.predict(X_test)
plt.figure(figsize = (12,8))
# Raw observations as hollow grey markers, fitted curve drawn on top.
plt.scatter(data['age'],data['wage'],facecolors='none', edgecolors='darkgray')
plt.plot(test_ages,pred)
Here data is WAGE data which is available in R.
This is the resulting graph i get -
I have used bootstrapping to calculate the confidence intervals; for this I wrote a small custom module -
import numpy as np
import pandas as pd
from tqdm import tqdm
class Bootstrap_ci:
    """Percentile-bootstrap confidence bands for a regression model's predictions."""

    def boot(self, X_data, y_data, R, test_data, model, num_samples=None):
        """Return the 2.5th and 97.5th percentile prediction curves.

        The model is refitted ``R`` times on rows drawn with replacement and
        asked to predict ``test_data`` each time.

        Args:
            X_data: ``pandas.DataFrame`` of predictors.
            y_data: responses, one per row of ``X_data`` (same order).
            R: number of bootstrap replicates.
            test_data: inputs to predict in every replicate.
            model: estimator with ``fit(X, y)`` and ``predict(X)``; it is
                refitted in place on every replicate.
            num_samples: rows drawn per replicate. Defaults to ``len(X_data)``,
                the usual bootstrap sample size; drawing fewer rows (the value
                used to be fixed at 200) makes the bands too wide.

        Returns:
            Tuple ``(lower, upper)`` of arrays aligned with ``test_data``.

        Raises:
            ValueError: if ``X_data`` and ``y_data`` differ in length.
        """
        if len(X_data) != len(y_data):
            raise ValueError("X_data and y_data must have the same number of rows")
        if num_samples is None:
            num_samples = len(X_data)
        # Give y the same labels as X, so the label-based selection in alpha()
        # picks matching rows even when the two came with different indexes.
        y_data = pd.Series(np.asarray(y_data).ravel(), index=X_data.index)
        predictions = []
        for _ in tqdm(range(R)):
            sampled = self.get_indices(X_data, num_samples)
            predictions.append(self.alpha(X_data, y_data, sampled, test_data, model))
        lower = np.percentile(predictions, 2.5, axis=0)
        upper = np.percentile(predictions, 97.5, axis=0)
        return lower, upper

    def alpha(self, X_data, y_data, index, test_data, model):
        """Fit ``model`` on the rows labelled ``index`` and predict ``test_data``."""
        X = X_data.loc[index]
        y = y_data.loc[index]
        lr = model
        lr.fit(pd.DataFrame(X), y)
        return lr.predict(pd.DataFrame(test_data))

    def get_indices(self, data, num_samples):
        """Draw ``num_samples`` index labels from ``data`` with replacement."""
        return np.random.choice(data.index, num_samples, replace=True)
The above module can be used as -
# Build degree-4 polynomial features of 'age' for training and for an evenly
# spaced grid of 100 ages, then bootstrap the prediction bands on that grid.
poly = PolynomialFeatures(4)
X = poly.fit_transform(data['age'].to_frame())
y = data['wage']
X_test = np.linspace(min(data['age']),max(data['age']),100)
X_test_poly = poly.transform(X_test.reshape(-1,1))
from bootstrap import Bootstrap_ci
bootstrap = Bootstrap_ci()
# 1000 bootstrap replicates; li / ui are the lower and upper percentile curves.
li,ui = bootstrap.boot(pd.DataFrame(X),y,1000,X_test_poly,LinearRegression())
This will give us the lower confidence interval, and upper confidence interval.
To plot the graph -
# Observations as hollow grey markers, the fitted curve, and the two
# bootstrap bounds as dashed red lines (only the upper one gets a legend label).
# NOTE(review): `pred` is not defined in this snippet; presumably it is the
# prediction array computed for the same 100-point age grid -- confirm.
plt.scatter(data['age'],data['wage'],facecolors='none', edgecolors='darkgray')
plt.plot(X_test,pred,label = 'Fitted Line')
plt.plot(X_test,ui,linestyle = 'dashed',color = 'r',label = 'Confidence Intervals')
plt.plot(X_test,li,linestyle = 'dashed',color = 'r')
The resultant graph is
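The following code gives a 95% confidence interval for the model's RMSE on the test set (a t-interval around the mean squared error, followed by a square root); note that this is an interval for the overall error, not for the individual predictions: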
from scipy import stats
confidence = 0.95
squared_errors = (<<predicted values>> - <<true y_test values>>) ** 2
np.sqrt(stats.t.interval(confidence, len(squared_errors) - 1,
loc=squared_errors.mean(),
scale=stats.sem(squared_errors)))

One-hot encoding Tensorflow Strings

I have a list of strings as labels for training a neural network. Now I want to convert them via one_hot encoding so that I can use them for my tensorflow network.
My input list looks like this:
labels = ['"car"', '"pedestrian"', '"car"', '"truck"', '"car"']
The requested outcome should be something like
one_hot [0,1,0,2,0]
What is the easiest way to do this? Any help would be much appreciated.
Cheers,
Andi
The desired outcome looks like the output of sklearn's LabelEncoder, not OneHotEncoder. In TensorFlow you need CategoryEncoding - BUT it is a preprocessing layer which encodes integer features:
# Minimal model whose only layer is a CategoryEncoding preprocessing layer.
# NOTE(review): `layers`, `keras`, `tf` and `X` are not defined in this
# snippet; presumably they come from the surrounding session -- confirm.
inp = layers.Input(shape=[X.shape[0]])
# num_tokens and output_mode are passed explicitly (3 classes, multi-hot output).
x0 = layers.CategoryEncoding(
num_tokens=3, output_mode="multi_hot")(inp)
model = keras.Model(inputs=[inp], outputs=[x0])
model.compile(optimizer= 'adam',
loss='categorical_crossentropy',
metrics=[tf.keras.metrics.CategoricalCrossentropy()])
# model.summary() prints the table itself, so print() additionally shows its return value.
print(model.summary())
This part produces the encoding of the unique values. You can add another branch to this model that takes your initial vector as input and fits it against the labels from this reference branch (much like joining a reference table with a fact table in a database); the model then combines the reference data with your actual data to produce the output.
Note that num_tokens=3 and output_mode="multi_hot" are given explicitly, and that the integer codes for the class names have to be computed before the model is used, as a feature-engineering step - for example like this (in a pd.DataFrame):
import numpy as np
import pandas as pd
# Encode string labels as integers, numbered in order of first appearance.
d = dict(transport_col=['"car"', '"pedestrian"', '"car"', '"truck"', '"car"'])
dataset_df = pd.DataFrame(d)

# Distinct labels, in the order they first occur in the column.
classes = list(dataset_df['transport_col'].unique())
print(f"Label classes: {classes}")

# Look every label up in a label -> position table.
position_of = {label: position for position, label in enumerate(classes)}
df = dataset_df['transport_col'].map(position_of).copy()
print(df)
from manual example REF: Encode the categorical label into an integer.
Details: This stage is necessary if your classification label is represented as a string. Note: Keras expected classification labels to be integers.
in another architecture, perhaps, you could use StringLookup
# Sketch: map the string labels to integer ids with a StringLookup layer.
# NOTE(review): `labels` is a plain Python list in the question, and a list has
# no `.shape`; presumably it has to be wrapped in np.array(...) first -- confirm.
vocab= np.array(np.unique(labels))
inp = tf.keras.Input(shape= labels.shape[0], dtype=tf.string)
x = tf.keras.layers.StringLookup(vocabulary=vocab)(inp)
but labels are dependent vars usually, as opposed to features, and shouldn't be used at Input
Everything in keras.docs
possible FULL CODE:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf  # was used below without being imported
from tensorflow.keras import backend as K

# One sample holding five string labels.
X = np.array([['"car"', '"pedestrian"', '"car"', '"truck"', '"car"']])
vocab = np.unique(X)
print(vocab)
# Expected integer encoding, kept for reference.
y = np.array([[0, 1, 0, 2, 0]])

# The per-sample shape is the number of labels per row (axis 1), not the
# number of rows.
inp = keras.layers.Input(shape=[X.shape[1]], dtype='string')
x0 = tf.keras.layers.StringLookup(vocabulary=vocab, name='finish')(inp)
model = keras.Model(inputs=[inp], outputs=[x0])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=[tf.keras.metrics.categorical_crossentropy])
print(model.summary())

# Evaluate every layer's output for X.
for layerIndex, layer in enumerate(model.layers):
    print(layerIndex)
    func = K.function([model.get_layer(index=0).input], layer.output)
    layerOutput = func([X])  # input_data is a numpy array
    print(layerOutput)
    if layerIndex == 1:  # the last layer here
        # StringLookup reserves index 0 for out-of-vocabulary tokens by
        # default, so vocabulary ids start at 1; shift them to start at 0.
        scale = lambda x: x - 1
        print(scale(layerOutput))
res:
[[0 1 0 2 0]]
another possible Solution for your case - layers.TextVectorization
import numpy as np
import keras

# One sample holding five string labels, shape (1, 5).
input_array = np.atleast_2d(np.array(['"car"', '"pedestrian"', '"car"', '"truck"', '"car"']))
vocab = np.unique(input_array)

input_data = keras.Input(shape=(None,), dtype='string')
# `layers` was never imported in this snippet; reach the class through `keras`.
# standardize=None and split=None keep every label as a single, unmodified token.
layer = keras.layers.TextVectorization(max_tokens=None, standardize=None, split=None,
                                       output_mode="int", vocabulary=vocab)
int_data = layer(input_data)
model = keras.Model(inputs=input_data, outputs=int_data)

output_dataset = model.predict(input_array)
# In "int" mode TextVectorization reserves index 0 for padding and index 1 for
# out-of-vocabulary tokens, so the vocabulary ids start at 2.
print(output_dataset)
# Shift the ids so that they start at 0.
scale = lambda x: x - 2
print(scale(output_dataset))
result:
array([[0, 1, 0, 2, 0]])

TensorFlow example save mandelbrot image

Learning how to use tensorflow, first tutorial code on mandelbrot set below
# Import libraries for simulation
import tensorflow as tf
import numpy as np
# Imports for visualization
import PIL.Image
from io import BytesIO
from IPython.display import Image, display
def DisplayFractal(a, fmt='jpeg', save_path=None):
    """Display an array of iteration counts as a
    colorful picture of a fractal.

    Args:
        a: 2-D NumPy array of iteration counts.
        fmt: image format name handed to PIL when encoding (default 'jpeg').
        save_path: optional file path; when given, the picture is also written
            there in format ``fmt``. This keeps the result available when the
            code runs outside a notebook, where ``display`` shows no image.
    """
    # Map each count onto a colour cycle and add a trailing channel axis.
    a_cyclic = (6.28*a/20.0).reshape(list(a.shape)+[1])
    img = np.concatenate([10+20*np.cos(a_cyclic),
                          30+50*np.sin(a_cyclic),
                          155-80*np.cos(a_cyclic)], 2)
    # Points that reached the maximum count are painted black.
    img[a==a.max()] = 0
    pixels = np.uint8(np.clip(img, 0, 255))
    picture = PIL.Image.fromarray(pixels)
    if save_path is not None:
        picture.save(save_path, fmt)
    # Encode in memory and hand the bytes to IPython for inline display.
    f = BytesIO()
    picture.save(f, fmt)
    display(Image(data=f.getvalue()))
# Iterate z -> z^2 + x over a grid of complex numbers and count, per grid
# point, the number of steps for which the value stayed below the divergence
# threshold. InteractiveSession makes the .run() / .eval() calls below work
# without passing a session explicitly.
sess = tf.InteractiveSession()
# Use NumPy to create a 2D array of complex numbers
Y, X = np.mgrid[-1.3:1.3:0.005, -2:1:0.005]
Z = X+1j*Y
xs = tf.constant(Z.astype(np.complex64))
# zs holds the current iterate, ns the per-point step counter.
zs = tf.Variable(xs)
ns = tf.Variable(tf.zeros_like(xs, tf.float32))
tf.global_variables_initializer().run()
# Compute the new values of z: z^2 + x
zs_ = zs*zs + xs
# Have we diverged with this new value?
not_diverged = tf.abs(zs_) < 4
# Operation to update the zs and the iteration count.
#
# Note: We keep computing zs after they diverge! This
# is very wasteful! There are better, if a little
# less simple, ways to do this.
#
step = tf.group(
zs.assign(zs_),
ns.assign_add(tf.cast(not_diverged, tf.float32))
)
# Run 200 update steps, then render the counters.
for i in range(200): step.run()
DisplayFractal(ns.eval())
returns this on shell
<IPython.core.display.Image at 0x7fcdee1da810>
It doesn't display the image and I'd prefer if it saved the image.
How can I save the result as an image?
Scipy has an easy image save function! https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.misc.imsave.html
You should try this:
# Write the raw iteration counts to a PNG file.
# NOTE(review): scipy.misc.imsave was deprecated in SciPy 1.0 and removed in
# SciPy 1.2, so this only works on older SciPy releases; on newer versions an
# image library such as imageio or PIL is needed instead.
import scipy.misc
scipy.misc.imsave('mandelbrot.png',ns.eval())
I hope this works! Regardless, let me know!