Visualizing self attention weights for sequence addition problem with LSTM? - tensorflow

I am using the Self Attention layer from here for a simple problem: adding all the numbers in a sequence that come before a delimiter. With training, I expect the neural network to learn which numbers to add, and with the Self Attention layer I expect to visualize where the model is focusing. The code to reproduce the results is the following:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from keract import get_activations
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Dense, Dropout, LSTM

from attention import Attention  # https://github.com/philipperemy/keras-attention-mechanism


def add_numbers_before_delimiter(n: int, seq_length: int, delimiter: float = 0.0,
                                 index_1: int = None) -> (np.array, np.array):
    """
    Task: Add all the numbers that come before the delimiter.
    x = [1, 2, 3, 0, 4, 5, 6, 7, 8, 9]. Result is y = 6.
    :param n: number of samples in (x, y).
    :param seq_length: length of the sequence of x.
    :param delimiter: value of the delimiter. Default is 0.0.
    :param index_1: index of the number that comes after the first 0.
    :return: two numpy arrays x and y of shape (n, seq_length, 1) and (n, 1).
    """
    x = np.random.uniform(0, 1, (n, seq_length))
    y = np.zeros(shape=(n, 1))
    for i in range(len(x)):
        if index_1 is None:
            a = np.random.choice(range(1, len(x[i])), size=1, replace=False)
        else:
            a = index_1
        y[i] = np.sum(x[i, 0:a])
        x[i, a] = delimiter
    x = np.expand_dims(x, axis=-1)
    return x, y
def main():
    np.random.seed(7)

    # data. definition of the problem.
    seq_length = 20
    x_train, y_train = add_numbers_before_delimiter(20_000, seq_length)
    x_val, y_val = add_numbers_before_delimiter(4_000, seq_length)

    # just arbitrary values. it's for visual purposes. easier to see than random values.
    test_index_1 = 4
    x_test, _ = add_numbers_before_delimiter(10, seq_length, 0, test_index_1)
    # x_test_mask is just a mask that, if applied to x_test, would still contain
    # the information needed to solve the problem.
    # we expect the attention map to look like this mask.
    x_test_mask = np.zeros_like(x_test[..., 0])
    x_test_mask[:, test_index_1:test_index_1 + 1] = 1

    model = Sequential([
        LSTM(100, input_shape=(seq_length, 1), return_sequences=True),
        Attention(name='attention_weight'),  # the class exported by the linked repo
        Dropout(0.2),
        Dense(1, activation='linear')
    ])
    model.compile(loss='mse', optimizer='adam')
    model.summary()

    output_dir = 'task_add_two_numbers'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    max_epoch = int(sys.argv[1]) if len(sys.argv) > 1 else 200

    class VisualiseAttentionMap(Callback):
        def on_epoch_end(self, epoch, logs=None):
            attention_map = get_activations(model, x_test, layer_names='attention_weight')['attention_weight']
            # top is the attention map; bottom is the ground truth.
            plt.imshow(np.concatenate([attention_map, x_test_mask]), cmap='hot')
            iteration_no = str(epoch).zfill(3)
            plt.axis('off')
            plt.title(f'Iteration {iteration_no} / {max_epoch}')
            plt.savefig(f'{output_dir}/epoch_{iteration_no}.png')
            plt.close()
            plt.clf()

    model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=max_epoch,
              batch_size=64, callbacks=[VisualiseAttentionMap()])


if __name__ == '__main__':
    main()
However, I get the attention weights shown in the linked animation of the weights during training (link in the original post). I expect the attention to focus on all values before the delimiter. The white row in the bottom half represents the ground truth, while the upper half represents the weights for the 10 test samples.
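Independently of the heatmap, a minimal sanity check (a sketch, assuming it is appended at the end of main() so that model, x_test and test_index_1 are in scope) can confirm whether the model solves the task at all; in x_test the delimiter sits at index 4, so the target is the sum of the first four values:
# hedged sanity check: compare predictions against the known ground truth
preds = model.predict(x_test)                                      # shape (10, 1)
targets = x_test[:, :test_index_1, 0].sum(axis=1, keepdims=True)   # sums before the delimiter
print('max abs error:', np.abs(preds - targets).max())
A small error here would suggest the model learned the task and only the visualization is misleading; a large one would point at the training itself.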

Related

ValueError: Input 0 of layer “Discriminator” is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, \
    accuracy_score, balanced_accuracy_score, classification_report, \
    plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import lightgbm as lgb
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
import pickle
from tqdm import tqdm

np.random.seed(1635848)
def get_data_XYZ_one_dimensional(n, a=-2, c=1/2, random_state=None, verbose=True):
    """
    Generates pseudo-random data distributed according to the distribution defined in section 2.1 of the
    document "Math/Confounders and data generation.pdf".
    :param n: Number of data points to generate.
    :param a: Mean of X.
    :param c: Shape parameter for the Weibull distribution.
    :param random_state: Used to seed numpy.random before generating random numbers.
    :param verbose: If True, displays a progress bar; if False, it does not.
    :return: Pandas DataFrame with three columns (corresponding to X, Y and Z) and n rows (corresponding
             to the n generated pseudo-random samples).
    """
    np.random.seed(random_state)
    output = []
    iterator = tqdm(range(n)) if verbose else range(n)
    for _ in iterator:
        X = stats.norm.rvs(loc=a, scale=1)  # the original hardcoded loc=-2, which matches the default a
        Y = stats.bernoulli.rvs(p=1 / (1 + np.exp(-X)))
        if Y == 0:
            Z = stats.expon.rvs(scale=np.exp(-X))  # np.exp(-X) could be cached, at some cost to readability
        elif Y == 1:
            Z = stats.weibull_min.rvs(c=c, scale=np.exp(-X))
        else:
            assert False
        output.append((X, Y, Z))
    return pd.DataFrame(output, columns=["Personal information", "Treatment", "Time to event"])


data = get_data_XYZ_one_dimensional(n=100, random_state=0)
print(data)
# The architecture of the cGAN
class cGAN():
    """
    Class containing 3 methods (and __init__): generator, discriminator and train.
    The generator is trained using random noise and labels as inputs. The discriminator
    is trained using real/fake samples and labels as inputs.
    """
    def __init__(self, latent_dim=100, out_shape=3):
        self.latent_dim = latent_dim
        self.out_shape = out_shape
        self.num_classes = 2
        # using Adam as our optimizer
        optimizer = Adam(0.0002, 0.5)
        # building the discriminator
        self.discriminator = self.discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        # building the generator
        self.generator = self.generator()
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))
        gen_samples = self.generator([noise, label])
        # we don't train the discriminator when training the generator
        self.discriminator.trainable = False
        valid = self.discriminator([gen_samples, label])
        # combining both models
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer,
                              metrics=['accuracy'])

    def generator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(128, input_dim=self.latent_dim))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(self.out_shape, activation='tanh'))
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
        model_input = multiply([noise, label_embedding])
        gen_sample = model(model_input)
        model.summary()
        return Model([noise, label], gen_sample, name="Generator")

    def discriminator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(512, input_dim=self.out_shape, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(128, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(1, activation='sigmoid'))
        gen_sample = Input(shape=(self.out_shape,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))
        model_input = multiply([gen_sample, label_embedding])
        validity = model(model_input)
        model.summary()
        return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")

    def train(self, X_train, y_train, pos_index, neg_index, epochs, sampling=False,
              batch_size=32, sample_interval=100, plot=True):
        # though not recommended, defining the losses as globals helps when analysing
        # the cGAN outside the class
        global G_losses
        global D_losses
        G_losses = []
        D_losses = []
        # adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # if sampling==True --> train the discriminator with 3 samples from the
            # positive class and the rest from the negative class
            if sampling:
                idx1 = np.random.choice(pos_index, 3)
                idx0 = np.random.choice(neg_index, batch_size - 3)
                idx = np.concatenate((idx1, idx0))
            # if sampling!=True --> train the discriminator on random instances in batches of 32
            else:
                idx = np.random.choice(len(y_train), batch_size)
            samples, labels = X_train[idx], y_train[idx]
            samples, labels = shuffle(samples, labels)
            # sample noise as the generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_samples = self.generator.predict([noise, labels])
            # label smoothing
            if epoch < epochs // 1.5:
                valid_smooth = (valid + 0.1) - (np.random.random(valid.shape) * 0.1)
                fake_smooth = (fake - 0.1) + (np.random.random(fake.shape) * 0.1)
            else:
                valid_smooth = valid
                fake_smooth = fake
            # train the discriminator
            self.discriminator.trainable = True
            d_loss_real = self.discriminator.train_on_batch([samples, labels], valid_smooth)
            d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake_smooth)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # train the generator
            self.discriminator.trainable = False
            sampled_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)
            if (epoch + 1) % sample_interval == 0:
                print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
                      % (epoch, epochs, d_loss[0], g_loss[0]))
                G_losses.append(g_loss[0])
                D_losses.append(d_loss[0])
            if plot:
                if epoch + 1 == epochs:
                    plt.figure(figsize=(10, 5))
                    plt.title("Generator and Discriminator Loss")
                    plt.plot(G_losses, label="G")
                    plt.plot(D_losses, label="D")
                    plt.xlabel("iterations")
                    plt.ylabel("Loss")
                    plt.legend()
                    plt.show()
data.Treatment.value_counts()
scaler = StandardScaler()
X = scaler.fit_transform(data.drop('Treatment', 1))
y = data['Treatment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
lgb_1 = lgb.LGBMClassifier()
lgb_1.fit(X_train, y_train)
y_pred = lgb_1.predict(X_test)
# evaluation
print(classification_report(y_test, y_pred))
plot_confusion_matrix(lgb_1, X_test, y_test)
plt.show()
le = preprocessing.LabelEncoder()
for i in ['Personal information', 'Treatment', 'Time to event']:
    data[i] = le.fit_transform(data[i].astype(str))
y_train = y_train.reshape(-1,1)
pos_index = np.where(y_train==1)[0]
neg_index = np.where(y_train==0)[0]
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)
Here, the training gives the error ValueError: Input 0 of layer "Discriminator" is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2). I understand I have to fix the shape by changing the input, but where and how do I do it?
Also, there are 3 columns in data, so how do I go about making this work?
I think the fix is out_shape=2, not 3, because the generated output has 2 features (X is built after dropping the 'Treatment' column) and you stated the number of classes to be 2 as well, unless there is something else I am missing:
def __init__(self, latent_dim=100, out_shape=2):
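A quick way to confirm this (a hedged sketch; the cgan instantiation below is illustrative, since the original snippet calls cgan.train without showing where cgan was created):
# X was built with data.drop('Treatment', 1), so real samples have 2 features,
# while out_shape=3 makes the discriminator expect inputs of shape (None, 3)
print(X_train.shape)  # expected: (n, 2)

# minimal fix consistent with the answer above
cgan = cGAN(latent_dim=100, out_shape=2)
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)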

ValueError: could not broadcast input array from shape (1221,) into shape (1221,7)

I am working on my project, 'Prediction of Stock Price using Stacked LSTM', but I am getting the error "ValueError: could not broadcast input array from shape (1221,) into shape (1221,7)" on the line
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
My code is:
import numpy
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
lstm_model = Sequential()
lstm_model.add(LSTM(50, return_sequences = True, input_shape = (100, 1)))
lstm_model.add(LSTM(50, return_sequences = True))
lstm_model.add(LSTM(50))
lstm_model.add(Dense(1))
lstm_model.compile(loss = 'mean_squared_error', optimizer = 'adam')
lstm_model.summary()
lstm_model.fit(X_train, Y_train, validation_data = (X_test, Y_test), epochs = 100, batch_size = 64, verbose = 1)
train_predict = lstm_model.predict(X_train)
test_predict = lstm_model.predict(X_test)
train_predict.shape
test_predict.shape
train_predict_dataset_like = np.zeros(shape=(len(train_predict), 7))
# put the predicted values in the right field
train_predict_dataset_like[:, 0] = train_predict[:, 0]
# inverse transform and then select the right field
train_predict = scaler.inverse_transform(train_predict_dataset_like)[:, 0]
test_predict_dataset_like = np.zeros(shape=(len(test_predict), 7))
# put the predicted values in the right field
test_predict_dataset_like[:, 0] = test_predict[:, 0]
# inverse transform and then select the right field
test_predict = scaler.inverse_transform(test_predict_dataset_like)[:, 0]
import math
from sklearn.metrics import mean_squared_error
math.sqrt(mean_squared_error(Y_train, train_predict))
math.sqrt(mean_squared_error(Y_test, test_predict))
### Plotting
# shift train predictions for plotting
look_back=100
trainPredictPlot = numpy.empty_like(data)
trainPredictPlot[:, :] = numpy.nan
##### This is where the error takes place ####
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(data)
testPredictPlot[:, :] = numpy.nan
##### This is where the error takes place ####
testPredictPlot[len(train_predict)+(look_back*2)+1:len(data)-1, :] = test_predict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(data))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
The shapes of train_predict and test_predict are (1221, 1) and (1221, 1) respectively.
I guess the shape of trainPredictPlot should be (num, 7), and trainPredictPlot[look_back:len(train_predict)+look_back, :] takes all the columns of trainPredictPlot, so the assignment target has shape (1221, 7), which doesn't match train_predict. So you need to do this:
trainPredictPlot[look_back:len(train_predict)+look_back, 1] = train_predict
The 1 above is a column index (the second column, since columns are 0-indexed); change it according to your actual data layout.
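A minimal reproduction of that fix with toy arrays (the shapes are taken from the question; note that train_predict must be 1-D here, which it is after the inverse_transform(...)[:, 0] step above):
import numpy as np

data = np.zeros((1500, 7))      # stand-in for the scaled dataset with 7 columns
train_predict = np.zeros(1221)  # stand-in for the 1-D predictions
look_back = 100

trainPredictPlot = np.empty_like(data)
trainPredictPlot[:, :] = np.nan
# write into one column instead of all 7 (the ':' was what caused the broadcast error)
trainPredictPlot[look_back:len(train_predict) + look_back, 1] = train_predict
print(trainPredictPlot.shape)   # (1500, 7), column 1 filled from row 100 onward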

How to match dimensions in CNN

I'm trying to build a CNN whose goal is to predict the label from 3 features, but it is giving a dimension error.
Could someone help me?
Updated after comments from @M.Innat.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential, load_model
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
import tensorflow as tf
import random
# Create data
n = 8500
l = [2, 3, 4, 5, 6]
k = int(np.ceil(n / len(l)))
labels = [item for item in l for i in range(k)]
random.shuffle(labels, random.random)
labels = np.array(labels)
label_unique = np.unique(labels)
x = np.linspace(613000, 615000, num=n) + np.random.uniform(-5, 5, size=n)
y = np.linspace(7763800, 7765800, num=n) + np.random.uniform(-5, 5, size=n)
z = np.linspace(1230, 1260, num=n) + np.random.uniform(-5, 5, size=n)
X = np.column_stack((x, y, z))
Y = labels
# Split the dataset into training and testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1234)
seq_len = len(X_train)
n_features = len(X_train[0])
droprate = 0.1
exit_un = len(label_unique)
print('n_features: {} \n seq_len: {} \n exit_un: {}'.format(n_features, seq_len, exit_un))
X_train = X_train[..., None][None, ...]  # add channel axis + batch axis
Y_train = pd.get_dummies(Y_train)  # transform to one-hot encoding
drop_prob = 0.5
my_model = Sequential()
my_model.add(Conv2D(input_shape=(seq_len, n_features, 1), filters=32, kernel_size=(3, 3),
                    padding='same', activation="relu"))  # 1 channel of grayscale
my_model.add(MaxPooling2D(pool_size=(2, 1)))
my_model.add(Conv2D(filters=64, kernel_size=(5, 5), padding='same', activation="relu"))
my_model.add(MaxPooling2D(pool_size=(2, 1)))
my_model.add(Flatten())
my_model.add(Dense(units=1024, activation="relu"))
my_model.add(Dropout(rate=drop_prob))
my_model.add(Dense(units=exit_un, activation="softmax"))

n_epochs = 100
batch_size = 10
learn_rate = 0.005

# define the optimizer and then compile
my_optimizer = Adam(lr=learn_rate)
my_model.compile(loss="categorical_crossentropy", optimizer=my_optimizer,
                 metrics=['categorical_crossentropy', 'accuracy'])
my_summary = my_model.fit(X_train, Y_train, epochs=n_epochs, batch_size = batch_size, verbose = 1)
The error I have is:
ValueError: Data cardinality is ambiguous:
    x sizes: 1
    y sizes: 5950
Make sure all arrays contain the same number of samples.
You're passing the input samples without the channel axis and also without the batch axis. Also, given your loss function, you should transform your integer labels to one-hot encoding.
exit_un = len(label_unique)
drop_prob = 0.5

X_train = X_train[..., None][None, ...]  # add channel axis + batch axis
X_train = np.repeat(X_train, repeats=100, axis=0)  # batching
Y_train = np.repeat(Y_train, repeats=100, axis=0)  # batching
Y_train = pd.get_dummies(Y_train)  # transform to one-hot encoding
print(X_train.shape, Y_train.shape)
my_model = Sequential()
...
Update
Based on the discussion, it seems you need a Conv1D operation at modeling time and need to reshape your samples as mentioned in the comment. Here is the colab; it should work now.
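Since the colab itself is not reproduced here, the following is only a sketch of what the Conv1D variant could look like; the reshape to (n_samples, 3, 1) and the one-hot targets follow the discussion above, while the layer sizes are illustrative assumptions:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

X_train_1d = X_train[..., None]              # (n_samples, 3, 1): features as "time" steps
Y_train_oh = pd.get_dummies(Y_train).values  # one-hot labels for categorical_crossentropy

model_1d = Sequential([
    Conv1D(32, kernel_size=2, padding='same', activation='relu', input_shape=(3, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(Y_train_oh.shape[1], activation='softmax'),
])
model_1d.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_1d.fit(X_train_1d, Y_train_oh, epochs=5, batch_size=32)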

Value Error due to Numpy returning an object

I'm trying to make the code piece at the end run. However, I'm getting the following error when I try to fit my model:
"ValueError: setting an array element with a sequence."
I'm trying to use an RNN to predict the next 5 days of prices. In the function create_ts I'm trying to create two time series: one with the first X items, and another with items X+1, X+2, X+3, X+4 and X+5, these five items being the next five days of prices I'd like to predict.
I suspect the problem is somewhere in here:
def create_ts(ds, series, day_gap):
    x, y = [], []
    for i in range(len(ds) - series - 1):
        item = ds[i:(i + series), 0]
        x.append(item)
        next_item = ds[i + series:(i + series + day_gap), 0]
        y.append(next_item)
        # print(type(np.array(x)), type(np.array(y)))
    return np.array(x), np.array(y).reshape(-1, 1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4, input_shape=(series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)
Thanks in advance for any help!
Below is the full code piece:
from keras import backend as k
import os
from importlib import reload

def set_keras_backend(backend):
    if k.backend() != backend:
        os.environ['KERAS_BACKEND'] = backend
        reload(k)
        assert k.backend() == backend

set_keras_backend("cntk")
import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.cross_validation import train_test_split  # sklearn.model_selection in newer scikit-learn versions
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math
np.random.seed(7)
#load dataset
fileloc = "C:\\Stock Data\\CL1.csv"
stock_data = pd.read_csv(fileloc)
stock_data.head()
stock_data.dtypes
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data['Price'] = pd.to_numeric(stock_data['Price'], downcast = 'float')
stock_data.set_index('Date', inplace=True)
stock_close = stock_data['Price']
stock_close = stock_close.values.reshape(len(stock_close), 1)
plt.plot(stock_close)
#normalize data
scaler = MinMaxScaler(feature_range = (0,1))
stock_close = scaler.fit_transform(stock_close)
#split data into a train, test set
train_size = int(len(stock_close)*0.7)
test_size = len(stock_close) - train_size
stock_train, stock_test = stock_close[0:train_size, :], stock_close[train_size:len(stock_close), :]
#convert the data into a time series looking back over a period fo days
def create_ts(ds, series, day_gap):
    x, y = [], []
    for i in range(len(ds) - series - 1):
        item = ds[i:(i + series), 0]
        x.append(item)
        next_item = ds[i + series:(i + series + day_gap), 0]
        y.append(next_item)
        # print(type(np.array(x)), type(np.array(y)))
    return np.array(x), np.array(y).reshape(-1, 1)
series = 5
predict_days = 5
train_x, train_y = create_ts(stock_train, series, predict_days)
test_x, test_y = create_ts(stock_test, series, predict_days)
#reshape into LSTM format - samples, steps, features
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
#build model
model = Sequential()
model.add(LSTM(4, input_shape=(series, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
#fit model
model.fit(train_x, train_y, epochs = 100, batch_size = 32)
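For reference, a minimal illustration (with a toy array rather than the CL1.csv data) of why np.array(y) ends up as an object array: near the end of the loop, create_ts builds windows shorter than day_gap, so y is ragged:
import numpy as np

ds = np.arange(12, dtype=float).reshape(-1, 1)
series, day_gap = 5, 5
y = [ds[i + series:i + series + day_gap, 0] for i in range(len(ds) - series - 1)]
print([len(item) for item in y])  # [5, 5, 5, 4, 3, 2]: the tail windows are short
# iterating only up to len(ds) - series - day_gap + 1 keeps every window full length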

How to feed my network with the correct array size in tensorflow

I have the following code, and I am trying to train the network on Belgian traffic signs; here is the code:
import tensorflow as tf
import os
import skimage.io
import skimage.color  # needed for the rgb2gray call further down
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

config = tf.ConfigProto(log_device_placement=True)
# config_soft = tf.ConfigProto(allow_soft_placement=True)
#config_soft = tf.ConfigProto(allow_soft_placement =True)
def load_data(data_directory):
    directories = [d for d in os.listdir(data_directory)
                   if os.path.isdir(os.path.join(data_directory, d))]
    labels = []
    images = []
    for d in directories:
        label_directory = os.path.join(data_directory, d)
        file_names = [os.path.join(label_directory, f)
                      for f in os.listdir(label_directory)
                      if f.endswith(".ppm")]
        for f in file_names:
            images.append(skimage.io.imread(f))
            labels.append(int(d))
    return images, labels
Root_Path = "/home/raed/Dropbox/Thesis/Codes/Tensorflow"
training_Directory = os.path.join(Root_Path,"Training")
testing_Directory = os.path.join(Root_Path,"Testing")
images, labels = load_data(training_Directory)
# convert the lists to arrays to facilitate information retrieval
images_array = np.asarray(images)
labels_array = np.asanyarray(labels)
#print some information about the datasets
print(images_array.ndim)
print(images_array.size)
print(labels_array.ndim)
print(labels_array.nbytes)
print(len(labels_array))
# plotting the distribution of different signs
sns.set(palette="deep")
plt.hist(labels,62)
# Selecting a couple of images based on their indices
traffic_signs = [300, 2250, 3650, 4000]
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
plt.show()

# Fill out the subplots with the images and add shape, min and max values
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0}, max:{1}, min:{2}".format(images_array[traffic_signs[i]].shape,
                                               images_array[traffic_signs[i]].max(),
                                               images_array[traffic_signs[i]].min()))
# Get the unique labels
unique_labels = set(labels_array)
# initialize the figure
plt.figure(figsize=(15, 15))
i = 1
for label in unique_labels:
    image = images_array[labels.index(label)]
    plt.subplot(8, 8, i)
    plt.axis('off')
    plt.title('label:{0} ({1})'.format(label, labels.count(label)))
    i = i + 1
    plt.imshow(image)
plt.show()
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.imshow(images_array[traffic_signs[i]])
    plt.axis('off')
    plt.show()
    print("Shape:{0}, max:{1}, min:{2}".format(images28_array[i].shape,
                                               images28_array[i].max(),
                                               images28_array[i].min()))
# convert to grayscale
gray_images = skimage.color.rgb2gray(images28_array)
for i in range(len(traffic_signs)):
    plt.subplot(1, 4, i + 1)
    plt.axis('off')
    plt.imshow(gray_images[traffic_signs[i]], cmap="gray")
    plt.subplots_adjust(wspace=0.5)
# Show the plot
plt.show()
# prepare placeholders
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28])
y = tf.placeholder(dtype=tf.int32, shape=[None])
# flatten the input data
images_flat = tf.layers.flatten(x)
# fully connected layer, multi-layer perceptron (MLP)
logits = tf.contrib.layers.fully_connected(images_flat, 62, tf.nn.relu)
# define the loss function
# (note: the _v2 op expects one-hot labels; sparse_softmax_cross_entropy_with_logits matches integer labels)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
# define an optimizer (here Adam)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# convert logits to label indices
correct_prediction = tf.arg_max(logits, 1)
# define an accuracy metric
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#########################################
print('######### Main Program #########')
#########################################
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss)
print("Optimizer:",optimizer)
print("predicted_labels: ", correct_prediction)
tf.set_random_seed(1235)
# images28 = np.asanyarray(images28).reshape(-1, 28, 28, 1)

# with tf.Session() as training_session:
#     training_session.run(tf.global_variables_initializer())
#     for i in range(201):
#         print('Epoch', i)
#         _, accuracy_value = training_session.run([optimizer, accuracy], feed_dict={x: images28, y: labels})
#         if i % 10 == 0:
#             print("Loss", loss)
#     print('Epochs Done!!')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(201):
        _, loss_value = sess.run([optimizer, loss], feed_dict={x: gray_images, y: labels})
        if i % 10 == 0:
            print("Loss: ", loss)
I also did a series of transformations before feeding the network, as follows:
images28 = [transform.resize(image, (28, 28)) for image in images]
images28_array = np.asanyarray(images28)
But on execution I get the following error:
ValueError: Cannot feed value of shape (4575, 28, 28, 3) for Tensor 'Placeholder_189:0', which has shape '(?, 28, 28)'
Could you please help me figure out where I am going wrong in training this network? Please refer to the following link for more information:
https://www.datacamp.com/community/tutorials/tensorflow-tutorial
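A hedged debugging step rather than a verified fix: the placeholder x is (None, 28, 28), so whatever goes into feed_dict must be grayscale with no trailing channel axis; the (4575, 28, 28, 3) in the error suggests the resized RGB images were fed instead of the grayscale ones:
# check what is actually being fed before sess.run
print(np.asanyarray(images28).shape)  # (4575, 28, 28, 3): still RGB
print(gray_images.shape)              # should be (4575, 28, 28) after rgb2gray
# feed_dict={x: gray_images, y: labels} then matches the (?, 28, 28) placeholder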