Why is this model giving me a "ValueError"? - numpy

It's giving me a ValueError that I don't understand. Here is what it says:
"Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays." My data has 8 columns and I'm trying to predict the last 2 as output.
This is a ranking algorithm that I'm experimenting with on my own data:
import pandas as pd
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras import backend
from keras.layers import Activation, Dense, Input, Subtract
from keras.models import Model
INPUT_DIM = 7
# Model.
h_1 = Dense(128, activation="relu")
h_2 = Dense(64, activation="relu")
h_3 = Dense(32, activation="relu")
s = Dense(1)
# Relevant document score.
rel_doc = Input(shape=(INPUT_DIM,), dtype="float32")
h_1_rel = h_1(rel_doc)
h_2_rel = h_2(h_1_rel)
h_3_rel = h_3(h_2_rel)
rel_score = s(h_3_rel)
# Irrelevant document score.
irr_doc = Input(shape=(INPUT_DIM,), dtype="float32")
h_1_irr = h_1(irr_doc)
h_2_irr = h_2(h_1_irr)
h_3_irr = h_3(h_2_irr)
irr_score = s(h_3_irr)
# Subtract scores.
diff = Subtract()([rel_score, irr_score])
# Pass difference through sigmoid function.
prob = Activation("sigmoid")(diff)
# Build model.
model = Model(inputs=[rel_doc, irr_doc], outputs=prob)
model.compile(optimizer="adadelta", loss="binary_crossentropy")
# data.
data=pd.read_csv('ranking_dataset_remastered.csv')
print (data.head())
X = data.iloc[:, 1:7]
y = data.iloc[:, 6:7]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Train model.
NUM_EPOCHS = 20
BATCH_SIZE = 512
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)
# Generate scores from document/query features.
get_score = backend.function([rel_doc], [rel_score])
get_score([X_train])
get_score([y_train])

When you defined your model with this line:
model = Model(inputs=[rel_doc, irr_doc], outputs=prob)
You created what Keras refers to as a multi-input model, which essentially means that your model expects more than one input (in your case two: rel_doc and irr_doc).
However, during training you are passing just one input, X_train:
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)
To make this work, you should have two arrays, one representing relevant documents and one representing irrelevant documents, and feed both to the model during training, like this:
history = model.fit([X_rel_train, X_irr_train], y_train, batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS, verbose=1)
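For illustration, here is a minimal sketch with dummy arrays (X_rel_train, X_irr_train and the random data are placeholders; your real arrays have to come from your own CSV) that shows the input structure this two-input model expects:
import numpy as np
n_samples = 1000
# one 7-feature row per document in each pair (placeholder random data)
X_rel_train = np.random.rand(n_samples, INPUT_DIM).astype("float32")
X_irr_train = np.random.rand(n_samples, INPUT_DIM).astype("float32")
# each pair is ordered (relevant should outrank irrelevant), so the target probability is 1
y_train = np.ones((n_samples, 1), dtype="float32")
history = model.fit([X_rel_train, X_irr_train], y_train,
                    batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, verbose=1)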

Related

ValueError: Input 0 of layer “Discriminator” is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score,\
accuracy_score, balanced_accuracy_score,classification_report,\
plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
from sklearn.utils import shuffle
import pickle
from tqdm import tqdm
from scipy import stats
np.random.seed(1635848)
def get_data_XYZ_one_dimensional(n, a=-2, c=1/2, random_state=None, verbose=True):
    """
    Generates pseudo-random data distributed according to the distribution defined in section 2.1 of the document
    "Math/Confounders and data generation.pdf".
    :param n: Number of data points to generate.
    :param a: Mean of X.
    :param c: Shape parameter for Weibull distribution.
    :param random_state: Used to set the seed of numpy.random before generation of random numbers.
    :param verbose: If True will display a progress bar. If False it will not display a progress bar.
    :return: Pandas DataFrame with three columns (corresponding to X, Y and Z) and n rows (corresponding to the n
        generated pseudo-random samples).
    """
    np.random.seed(random_state)
    output = []
    iterator = tqdm(range(n)) if verbose else range(n)
    for _ in iterator:
        X = stats.norm.rvs(loc=-2, scale=1)
        Y = stats.bernoulli.rvs(p=1/(1+np.exp(-X)))
        if Y == 0:
            Z = stats.expon.rvs(scale=np.exp(-X))  # note: np.exp(-X) could be cached for more computational efficiency but would render the code less useful
        elif Y == 1:
            Z = stats.weibull_min.rvs(c=c, scale=np.exp(-X))
        else:
            assert False
        output.append((X, Y, Z))
    return pd.DataFrame(output, columns=["Personal information", "Treatment", "Time to event"])
data = get_data_XYZ_one_dimensional(n=100, random_state=0)
print(data)
# The Architecture of CGAN
class cGAN():
    """
    Class containing 3 methods (and __init__): generator, discriminator and train.
    Generator is trained using random noise and label as inputs. Discriminator is trained
    using real/fake samples and labels as inputs.
    """
    def __init__(self, latent_dim=100, out_shape=3):
        self.latent_dim = latent_dim
        self.out_shape = out_shape
        self.num_classes = 2
        # using Adam as our optimizer
        optimizer = Adam(0.0002, 0.5)
        # building the discriminator
        self.discriminator = self.discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        # building the generator
        self.generator = self.generator()
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))
        gen_samples = self.generator([noise, label])
        # we don't train discriminator when training generator
        self.discriminator.trainable = False
        valid = self.discriminator([gen_samples, label])
        # combining both models
        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer,
                              metrics=['accuracy'])

    def generator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(128, input_dim=self.latent_dim))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(Dropout(0.2))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(self.out_shape, activation='tanh'))
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
        model_input = multiply([noise, label_embedding])
        gen_sample = model(model_input)
        model.summary()
        return Model([noise, label], gen_sample, name="Generator")

    def discriminator(self):
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()
        model.add(Dense(512, input_dim=self.out_shape, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(128, kernel_initializer=init))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
        model.add(Dense(1, activation='sigmoid'))
        gen_sample = Input(shape=(self.out_shape,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))
        model_input = multiply([gen_sample, label_embedding])
        validity = model(model_input)
        model.summary()
        return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")

    def train(self, X_train, y_train, pos_index, neg_index, epochs, sampling=False, batch_size=32, sample_interval=100, plot=True):
        # though not recommended, defining losses as global helps as in analysing our cgan out of the class
        global G_losses
        global D_losses
        G_losses = []
        D_losses = []
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # if sampling==True --> train discriminator with 8 sample from positive class and rest with negative class
            if sampling:
                idx1 = np.random.choice(pos_index, 3)
                idx0 = np.random.choice(neg_index, batch_size-3)
                idx = np.concatenate((idx1, idx0))
            # if sampling!=True --> train discriminator using random instances in batches of 32
            else:
                idx = np.random.choice(len(y_train), batch_size)
            samples, labels = X_train[idx], y_train[idx]
            samples, labels = shuffle(samples, labels)
            # Sample noise as generator input
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_samples = self.generator.predict([noise, labels])
            # label smoothing
            if epoch < epochs//1.5:
                valid_smooth = (valid+0.1)-(np.random.random(valid.shape)*0.1)
                fake_smooth = (fake-0.1)+(np.random.random(fake.shape)*0.1)
            else:
                valid_smooth = valid
                fake_smooth = fake
            # Train the discriminator
            self.discriminator.trainable = True
            d_loss_real = self.discriminator.train_on_batch([samples, labels], valid_smooth)
            d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake_smooth)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # Train Generator
            self.discriminator.trainable = False
            sampled_labels = np.random.randint(0, 2, batch_size).reshape(-1, 1)
            # Train the generator
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)
            if (epoch+1) % sample_interval == 0:
                print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
                      % (epoch, epochs, d_loss[0], g_loss[0]))
            G_losses.append(g_loss[0])
            D_losses.append(d_loss[0])
            if plot:
                if epoch+1 == epochs:
                    plt.figure(figsize=(10,5))
                    plt.title("Generator and Discriminator Loss")
                    plt.plot(G_losses, label="G")
                    plt.plot(D_losses, label="D")
                    plt.xlabel("iterations")
                    plt.ylabel("Loss")
                    plt.legend()
                    plt.show()
data.Treatment.value_counts()
scaler = StandardScaler()
X = scaler.fit_transform(data.drop('Treatment', 1))
y = data['Treatment'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
lgb_1 = lgb.LGBMClassifier()
lgb_1.fit(X_train, y_train)
y_pred = lgb_1.predict(X_test)
# evaluation
print(classification_report(y_test, y_pred))
plot_confusion_matrix(lgb_1, X_test, y_test)
plt.show()
le = preprocessing.LabelEncoder()
for i in ['Personal information', 'Treatment', 'Time to event']:
    data[i] = le.fit_transform(data[i].astype(str))
y_train = y_train.reshape(-1,1)
pos_index = np.where(y_train==1)[0]
neg_index = np.where(y_train==0)[0]
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)
Here, training gives the error ValueError: Input 0 of layer "Discriminator" is incompatible with the layer: expected shape=(None, 3), found shape=(100, 2). I understand that I have to fix the shape by changing an input, but where and how do I do it?
Also, there are 3 columns in data, so how do I go about making this work?
I think the fix is out_shape=2 rather than 3, because the samples you feed the discriminator have 2 features, and you stated the number of classes to be 2 as well. Unless there is something else I am missing:
def __init__(self, latent_dim=100, out_shape=2):
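Equivalently (a minimal sketch, assuming the rest of the training code from the question stays the same), you could leave the default alone and pass the value at instantiation; note that the cgan = cGAN(...) line is hypothetical here, since the instantiation is not shown in the question:
# X has 2 columns after dropping 'Treatment', so the discriminator's
# real-sample input must also be 2-dimensional
cgan = cGAN(latent_dim=100, out_shape=2)
cgan.train(X_train, y_train, pos_index, neg_index, epochs=500)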

Strange results from a neural network build using Keras

I built a sentiment classifier using Keras to predict whether a sentence has a sentiment score of 1, 2, 3, 4 or 5. However, I am getting some strange results. I will first show my code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
# the data only reflects the structure of the actual data
# the real data has way larger text and more entries
X_train = ['i am glad i heard about that', 'that is one ugly bike']
y_train = pd.Series(np.array([1, 4])) # pandas series
X_test = ['that hurted me']
y_test = pd.Series(np.array([1, 4])) # pandas series
# tokenizing
tokenizer = Tokenizer(num_words = 5)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
# performing some padding
padding_len = 4
X_train_seq_padded = pad_sequences(X_train_seq, maxlen = padding_len)
X_test_seq_padded = pad_sequences(X_test_seq, maxlen = padding_len)
# building the model
model = Sequential()
model.add(Dense(16, input_dim = padding_len, activation = 'relu', name = 'hidden-1'))
model.add(Dense(16, activation = 'relu', name = 'hidden-2'))
model.add(Dense(16, activation = 'relu', name = 'hidden-3'))
model.add(Dense(6, activation='softmax', name = 'output_layer'))
# compiling the model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy'])
# training the model
callbacks = [EarlyStopping(monitor = 'accuracy', patience = 5, mode = 'max')]
my_model = model.fit(X_train_seq_padded, to_categorical(y_train), epochs = 100, batch_size = 1000, callbacks = callbacks, validation_data = (X_test, to_categorical(y_test)))
Using the actual data I keep getting results around 0.67xx (xx being random digits), which are reached after 1-2 epochs, no matter what changes I introduce to the code (and some of them are extreme).
I tried changing the padding to 1, 10, 100, 1000.
I tried removing the layer hidden-2 and hidden-3.
I tried adding stop word removal before tokenizing.
I tried using the tanh activation function in the hidden layers.
I used the sgd optimizer.
Example output of one setup:
Now my question is, is there something wrong with my code or are these actual possible results?

Implementing Variational Auto Encoder using Tensorflow_Probability NOT on MNIST data set

I know there are many questions related to Variational Auto Encoders. However, this question differs from the existing ones in two aspects: 1) it is implemented using Tensorflow V2 and Tensorflow_probability; 2) it does not use MNIST or any other image data set.
As for the problem itself:
I am trying to implement a VAE using Tensorflow_probability and Keras, and I want to train and evaluate it on some synthetic data sets as part of my research. I provide the code below.
Although the implementation is done and the loss value decreases during training, once I want to evaluate the trained model on my test set I face different errors.
I am fairly confident that the issue is related to the input/output shapes, but unfortunately I did not manage to solve it.
Here is the code:
import numpy as np
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow_probability as tfp
from tensorflow.keras import layers as tfkl
from sklearn.datasets import make_classification
from tensorflow_probability import layers as tfpl
from sklearn.model_selection import train_test_split
tfd = tfp.distributions
n_epochs = 5
n_features = 2
latent_dim = 1
n_units = 4
learning_rate = 1e-3
n_samples = 400
batch_size = 32
# Generate synthetic data / load data sets
x_in, y_in = make_classification(n_samples=n_samples, n_features=n_features, n_informative=2, n_redundant=0,
                                 n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=[0.5, 0.5],
                                 flip_y=0.01, class_sep=1.0, hypercube=True,
                                 shift=0.0, scale=1.0, shuffle=False, random_state=42)
x_in = x_in.astype('float32')
y_in = y_in.astype('float32') # .reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x_in, y_in, test_size=0.4, random_state=42, shuffle=True)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5, random_state=42, shuffle=True)
print("shapes:", x_train.shape, y_train.shape, x_test.shape, y_test.shape, x_val.shape, y_val.shape)
prior = tfd.Independent(tfd.Normal(loc=[tf.zeros(latent_dim)], scale=1.), reinterpreted_batch_ndims=1)
train_dataset = tf.data.Dataset.from_tensor_slices(x_train).batch(batch_size)
valid_dataset = tf.data.Dataset.from_tensor_slices(x_val).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices(x_test).batch(batch_size)
encoder = tf.keras.Sequential([
    tfkl.InputLayer(input_shape=[n_features, ], name='enc_input'),
    tfkl.Lambda(lambda x: tf.cast(x, tf.float32)),  # - 0.5
    tfkl.Dense(n_units, activation='relu', name='enc_dense1'),
    tfkl.Dense(int(n_units / 2), activation='relu', name='enc_dense2'),
    tfkl.Dense(tfpl.MultivariateNormalTriL.params_size(latent_dim),
               activation=None, name='mvn_triL1'),
    tfpl.MultivariateNormalTriL(
        # weight >> num_train_samples or some thing except 1 to convert VAE to beta-VAE
        latent_dim, activity_regularizer=tfpl.KLDivergenceRegularizer(prior, weight=1.), name='bottleneck'),
])
decoder = tf.keras.Sequential([
    tfkl.InputLayer(input_shape=latent_dim, name='dec_input'),
    # tfkl.Dense(n_units, activation='relu', name='dec_dense1'),
    # tfkl.Dense(int(n_units * 2), activation='relu', name='dec_dense2'),
    tfpl.IndependentBernoulli([n_features], tfd.Bernoulli.logits, name='dec_output'),
])
vae = tfk.Model(inputs=encoder.inputs, outputs=decoder(encoder.outputs), name='VAE')
print("enoder:", encoder)
print(" ")
print("encoder.inputs:", encoder.inputs)
print(" ")
print(" encoder.outputs:", encoder.outputs)
print(" ")
print("decoder:", decoder)
print(" ")
print("decoder:", decoder.inputs)
print(" ")
print("decoder.outputs:", decoder.outputs)
print(" ")
# negative log likelihood i.e the E_{S(eps)} [p(x|z)];
# because the KL term was added in the last layer of the encoder, i.e., via activity_regularizer.
# this loss function takes two arguments, namely the original data points x, and the output of the model,
# which we call it rv_x (because it is a random variable)
negloglik = lambda x, rv_x: -rv_x.log_prob(x)
vae.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
            loss=negloglik)
vae.summary()
history = vae.fit(train_dataset, epochs=n_epochs, validation_data=valid_dataset,)
print("x.shape:", x_test.shape)
x_hat = vae(x_test)
print("original:")
print(x_test)
print(" ")
print("Decoded Random Samples:")
print(x_hat.sample())
print(" ")
print("Decoded Means:")
print(x_hat.mean())
The Questions:
With the above code I receive the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 80 values, but the requested shape has 160 [Op:Reshape]
As far as I know, we can add as many layers as we want in the decoder model before its output layer, as is done in convolutional VAEs, am I right?
If I uncomment the following two lines of code in decoder:
# tfkl.Dense(n_units, activation='relu', name='dec_dense1'),
# tfkl.Dense(int(n_units * 2), activation='relu', name='dec_dense2'),
I see the following warnings and the upcoming error:
WARNING:tensorflow:Gradients do not exist for variables ['dec_dense1/kernel:0', 'dec_dense1/bias:0', 'dec_dense2/kernel:0', 'dec_dense2/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['dec_dense1/kernel:0', 'dec_dense1/bias:0', 'dec_dense2/kernel:0', 'dec_dense2/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['dec_dense1/kernel:0', 'dec_dense1/bias:0', 'dec_dense2/kernel:0', 'dec_dense2/bias:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['dec_dense1/kernel:0', 'dec_dense1/bias:0', 'dec_dense2/kernel:0', 'dec_dense2/bias:0'] when minimizing the loss.
And the error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 640 values, but the requested shape has 160 [Op:Reshape]
Now the question is why the decoder layers are not used during training, as mentioned in the warning.
PS: I also tried to pass x_train, x_valid and x_test directly during the training and evaluation process, but it did not help.
Any help would be indeed appreciated.

How to use the model.predict for data after training tensorflow model?

I followed the guide found here (regression):
https://stackabuse.com/tensorflow-2-0-solving-classification-and-regression-problems/
using this dataset:
https://drive.google.com/file/d/1mVmGNx6cbfvRHC_DvF12ZL3wGLSHD9f_/view
and ended up with this code:
data = pd.read_csv(r'path')
X = data.iloc[:, 0:4].values
y = data.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
input_layer = Input(shape=(X.shape[1],))
dense_layer_1 = Dense(100, activation='relu')(input_layer)
dense_layer_2 = Dense(50, activation='relu')(dense_layer_1)
dense_layer_3 = Dense(25, activation='relu')(dense_layer_2)
output = Dense(1)(dense_layer_3)
model = Model(inputs=input_layer, outputs=output)
model.compile(loss="mean_squared_error" , optimizer="adam", metrics=["mean_squared_error"])
history = model.fit(X_train, y_train, batch_size=2, epochs=100, verbose=1, validation_split=0.2)
from sklearn.metrics import mean_squared_error
from math import sqrt
pred_train = model.predict(X_train)
print(np.sqrt(mean_squared_error(y_train,pred_train)))
pred = model.predict(X_test)
print(np.sqrt(mean_squared_error(y_test,pred)))
Everything works and the model gets trained, but how do I actually use it? I want to input 4 values and get the prediction in return. So, for example, take the array [9, 4554, 1950, 0.634] and then get the predicted value. No matter what I do, the model won't accept the data I am using.
Thanks for the help!
The main problem you are facing, as I understand it, is the dimension: you are passing [9, ..., 0.634], which has shape (4,), i.e. it is 1D, while the model expects 2D input just like X_train and X_test, as per the documentation. So you have to convert the 1D array to 2D.
How to convert:
import numpy as np
X_test=[9,...,0.634]
X_test=np.array(X_test)
X_test=X_test.reshape(1,4)
model.predict(X_test)
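Also, since X_train and X_test were standardized with the StandardScaler before training, a new raw sample would most likely need the same transform before predicting. A minimal sketch, assuming the fitted scaler sc from your question is still in scope:
import numpy as np
new_sample = np.array([[9, 4554, 1950, 0.634]])  # shape (1, 4): one row, four features
new_sample_scaled = sc.transform(new_sample)     # apply the scaler that was fitted on X_train
print(model.predict(new_sample_scaled))          # predicted value for this sample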

Keras model predict fails

I'm a bit of a noob in ML. I've been trying to get a model retrained on the flowers dataset to work with keras-js, without success. Whenever I try to run predict on the model I get "Error: predict() must take an object where the keys are the named inputs of the model: input_1." Please help. Here is my code:
test.py
import sys
import json
import numpy as np
from collections import defaultdict
# It's very important to put this import before keras,
# as explained here: Loading tensorflow before scipy.misc seems to cause imread to fail #1541
# https://github.com/tensorflow/tensorflow/issues/1541
import scipy.misc
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras import backend as K
from keras.utils import np_utils
import dataset
import net
np.random.seed(1337)
spe = 500
n = 299
batch_size = 128
nb_epoch = 1
nb_phase_two_epoch = 1
data_directory, test_directory, model_file_prefix = sys.argv[1:]
print "loading dataset"
X, y, tags = dataset.dataset(data_directory, n)
nb_classes = len(tags)
sample_count = len(y)
train_size = sample_count * 4 // 5
X_train = X[:train_size]
y_train = y[:train_size]
Y_train = np_utils.to_categorical(y_train, nb_classes)
X_test = X[train_size:]
y_test = y[train_size:]
Y_test = np_utils.to_categorical(y_test, nb_classes)
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=0,
    width_shift_range=0.125,
    height_shift_range=0.125,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest')
#datagen.fit(X_train)
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    data_directory,
    target_size=(299,299),
    batch_size=16,
    class_mode='categorical'
)
validation_generator = train_datagen.flow_from_directory(
    data_directory,
    target_size=(299,299),
    batch_size=16,
    class_mode='categorical'
)
def evaluate(model, vis_filename=None):
    Y_pred = model.predict(X_test, batch_size=batch_size)
    y_pred = np.argmax(Y_pred, axis=1)
    accuracy = float(np.sum(y_test==y_pred)) / len(y_test)
    print "accuracy:", accuracy
    confusion = np.zeros((nb_classes, nb_classes), dtype=np.int32)
    for (predicted_index, actual_index, image) in zip(y_pred, y_test, X_test):
        confusion[predicted_index, actual_index] += 1
    print "rows are predicted classes, columns are actual classes"
    for predicted_index, predicted_tag in enumerate(tags):
        print predicted_tag[:7],
        for actual_index, actual_tag in enumerate(tags):
            print "\t%d" % confusion[predicted_index, actual_index],
        print
    if vis_filename is not None:
        bucket_size = 10
        image_size = n // 4 # right now that's 56
        vis_image_size = nb_classes * image_size * bucket_size
        vis_image = 255 * np.ones((vis_image_size, vis_image_size, 3), dtype='uint8')
        example_counts = defaultdict(int)
        for (predicted_tag, actual_tag, normalized_image) in zip(y_pred, y_test, X_test):
            example_count = example_counts[(predicted_tag, actual_tag)]
            if example_count >= bucket_size**2:
                continue
            image = dataset.reverse_preprocess_input(normalized_image)
            image = image.transpose((1, 2, 0))
            image = scipy.misc.imresize(image, (image_size, image_size)).astype(np.uint8)
            tilepos_x = bucket_size * predicted_tag
            tilepos_y = bucket_size * actual_tag
            tilepos_x += example_count % bucket_size
            tilepos_y += example_count // bucket_size
            pos_x, pos_y = tilepos_x * image_size, tilepos_y * image_size
            vis_image[pos_y:pos_y+image_size, pos_x:pos_x+image_size, :] = image
            example_counts[(predicted_tag, actual_tag)] += 1
        vis_image[::image_size * bucket_size, :] = 0
        vis_image[:, ::image_size * bucket_size] = 0
        scipy.misc.imsave(vis_filename, vis_image)
print "loading original inception model"
model = net.build_model(nb_classes)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=["accuracy"])
# train the model on the new data for a few epochs
print "training the newly added dense layers"
print "samples per eph ",spe#X_train.shape[0]
model.fit_generator(train_generator,
                    samples_per_epoch=spe,
                    nb_epoch=nb_epoch,
                    validation_data=validation_generator,
                    nb_val_samples=spe,
                    )
net.save(model, tags, model_file_prefix)
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 172 layers and unfreeze the rest:
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=["accuracy"])
# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
print "fine-tuning top 2 inception blocks alongside the top dense layers"
for i in range(1,11):
    print "mega-epoch %d/10" % i
    model.fit_generator(train_generator,
                        samples_per_epoch=spe,
                        nb_epoch=nb_phase_two_epoch,
                        validation_data=validation_generator,
                        nb_val_samples=spe,
                        )
    #evaluate(model, str(i).zfill(3)+".png")
    # evaluate(model, "000.jpg")
    net.save(model, tags, model_file_prefix)
When run with keras-js I get the error:
Error: predict() must take an object where the keys are the named inputs of the model: input_1.
Please help.
Wasn't easy to read your code - indentation was off and I don't really know what's in dataset and the other imports.
That said, the problem is probably the format of X_test. During training, you use the output from ImageDataGenerator, which resizes the images to (299, 299) and rescales the pixel values, together with the other manipulations. During evaluation, you use the raw data in X_test directly.
Hope this helps /p
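If you still want to run evaluate on X_test, a rough sketch of mirroring that preprocessing before predict (assuming X_test holds raw 0-255 pixel arrays already sized 299x299, which may not match exactly what dataset.dataset() returns) could look like:
# hypothetical preprocessing step: apply the same rescale=1./255 the generators use
X_test_scaled = X_test.astype('float32') / 255.0
Y_pred = model.predict(X_test_scaled, batch_size=batch_size)
y_pred = np.argmax(Y_pred, axis=1)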