Tensorflow nonsense reshape values - tensorflow

When using tensorflow.keras.layers.Reshape I've been getting strange errors. Where is it getting the 47409408 value from? 207936 corresponds to the correct size (69312 * 3).
A weird aspect is that if I put a Flatten layer before the Reshape, it works.
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 304, 228, 3) 30
_________________________________________________________________
reshape (Reshape) (None, 69312, 3) 0
=================================================================
Total params: 30
Trainable params: 30
Non-trainable params: 0
____________________________________
(0) Invalid argument: Input to reshape is a tensor with 207936 values, but the requested shape has 47409408
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras import datasets, layers, models, preprocessing
import os
from natsort import natsorted
from tensorflow.keras.models import Model
BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 1e-4
#jpegs with values from 0 to 255
img_dir = ".../normalized_imgs"
# .npy files of size (69312,3)
pts_dir = ".../normalized_pts"
img_files = [os.path.join(img_dir, f)
for f in natsorted(os.listdir(img_dir))]
pts_files = [os.path.join(pts_dir, f)
for f in natsorted(os.listdir(pts_dir))]
img = Image.open(img_files[0])
pts = np.load(pts_files[0])
def parse_img_input(img_file, pts_file):
    def _parse_input(img_file, pts_file):
        # get image
        d_filepath = img_file.numpy().decode()
        d_image_decoded = tf.image.decode_jpeg(tf.io.read_file(d_filepath), channels=1)
        d_image = tf.cast(d_image_decoded, tf.float32) / 255.0
        # get numpy data
        pts_filepath = pts_file.numpy().decode()
        pts = np.load(pts_filepath, allow_pickle=True)
        print("d_image ", d_image.shape)
        return d_image, pts
    return tf.py_function(_parse_input,
                          inp=[img_file, pts_file],
                          Tout=[tf.float32, tf.float32])
class SimpleCNN(Model):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        input_shape = (img.size[0], img.size[1], 1)
        self.model = model = models.Sequential()
        model.add(tf.keras.Input(shape=input_shape))
        model.add(layers.Conv2D(3, (3, 3), padding='same'))
        model.add(layers.Reshape((pts.shape[0], pts.shape[1])))
# split input data into train, test sets
X_train_file, X_test_file, y_train_file, y_test_file = train_test_split(img_files, pts_files,
test_size=0.2,
random_state=0)
model = SimpleCNN()
dataset_train = tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
dataset_train = dataset_train.map(parse_img_input)
dataset_test = tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
dataset_test = dataset_test.map(parse_img_input)
model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss= tf.losses.MeanSquaredError(), metrics= [tf.keras.metrics.get('accuracy')])
model.fit(dataset_train, epochs=EPOCHS, shuffle=True, validation_data= dataset_test)
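For what it's worth, 47,409,408 = 228 × 69,312 × 3, i.e. the requested shape looks like the per-sample target (69312, 3) multiplied by a spurious batch dimension of 228, while 207,936 = 304 × 228 × 3 is exactly one conv output. One thing worth double-checking (not shown in the snippet above, so this is only a guess) is whether the dataset is ever batched before fit and whether the static shapes lost by tf.py_function are restored. A minimal sketch of that, reusing parse_img_input from above:
# Hypothetical sketch: restore static shapes after tf.py_function and batch the dataset
def parse_with_shapes(img_file, pts_file):
    d_image, pts = parse_img_input(img_file, pts_file)
    d_image.set_shape((304, 228, 1))   # image height, width, channels from the summary above
    pts.set_shape((69312, 3))          # shape of the .npy targets
    return d_image, pts

dataset_train = (tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
                 .map(parse_with_shapes)
                 .batch(BATCH_SIZE))
dataset_test = (tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
                .map(parse_with_shapes)
                .batch(BATCH_SIZE))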

Related

InvalidArgumentError: Matrix size-incompatible: In[0]: [256,3], In[1]: [65,1] [[{{node dense_51/BiasAdd}}]]

As I am new, please help me resolve this. I have been trying for months. Please ignore any small mistakes and focus on the error. Thanks in advance. I have been reshaping tensors since I got this code and have now reached the bottom-most layer, but I am still getting the error mentioned above in the heading.
If you need any HDF5 files, please let me know your email ID.
import tensorflow as tf
tf.compat.v1.disable_v2_behavior()
from __future__ import print_function, division
import numpy as np
import h5py
import scipy.io
import random
import sys,os
import itertools
import numbers
from collections import Counter
from warnings import warn
from abc import ABCMeta, abstractmethod
from tensorflow import keras, reshape
np.random.seed(1337)
import keras
from keras.optimizers import RMSprop, SGD
from keras.models import Sequential, model_from_yaml
from keras.layers.core import Dense, Dropout, Activation, Flatten
import keras.layers.core as core
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Input, multiply, Reshape
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.layers.wrappers import Bidirectional
from keras.constraints import maxnorm
from keras.layers.recurrent import LSTM, GRU
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Embedding
from sklearn.metrics import fbeta_score, roc_curve, auc, roc_auc_score, average_precision_score
import matplotlib.pyplot as plt
from keras.regularizers import l2, l1, l1_l2
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from keras.engine.topology import Layer
from keras import activations, initializers, regularizers, constraints
from keras.layers import Input
from keras.layers import ActivityRegularization
tf.compat.v1.disable_v2_behavior()
class Attention(tf.keras.layers.Layer):
def __init__(self,hidden ,init='glorot_uniform',activation='linear',W_regularizer=None,b_regularizer=None,W_constraint=None,**kwargs):
self.init = initializers.get(init)
self.activation = activations.get(activation)
self.W_regularizer = regularizers.get(W_regularizer)
self.b_regularizer = regularizers.get(b_regularizer)
self.W_constraint = constraints.get(W_constraint)
self.hidden=hidden
super(Attention, self).__init__(**kwargs)
def build(self, input_shape):
input_dim = input_shape[-1]
self.input_length = input_shape[1]
self.W0 = self.add_weight(name ='{}_W1'.format(self.name), shape = (input_dim, self.hidden), initializer = 'glorot_uniform', trainable=True) # Keras 2 API
self.W = self.add_weight( name ='{}_W'.format(self.name), shape = (self.hidden, 1), initializer = 'glorot_uniform', trainable=True)
self.b0 = K.zeros((self.hidden,), name='{}_b0'.format(self.name))
self.b = K.zeros((1,), name='{}_b'.format(self.name))
# self.trainable_weights = [self.W0,self.W,self.b,self.b0]
self.regularizers =[]
if self.W_regularizer:
self.W_regularizer.set_param(self.W)
self.regularizers.append(self.W_regularizer)
if self.b_regularizer:
self.b_regularizer.set_param(self.b)
self.regularizers.append(self.b_regularizer)
self.constraints = {}
if self.W_constraint:
self.constraints[self.W0] = self.W_constraint
self.constraints[self.W] = self.W_constraint
super(Attention, self).build(input_shape)
def call(self,x,mask=None):
attmap = self.activation(K.dot(x, self.W0)+self.b0)
print("self.b.shape=",self.b.shape)
attmap = K.dot(attmap, self.W) + self.b
print("attmap.shape=",attmap.shape)
#till now it was for attention fully connected network/dot product
attmap = K.reshape(attmap, (-1, self.input_length))
attmap = K.softmax(attmap)
print("attmap.shape1=",attmap.shape)
print("x.shape1=",x.shape)
dense_representation = K.batch_dot(attmap, x, axes=(1, 1))
print("dense_representation.shape=",dense_representation.shape)
out = K.concatenate([dense_representation, attmap],axis=1)
print("out.shape=",out.shape)
return out
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[-1] + input_shape[1])
def get_config(self):
config = {'init': 'glorot_uniform',
'activation': self.activation.__name__,
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
'hidden': self.hidden if self.hidden else None}
base_config = super(Attention, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
class attention_flatten(tf.keras.layers.Layer):
def __init__(self, keep_dim, **kwargs):
self.keep_dim = keep_dim
super(attention_flatten, self).__init__(**kwargs)
def build(self, input_shape):
pass
def compute_output_shape(self, input_shape):
if not all(input_shape[1:]):
raise Exception('The shape of the input to "Flatten" '
'is not fully defined '
'(got ' + str(input_shape[1:]) + '. '
'Make sure to pass a complete "input_shape" '
'or "batch_input_shape" argument to the first '
'layer in your model.')
return (input_shape[0], self.keep_dim)
def call(self, x, mask=None):
x=x[1:,:self.keep_dim]
return (x)
def set_up_model_up():
print('building model')
seq_input_shape = (2000,4,)
nb_filter = 64
filter_length = 6
input_shape = (2000,4,)
attentionhidden = 256
seq_input = Input(shape = seq_input_shape, name = 'seq_input')
convul1 = tf.keras.layers.Convolution1D(filters = nb_filter,
kernel_size = filter_length,
padding = 'valid',
activation = 'relu',
kernel_constraint = maxnorm(3),
)
# modil = Sequential()
pool_ma1 = keras.layers.MaxPooling1D(pool_size = 3)
dropout1 = Dropout(0.5977908689086315)
dropout2 = Dropout(0.30131233477637737)
decoder = Attention(hidden = attentionhidden, activation = 'linear')
dense1 = tf.keras.layers.Dense(1)
dense2 = tf.keras.layers.Dense(256)
output_1 = pool_ma1(convul1(seq_input))
output_2 = (output_1)
output_6=tf.reshape(output_2, [1,1995,64])
print("output_2's'shap[2]e=",output_2.shape[2])
print("output_2's'shape=",output_2.shape)
att_decoder = decoder(output_6)
# tf.slice(output_6, begin, size, name=None)
output_3 = attention_flatten(3)(att_decoder)
output_2=tf.reshape(output_2, [1,3, 42560])
output_4 = dense1(((output_2)))
output_4=tf.keras.layers.Flatten()(output_4)
print("output_3p.shape=",output_3.shape)
print("output_4p.shape=",output_4.shape)
all_outp = K.concatenate([output_3, output_4],axis=0)
print(all_outp)
print("all_outp.shape",all_outp.shape)
output_5 = dense2(all_outp)
tf.keras.layers.Add()
output_f = tf.keras.layers.Activation('sigmoid')(output_5)
output_c=tf.keras.layers.Add()(output_f)
modil=tf.keras.models.Model(inputs = seq_input, outputs = output_f)
modil.build(input_shape=input_shape)
modil.compile(loss = 'binary_crossentropy', optimizer = 'nadam', metrics = ['accuracy'])
print (modil.summary())
print("len(modil number of layers)",len(modil.layers))
return modil
def test(n_estimators = 16):
model = set_up_model_up()
model.save_weights('Sequential_model_weights.h5')
model.load_weights('Sequential_model_weights.h5',by_name=True)
X_test = np.load('/content/drive/MyDrive/X_test.npy', mmap_mode='r')
y_test = np.load('/content/drive/MyDrive/y_test.npy', mmap_mode='r')
ensemble = np.zeros(len(X_test))
for i in range(n_estimators):
print ('testing', i, 'model')
print ('model shape is', model.summary)
model.load_weights('/content/drive/MyDrive/model/bestmodel_split_chr_GD_'+ str(i) + '.hdf5')
print ('model shape after loading is', model.summary)
print ('Predicting...')
print ('testing',X_test.shape)
print (len(model.layers))
# y_score = model.predict(np.expand_dims(np.array(X_test, dtype=np.float32), 0), verbose = 1, batch_size = 256)
formatmul= np.empty((3,2000,4), dtype=object)
for x in range(0, 2):
for y in range(0, 1999):
for z in range(0, 3):
formatmul[x][y][z]=X_test[x][y][z]
# y_score = model.predict(X_test).reshape(665,-1), verbose = 1, batch_size = 256)
print("model.output_shape",model.output_shape)
print("model.input_shape",model.input_shape)
# y_score = model.predict(formatmul, batch_size=42560)
y_score = model.predict(np.array(formatmul, dtype=np.float32), batch_size =64)
y_pred = []
for item in y_score:
y_pred.append(item[0])
y_pred = np.array(y_pred)
ensemble += y_pred
ensemble /= n_estimators
np.save('/content/drive/MyDrive/test_result/y_test', y_test)
np.save('/content/drive/MyDrive/test_result/y_pred', ensemble)
auroc = roc_auc_score(y_test, ensemble)
aupr = average_precision_score(y_test, ensemble)
print ('auroc', auroc)
print ('aupr' , aupr)
test(n_estimators = 16)
Stack trace:
WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
building model
output_2's'shap[2]e= 64
output_2's'shape= (?, 665, 64)
self.b.shape= (1,)
attmap.shape= (1, 1995, 1)
attmap.shape1= (1, 1995)
x.shape1= (1, 1995, 64)
dense_representation.shape= (1, 64)
out.shape= (1, 2059)
output_3p.shape= (0, 3)
output_4p.shape= (1, 3)
Tensor("concat:0", shape=(1, 3), dtype=float32)
all_outp.shape (1, 3)
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
conv1d (Conv1D) (None, 1995, 64) 1600 seq_input[0][0]
__________________________________________________________________________________________________
tf_op_layer_Reshape (TensorFlow [(1, 1995, 64)] 0 max_pooling1d[0][0]
__________________________________________________________________________________________________
tf_op_layer_Reshape_1 (TensorFl [(1, 3, 42560)] 0 max_pooling1d[0][0]
__________________________________________________________________________________________________
attention (Attention) (1, 2059) 16897 tf_op_layer_Reshape[0][0]
__________________________________________________________________________________________________
dense (Dense) (1, 3, 1) 42561 tf_op_layer_Reshape_1[0][0]
__________________________________________________________________________________________________
attention_flatten (attention_fl (0, 3) 0 attention[0][0]
__________________________________________________________________________________________________
flatten (Flatten) (1, 3) 0 dense[0][0]
__________________________________________________________________________________________________
tf_op_layer_concat (TensorFlowO [(1, 3)] 0 attention_flatten[0][0]
flatten[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (1, 256) 1024 tf_op_layer_concat[0][0]
__________________________________________________________________________________________________
activation (Activation) (1, 256) 0 dense_1[0][0]
==================================================================================================
Total params: 62,082
Trainable params: 62,082
Non-trainable params: 0
__________________________________________________________________________________________________
None
len(modil number of layers) 10
testing 0 model
model shape is <bound method Model.summary of <tensorflow.python.keras.engine.functional.Functional object at 0x7f5faf959350>>
model shape after loading is <bound method Model.summary of <tensorflow.python.keras.engine.functional.Functional object at 0x7f5faf959350>>
Predicting...
testing (172832, 2000, 4)
10
model.output_shape (1, 256)
model.input_shape (None, 2000, 4)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:2426: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
warnings.warn('`Model.state_updates` will be removed in a future version. '
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-1-b9a05eeb9fa1> in <module>()
240 # if __name__ == '__main__':
241 # set_up_model_up()
--> 242 test(n_estimators = 16)
5 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1480 ret = tf_session.TF_SessionRunCallable(self._session._session,
1481 self._handle, args,
-> 1482 run_metadata_ptr)
1483 if run_metadata:
1484 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2

Before reshaping xtraindata and xtestdata, I got the error:
"Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2." After reshaping xtraindata and xtestdata to (1400, 24, 24, 1) and (600, 24, 24, 1) respectively, I got this error instead:
"Incompatible shapes: [32,1] vs. [32,6,6,1]
[[node mean_squared_error/SquaredDifference (defined at C:\Users\User\Documents\car_person.py:188) ]] [Op:__inference_test_function_7945]
Function call stack:
test_function"
I cannot get the evaluate function working on the created model. What should I do to make the test data compatible with the model?
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import cv2
import pandas as pd
import tensorflow as tf
import itertools as it
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
except RuntimeError as e:
print(e)
#gpu_options=K.tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
path = "C:/Users/User/Desktop/tunel_data"
training_data=[]
def create_training_data(training_data, path):
categories = ["tunel_data_other", "tunel_data_car"]
for category in categories:
path=os.path.join(path, category)
for img in os.listdir(path):
print(img)
if category=="tunel_data_other":
class_num= 0
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('car'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
elif category=="tunel_data_car":
class_num = 1
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('person'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
path = "C:/Users/User/Desktop/tunel_data"
return training_data
create_training_data(training_data, path)
x=[]
y=[]
for i in range(len(training_data)):
x.append(training_data[i][0])
y.append(training_data[i][1])
#print(x)
#print(y)
x = np.array(x).reshape(2000, 576)
"""
principle_features = PCA(n_components=250)
feature = principle_features.fit_transform(x)
"""
feature = x
label = y
feature_df = pd.DataFrame(feature)
#df = DataFrame (People_List,columns=['First_Name','Last_Name','Age'])
label_df = pd.DataFrame(label)
data = pd.concat([feature_df, label_df], axis=1).to_csv('complete.csv')
data = pd.read_csv("complete.csv")
data = data.sample(frac=1).reset_index(drop=True)
print(data)
x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.7, random_state=65)
xtraindata=pd.DataFrame(data=x_train[:,:])
xtestdata=pd.DataFrame(data=x_test[:,:])
print(xtraindata)
ytraindata=pd.DataFrame(data=y_train[:])
ytestdata=pd.DataFrame(data=y_test[:])
print(ytraindata)
xtraindata = np.asarray(xtraindata)
ytraindata = np.asarray(ytraindata)
xtestdata = np.asarray(xtestdata)
ytestdata = np.asarray(ytestdata)
x=np.asarray(x)
y=np.asarray(y)
xtraindata = xtraindata.reshape(1400,24,24,1)
xtestdata = xtestdata.reshape(600,24,24,1)
activation = ["tanh", "relu", "sigmoid", "softmax"]
input_size1 = range(10)
input_size2 = range(10)
k_scores = []
in_size = []
possible = list(it.permutations(activation, 4))
for c in possible:
for i in input_size1:
for a in input_size2:
model = tf.keras.Sequential([tf.keras.layers.Conv2D(256, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Conv2D(512, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Dense(250, activation=c[0]),
tf.keras.layers.Dense(i, activation=c[1]),
tf.keras.layers.Dense(a, activation=c[2]),
tf.keras.layers.Dense(1, activation=c[3])])
model.compile(optimizer='sgd', loss='mse')
val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)
k_scores.append(val_loss)
in_size.append([i,a])
print(k_scores)
print("Best activation functions for each layer:", possible[(k_scores.index((min(k_scores)))) % len(possible)],
"/n Best input sizes:", "840", in_size[k_scores.index((min(k_scores)))][0], in_size[k_scores.index((min(k_scores)))][1], "1")
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][0]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][0], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][1]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][1], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][2]))
model.add(tf.keras.layers.Dense(1, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][3]))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "mse"])
model.fit(x, y, batch_size=16, epochs=5)
predictions = model.predict([x_test])
print(predictions)
print(predictions.shape)
The output layer size is different: you want size (32, 1), but the model's output is (32, 6, 6, 1).
Insert Flatten() between MaxPooling2D and Dense(); that should work.
Also, a tip: the .evaluate method is only for a trained model, so you should call .fit first.
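Putting that advice into the question's loop, a minimal sketch of the fixed part (the activation/size variables c, i and a come from the surrounding search loop, and the epoch count is just a placeholder):
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),                    # collapses (6, 6, 512) into a vector
    tf.keras.layers.Dense(250, activation=c[0]),
    tf.keras.layers.Dense(i, activation=c[1]),
    tf.keras.layers.Dense(a, activation=c[2]),
    tf.keras.layers.Dense(1, activation=c[3])])   # output is now (batch, 1), matching y
model.compile(optimizer='sgd', loss='mse')
model.fit(xtraindata, ytraindata, epochs=1, verbose=1)   # train before calling evaluate
val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)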

How to make a Keras Dense Layer deal with 3D tensor as input for this Softmax Fully Connected Layer?

I am working on a custom problem, and I have to change the fully connected layer (Dense with softmax). My model code is something like this (with the Keras framework):
.......
batch_size = 8
inputs = tf.random.uniform(shape=[batch_size,1024,256],dtype=tf.dtypes.float32)
preds = Dense(num_classes,activation='softmax')(x) #final layer with softmax activation
....
model = Model(inputs=base_model.input,outputs=preds)
So, I have to change the code of the Dense layer to output a tensor of probabilities with the shape [batch_size, 1024, num_classes], without using a for loop. I need it to be optimized and not a time-consuming function.
The Dense layer code that I want to change:
class Dense(Layer):
"""Just your regular densely-connected NN layer.
`Dense` implements the operation:
`output = activation(dot(input, kernel) + bias)`
where `activation` is the element-wise activation function
passed as the `activation` argument, `kernel` is a weights matrix
created by the layer, and `bias` is a bias vector created by the layer
(only applicable if `use_bias` is `True`).
Note: if the input to the layer has a rank greater than 2, then
it is flattened prior to the initial dot product with `kernel`.
# Example
```python
# as first layer in a sequential model:
model = Sequential()
model.add(Dense(32, input_shape=(16,)))
# now the model will take as input arrays of shape (*, 16)
# and output arrays of shape (*, 32)
# after the first layer, you don't need to specify
# the size of the input anymore:
model.add(Dense(32))
```
# Arguments
units: Positive integer, dimensionality of the output space.
activation: Activation function to use
(see [activations](../activations.md)).
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
# Input shape
nD tensor with shape: `(batch_size, ..., input_dim)`.
The most common situation would be
a 2D input with shape `(batch_size, input_dim)`.
# Output shape
nD tensor with shape: `(batch_size, ..., units)`.
For instance, for a 2D input with shape `(batch_size, input_dim)`,
the output would have shape `(batch_size, units)`.
"""
def __init__(self, units,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(Dense, self).__init__(**kwargs)
self.units = units
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = InputSpec(min_ndim=2)
self.supports_masking = True
def build(self, input_shape):
assert len(input_shape) >= 2
input_dim = input_shape[-1]
self.kernel = self.add_weight(shape=(input_dim, self.units),
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.units,),
initializer=self.bias_initializer,
name='bias',
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
self.built = True
def call(self, inputs):
output = K.dot(inputs, self.kernel)
if self.use_bias:
output = K.bias_add(output, self.bias)
if self.activation is not None:
output = self.activation(output)
return output
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) >= 2
assert input_shape[-1]
output_shape = list(input_shape)
output_shape[-1] = self.units
return tuple(output_shape)
def get_config(self):
config = {
'units': self.units,
'activation': activations.serialize(self.activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint)
}
base_config = super(Dense, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
There are three different ways in which this can be done (that I can think of). If you want to have a single Dense layer that maps a vector of 256 elements to a vector of num_classes elements, and apply it across your whole batch of data (that is, use the same 256 x num_classes matrix of weights for every sample), then you don't need to do anything special; just use a regular Dense layer:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2570
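(Those 2,570 parameters are just the 256 × 10 kernel plus 10 biases; the same small weight matrix is reused at every one of the 1024 positions, which is exactly the shared-weights behaviour described above.)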
Another way would be to have a single huge Dense layer that takes all 1024 * 256 values in at the same time and produces all 1024 * num_classes values at the output, that is, a layer with a matrix of weights with shape (1024 * 256) x (1024 * num_classes) (on the order of gigabytes of memory!). This is easy to do too, although it seems unlikely to be what you need:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Flatten, Dense, Reshape, Softmax
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
res = Flatten()(inp)
# This takes _a lot_ of memory!
layer = Dense(1024 * num_classes, activation=None)
out_res = layer(res)
# Apply softmax after reshaping
out_preact = Reshape((-1, num_classes))(out_res)
out = Softmax()(out_preact)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2684364800
Finally, you may want to have a set of 1024 weight matrices, each one applied to the corresponding position in the input, which would imply an array of weights with shape (1024, 256, num_classes). I don't think this can be done with one of the standard Keras layers (or I don't know how to)1, but it's easy enough to write a custom layer based on Dense to do that:
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputSpec
class Dense2D(Dense):
    def __init__(self, *args, **kwargs):
        super(Dense2D, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        assert len(input_shape) >= 3
        input_dim1 = input_shape[-2]
        input_dim2 = input_shape[-1]
        self.kernel = self.add_weight(shape=(input_dim1, input_dim2, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(input_dim1, self.units),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        self.input_spec = InputSpec(min_ndim=3, axes={-2: input_dim1, -1: input_dim2})
        self.built = True

    def call(self, inputs):
        # Multiply each set of weights with each input element
        output = tf.einsum('...ij,ijk->...ik', inputs, self.kernel)
        if self.use_bias:
            output += self.bias
        if self.activation is not None:
            output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 3
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)
You would then use it like this:
import tensorflow as tf
from tensorflow.keras import Input
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = Dense2D(num_classes, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680
1: As today points out in the comments, you can actually use a LocallyConnected1D layer to do the same that I tried to do with my Dense2D layer. It is as simple as this:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import LocallyConnected1D
batch_size = 8
num_classes = 10
inp = Input(shape=(1024, 256))
layer = LocallyConnected1D(num_classes, 1, activation='softmax')
out = layer(inp)
print(out.shape)
# (None, 1024, 10)
print(layer.count_params())
# 2631680

ValueError: in case of LSTM with `stateful=True`

I tried to use an LSTM network with stateful=True as follows:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.callbacks import LambdaCallback
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
raw = np.sin(2*np.pi*np.arange(1024)/float(1024/2))
data = pd.DataFrame(raw)
window_size = 3
data_s = data.copy()
for i in range(window_size):
data = pd.concat([data, data_s.shift(-(i+1))], axis = 1)
data.dropna(axis=0, inplace=True)
print (data)
ds = data.values
n_rows = ds.shape[0]
ts = int(n_rows * 0.8)
train_data = ds[:ts,:]
test_data = ds[ts:,:]
train_X = train_data[:,:-1]
train_y = train_data[:,-1]
test_X = test_data[:,:-1]
test_y = test_data[:,-1]
print (train_X.shape)
print (train_y.shape)
print (test_X.shape)
print (test_y.shape)
(816, 3)
(816,)
(205, 3)
(205,)
batch_size = 3
n_feats = 1
train_X = train_X.reshape(train_X.shape[0], batch_size, n_feats)
test_X = test_X.reshape(test_X.shape[0], batch_size, n_feats)
print(train_X.shape, train_y.shape)
regressor = Sequential()
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
resetCallback = LambdaCallback(on_epoch_begin=lambda epoch,logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
previous_inputs = test_X
regressor.reset_states()
previous_predictions = regressor.predict(previous_inputs).reshape(-1)
test_y = test_y.reshape(-1)
plt.plot(test_y, color = 'blue')
plt.plot(previous_predictions, color = 'red')
plt.show()
However, I got:
ValueError: Error when checking target: expected dense_1 to have 3 dimensions, but got array with shape (816, 1)
PS this code has been adapted from https://github.com/danmoller/TestRepo/blob/master/testing%20the%20blog%20code%20-%20train%20and%20pred.ipynb
Two minor bugs:
Here you have
regressor.add(LSTM(units = 64, batch_input_shape=(train_X.shape[0], batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=True))
This LSTM will return a 3D tensor, but your y is 2D, which throws a ValueError. You can fix this with return_sequences=False. I'm not sure why you initially had train_X.shape[0] inside your batch_input_shape; the number of samples in your entire set shouldn't affect the size of each batch.
regressor.add(LSTM(units = 64, batch_input_shape=(1, batch_size, n_feats),
activation = 'sigmoid',
stateful=True, return_sequences=False))
After this you have
regressor.fit(train_X, train_y, batch_size=7, epochs = 1, callbacks=[resetCallback])
In a stateful network, the batch size must evenly divide the number of samples. Since 7 doesn't divide 816, we change it to 1:
regressor.fit(train_X, train_y, batch_size=1, epochs = 1, callbacks=[resetCallback])
The same goes for your predict call. You must specify batch_size=1:
previous_predictions = regressor.predict(previous_inputs, batch_size=1).reshape(-1)
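Putting the three fixes together (nothing here is new, it is just the corrected pieces of the question's own code combined), the relevant section becomes:
regressor = Sequential()
regressor.add(LSTM(units=64, batch_input_shape=(1, batch_size, n_feats),
                   activation='sigmoid',
                   stateful=True, return_sequences=False))
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')

resetCallback = LambdaCallback(on_epoch_begin=lambda epoch, logs: regressor.reset_states())
regressor.fit(train_X, train_y, batch_size=1, epochs=1, callbacks=[resetCallback])

regressor.reset_states()
previous_predictions = regressor.predict(test_X, batch_size=1).reshape(-1)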

Keras: ValueError | Shape mismatch while transferring the pretrained model weights

I have pre-trained a model and saved it. Now I want to use it for training and testing on another dataset, kind of like transfer learning.
My model architecture is given below.
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d_1 (Conv1D) (None, 298, 32) 128
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 149, 32) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 4768) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 4768) 0
_________________________________________________________________
dense_1 (Dense) (None, 128) 610432
_________________________________________________________________
dense_2 (Dense) (None, 4) 516
=================================================================
Total params: 611,076
Trainable params: 611,076
Non-trainable params: 0
_________________________________________________________________
So now, when I load this model to train and test another model, I get an error that I am not sure about. My program is given below.
# -*- coding: utf-8 -*-
"""Created on Mon Dec 18 23:18:36 2017
#author: Md Junayed Hasan
"""
# PART 1 - Data preprocessing
import numpy as np
import pandas as pd
# importing the dataset
# Description: Dataset has row : 5969 and Column : 301
dataset = pd.read_csv('dataset.csv')
# Independent Variables
# In x taking cloumn till 300
x = dataset.iloc[:, :-1].values
# Dependent Variables
y = dataset.iloc[:, 300:301].values
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y[:, 0] = labelencoder.fit_transform(y[:, 0])
classes = list(labelencoder.classes_)
# split train data into train and validation
from sklearn.cross_validation import train_test_split
x_train,x_valid, y_train, y_valid = train_test_split(x,y, test_size=0.8, random_state=23)
# Feature Scaling / Normalization / Standardization
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_valid = sc_x.fit_transform(x_valid)
# Initializers
nb_filters = 32 # number of feature detector
nb_kernel_size = 3
nb_strides_conv = 1
nb_features = len(np.transpose(x_train)) # sample size
nb_total_samples = len(x_train) # sample number
nb_pool_size = 2
nb_strides_pool = 2
nb_dropout = 0.1
nb_labels = len(classes)
nb_epochs = 2 # for example
nb_batch_size = 15
nb_channels = 1
# 1 D CNN converting DATA into 3D tensor is must
x_train_r= np.reshape(x_train,(len(x_train),nb_features,nb_channels))
x_valid_r= np.reshape(x_valid,(len(x_valid),nb_features,nb_channels))
# CNN Func
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.models import load_model
def CNN_1D(nb_channels,nb_labels,x,y,x_valid_r,y_valid):
model = Sequential()
model.add(Dense(128,input_dim =4768, activation = 'relu'))
model.add(Dense(32, activation = 'relu' ))
model.add(Dense(nb_labels, activation = 'softmax'))
model.load_weights('Scenario1- WC1-2 - WEIGHTS.h5')
sgd = SGD(lr=0.01, nesterov=True, decay=1e-6, momentum=0.9)
model.compile(loss='sparse_categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
y_pred = model.predict(x)
y_pred = (y_pred > 0.5)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_valid, y_pred)
print("FINISHED")
pass
# PART 3 - 1D CNN Function Call
CNN_1D(nb_channels,nb_labels,x,y,x_valid_r,y_valid)
When I run it, I get this error:
ValueError: Shapes must be equal rank, but are 2 and 3 for 'Assign' (op: 'Assign') with input shapes: [4768,128], [3,1,32].
I have included my detailed network architecture and dataset description as well. Please help me figure it out.
Both models you describe are completely different.
While your model has convolutional layers, pooling layers, etc., you're trying to transfer the weights to a model made only of Dense layers.
This is simply not possible.
The only model that can receive those weights is:
model = Sequential()
model.add(Conv1D(32,
                 kernel_size,
                 input_shape=(inputLength, inputChannels),
                 activation=any,
                 padding=validOrSame))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(128, activation=any))
model.add(Dense(4, activation=any))
model.load_weights('Scenario1- WC1-2 - WEIGHTS.h5')
By the calculations in the summary you can have these combinations:
kernel_size=3; inputChannels=1
kernel_size=1; inputChannels=3
Only one of the above options will work.
And:
inputLength=300; validOrSame='valid'
inputLength=298; validOrSame='same'
Only you know what your original model was, so only you can decide between those options.
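To see where those combinations come from: a Conv1D layer has kernel_size × input_channels × filters + filters parameters, and the summary shows 128 of them, so kernel_size × input_channels × 32 + 32 = 128, i.e. kernel_size × input_channels = 3, which only allows (3, 1) or (1, 3). Similarly, the output length of 298 is either 300 − 3 + 1 with 'valid' padding, or an input that is already 298 steps long with 'same' padding.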
Transferring part of the weights
If you create a model compatible with the weights and call it oldModel:
oldModel.load_weights('Scenario1- WC1-2 - WEIGHTS.h5')
Then you create the new model changing the last Dense layer from 4 to 6 units and call it newModel.
After having both models, you transfer the weights:
for i in range(len(oldModel.layers)-1):  # loop excluding the last layer
    newModel.layers[i].set_weights(oldModel.layers[i].get_weights())
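For concreteness, a minimal sketch of newModel (arbitrarily picking the kernel_size=3 / inputChannels=1 / 'valid' combination from above, with placeholder activations; only the last layer differs from oldModel) could be:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

newModel = Sequential()
newModel.add(Conv1D(32, 3, input_shape=(300, 1), activation='relu', padding='valid'))
newModel.add(MaxPooling1D())
newModel.add(Flatten())
newModel.add(Dense(128, activation='relu'))       # activations here are placeholders
newModel.add(Dense(6, activation='softmax'))      # new 6-unit output layer
The two-line loop above then copies the convolutional and first Dense weights and leaves the new 6-unit layer randomly initialised.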