Create timeseries dataset for TensorFlow v2

I'm trying to feed a CNN+LSTM model with data from a CSV. What I'm missing is how to set up a correct dataset to start training my model.
This is my test model:
def test_model():
    model = models.Sequential()
    model.add(TimeDistributed(Conv1D(32, 4, strides=1, activation='relu', padding="valid"), input_shape=[None, 6, 20]))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2), input_shape=[None, 6, 20]))
    model.add(TimeDistributed(Conv1D(64, 4, strides=1, activation='relu', padding="valid"), input_shape=[None, 6, 20]))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2), input_shape=[None, 6, 20]))
    model.add(TimeDistributed(Flatten(), input_shape=[None, 6, 20]))
    model.add(LSTM(100, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation="relu"))
    model.add(Dropout(0.4))
    model.add(Dense(5, activation="softmax"))
    model.compile(optimizer=keras.optimizers.Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
    return model
Here you can download test.csv
The CSV dataset is made of 6 features + 1 multi-label column. I need to create one time series from every 20 rows, so I think my shape should be [None, 6, 20], ordered by ascending timestamp value.
I'm new to TensorFlow and I don't know how to create an appropriate dataset from scratch. I was able to load a dataset of images from a directory (for a CNN), but in this scenario I really don't know how to do it.
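For context, here is a minimal sketch of the windowing I have in mind (assuming the first 6 columns are the features, the 7th is the label, and the rows are already sorted by ascending timestamp; pandas is used here only for illustration):
import numpy as np
import pandas as pd
import tensorflow as tf

df = pd.read_csv("test.csv")

features = df.iloc[:, :6].to_numpy(dtype="float32")  # 6 feature columns
raw_labels = df.iloc[:, 6].to_numpy()                # label column, e.g. "L3"

# keep only complete windows of 20 rows, reshaped to (num_windows, 20, 6)
n_windows = len(features) // 20
X = features[:n_windows * 20].reshape(n_windows, 20, 6)
# one label per window, taken from the window's last row
y = np.array([int(str(s).split("L")[-1]) for s in raw_labels[19::20][:n_windows]])

dataset = tf.data.Dataset.from_tensor_slices((X, y))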
This is what I tried to generate my dataset:
with open('test.csv') as csvfile:
    dataset = list()
    reader = csv.reader(csvfile, delimiter=',')
    next(reader)  # skip header
    count = 0
    timeseries = list()
    labels = list()
    for row in reader:
        count = count + 1
        if count <= 20:
            timeseries.append(
                [float(row[0]), float(row[1]), float(row[2]),
                 float(row[3]), float(row[4]), float(row[5])])
        else:
            dataset.append(timeseries)
            labels.append(int(row[6].split("L")[-1]))
            timeseries = list()
            count = 0
After that I transformed it into a tf.data.Dataset like this:
dataset = tf.data.Dataset.from_tensor_slices(dataset)
labels = tf.data.Dataset.from_tensor_slices(labels)
Here I got a <TensorSliceDataset shapes: (20, 6), types: tf.float32>, as I wanted. Then I fed it to a K-Fold:
estimator = KerasClassifier(build_fn=test_model, epochs=60, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, dataset, labels, cv=kfold)
When running cross_val_score I got this error:
TypeError: Singleton array array(<TensorSliceDataset shapes: (20, 6), types: tf.float32>, dtype=object) cannot be considered a valid collection.
What am I missing?
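A likely culprit, for reference: scikit-learn's cross_val_score needs indexable array-likes such as NumPy arrays and cannot split a tf.data.Dataset. A minimal sketch of the workaround, reusing the plain Python lists built above (before the from_tensor_slices calls):
import numpy as np

X = np.array(dataset, dtype="float32")  # shape: (num_windows, 20, 6)
y = np.array(labels)

results = cross_val_score(estimator, X, y, cv=kfold)
Note that each window here is (20, 6) while the model's input_shape above is [None, 6, 20], so one of the two would still need transposing.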

Related

Is it possible to concatenate GNN with a Dense layer Keras API?

I need to concatenate a GNN with a dense architecture. There are two types of data that should be concatenated, but I'm getting an error. This is the architecture:
# GNN
fltr = GCNConv.preprocess(A)  # A is the adjacency matrix
X_in = Input(shape=(N, F))
A_in = Input(tensor=sp_matrix_to_sp_tensor(fltr))
graph_conv = GCNConv(32, activation='elu')([X_in, A_in])
graph_conv = Dropout(0.2)(graph_conv)
graph_conv = GCNConv(16, activation='elu')([X_in, A_in])
graph_conv = Dropout(0.2)(graph_conv)
flatten = Flatten()(graph_conv)
fc = Dense(256, activation='relu')(flatten)
# DENSE
# Define the model architecture
input_shelf_tensor = Input(shape=(24,))
model_shelves = Dense(64, activation='relu')(input_shelf_tensor)
# CONCATENATE
concatenated_model = tf.keras.layers.Concatenate(axis=1)([model_shelves, fc])
concatenated_model = Dense(512, activation='relu')(concatenated_model)
final_output_layer = Dense(1, activation='softmax')(concatenated_model)
# compile and train
modelG = Model(inputs=[X_in, A_in, input_shelf_tensor], outputs=final_output_layer)
modelG.compile(optimizer='adam', loss='binary_crossentropy')
history = modelG.fit([X_train, fltr, Shelf_Input], y_train, epochs=1, batch_size=32, validation_split=0.2)
The GNN works separately, but when merging, the following error is raised:
ValueError: The two structures don't have the same nested structure.
First structure: type=TensorSpec str=TensorSpec(shape=(2096, 24), dtype=tf.float64, name=None)
Second structure: type=SparseTensor str=SparseTensor(indices=Tensor("SparseReorder_1:0", shape=(52, 2), dtype=int64), values=Tensor("SparseReorder_1:1", shape=(52,), dtype=float64), dense_shape=Tensor("SparseTensor_3/dense_shape:0", shape=(2,), dtype=int64))
More specifically: Substructure "type=SparseTensor str=SparseTensor(indices=Tensor("SparseReorder_1:0", shape=(52, 2), dtype=int64), values=Tensor("SparseReorder_1:1", shape=(52,), dtype=float64), dense_shape=Tensor("SparseTensor_3/dense_shape:0", shape=(2,), dtype=int64))" is a sequence, while substructure "type=TensorSpec str=TensorSpec(shape=(2096, 24), dtype=tf.float64, name=None)" is not
Can someone tell me if it is possible to merge these two types of architectures?
Thanks!
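For what it's worth, the concatenation itself is straightforward in the functional API as long as both branches emit one row per sample. A minimal sketch of the pattern with stand-in Dense branches (illustrative shapes, not the GCNConv pipeline above):
import tensorflow as tf
from tensorflow.keras import layers, Model

# stand-in for the GNN branch: any sub-network ending in (batch, features)
gnn_in = layers.Input(shape=(16,))
gnn_out = layers.Dense(256, activation='relu')(gnn_in)

# dense branch
shelf_in = layers.Input(shape=(24,))
shelf_out = layers.Dense(64, activation='relu')(shelf_in)

# both outputs share the batch dimension, so they concatenate cleanly
merged = layers.Concatenate(axis=1)([shelf_out, gnn_out])
merged = layers.Dense(512, activation='relu')(merged)
# sigmoid (not softmax) pairs with a single unit and binary_crossentropy
out = layers.Dense(1, activation='sigmoid')(merged)

model = Model(inputs=[gnn_in, shelf_in], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy')
In the code above, the likely sticking point is that A_in is a SparseTensor carrying a single graph rather than a per-sample batch, so its structure doesn't line up with the (2096, 24) dense input, which is what the nested-structure error is complaining about.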

Can't get multi-output CNN to work (tensorflow and keras)

I'm currently working on a fiber tip tracking task on an endoscopic video.
For this purpose I have two models:
a classifier that tells whether the image contains the fiber (is_visible)
a regressor that predicts the fiber tip position (x, y)
I am using ResNet18 pretrained on ImageNet for both, and it works great. But I'm experiencing performance issues, so I decided to combine the two models into a single one using a multi-output approach.
But so far I haven't been able to get it to work.
TENSORFLOW:
TensorFlow version: 2.10.1
DATASET:
My dataset is stored in HDF5 format. Each sample has:
an image (224, 224, 3)
a uint8 visibility flag (is_visible)
two floats for the fiber tip position (x, y)
I am loading this dataset using a custom generator as follows:
output_types = (tf.float32, tf.uint8, tf.float32)
output_shapes = (
    tf.TensorShape((None, image_height, image_width, number_of_channels)),  # image
    tf.TensorShape((None, 1)),  # is_visible
    tf.TensorShape((None, 1, 1, 2)),  # x, y
)
train_dataset = tf.data.Dataset.from_generator(
    generator, output_types=output_types, output_shapes=output_shapes,
)
MODEL:
My model is defined as follows:
model = ResNet18(input_shape=(224, 224, 3), weights="imagenet", include_top=False)
inputLayer = model.input
innerLayer = tf.keras.layers.Flatten()(model.output)
is_visible = tf.keras.layers.Dense(1, activation="sigmoid", name="is_visible")(innerLayer)
position = tf.keras.layers.Dense(2)(innerLayer)
position = tf.keras.layers.Reshape((1, 1, 2), name="position")(position)
model = tf.keras.Model(inputs=[inputLayer], outputs=[is_visible, position])
adam = tf.keras.optimizers.Adam(1e-4)
model.compile(
    optimizer=adam,
    loss={
        "is_visible": "binary_crossentropy",
        "position": "mean_squared_error",
    },
    loss_weights={
        "is_visible": 1.0,
        "position": 1.0,
    },
    metrics={
        "is_visible": "accuracy",
        "position": "mean_squared_error",
    },
)
PROBLEM:
The dataset works great; I can loop through each batch. But when it comes to training
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100000,
    callbacks=callbacks,
)
I get the following error:
ValueError: Can not squeeze dim[3], expected a dimension of 1, got 2 for '{{node mean_squared_error/weighted_loss/Squeeze}} = SqueezeT=DT_FLOAT, squeeze_dims=[-1]' with input shapes: [?,1,1,2].
I tried to change the dataset format like so:
output_types = (tf.float32, tf.uint8, tf.float32, tf.float32)
output_shapes = (
    tf.TensorShape((None, image_height, image_width, number_of_channels)),  # image
    tf.TensorShape((None, 1)),  # is_visible
    tf.TensorShape((None, 1)),  # x
    tf.TensorShape((None, 1)),  # y
)
But this leads to another error:
ValueError: Data is expected to be in format x, (x,), (x, y), or (x, y, sample_weight), found: (<tf.Tensor 'IteratorGetNext:0' shape=(None, 224, 224, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 1) dtype=uint8>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 1) dtype=float32>, <tf.Tensor 'IteratorGetNext:3' shape=(None, 1) dtype=float32>)
I tried to wrap the is_visible and (x, y) values returned from train_dataset into a dictionary like so:
yield image_batch, {"is_visible": is_visible_batch, "position": position_batch}
Also tried these options:
yield image_batch, (is_visible_batch, position_batch)
yield image_batch, [is_visible_batch, position_batch]
But that didn't help either.
Can anyone tell me what I'm doing wrong? I'm totally stuck.
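For reference, one common pattern (a sketch, assuming the generator can be changed): yield the labels as a dict keyed by the output-layer names and declare the nested structure with output_signature (available since TF 2.4), keeping position flat as (None, 2) on both the dataset side and the model side (Dense(2, name="position") with no Reshape) so nothing needs squeezing:
import tensorflow as tf

output_signature = (
    tf.TensorSpec(shape=(None, image_height, image_width, number_of_channels), dtype=tf.float32),
    {
        # cast is_visible to float32 in the generator so it matches the loss
        "is_visible": tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
        # flat (None, 2) targets; pairs with Dense(2, name="position"), no Reshape
        "position": tf.TensorSpec(shape=(None, 2), dtype=tf.float32),
    },
)

train_dataset = tf.data.Dataset.from_generator(
    generator,  # must yield: image_batch, {"is_visible": ..., "position": ...}
    output_signature=output_signature,
)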

Can't resolve ValueError: as_list() is not defined on an unknown TensorShape

I'm busy creating a pre-processing pipeline in a TensorFlow dataset. It takes in a list of relative paths to files, decodes each file name from a bytes string to a regular string, loads the numpy array (which contains mel-frequency cepstral coefficients), reshapes it to have one channel (i.e. adds a dimension of size 1 on the end), extracts the corresponding label from the parent directory name (the parent directory name indicates the class), and then returns the array and label.
I've read up about this problem, but nothing seems to work. I tried setting the shape in the function, but it was to no avail.
Would appreciate any help.
Here's the relevant code:
def get_mfccs_and_label(file_path):
    output_shape = (36, 125, 1)
    file_path = file_path.decode()
    emotion = file_path.split("/")[-2]
    combined_mfccs = np.load(file_path)
    combined_mfccs = tf.convert_to_tensor(combined_mfccs)
    combined_mfccs = tf.reshape(combined_mfccs, output_shape)
    combined_mfccs.set_shape(output_shape)
    emotions = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sadness', 'surprise']
    category_encoder = tf.keras.layers.CategoryEncoding(num_tokens=7, output_mode="one_hot")
    one_hot_encoded_label = category_encoder(emotions.index(emotion))
    one_hot_encoded_label.set_shape(7)
    return combined_mfccs, one_hot_encoded_label
combined_mfcc_files = glob.glob("challengeA_data/combined_mfccs/*/*.npy")
files_ds = tf.data.Dataset.from_tensor_slices(combined_mfcc_files)
ds = files_ds.map(
    lambda file: tf.numpy_function(get_mfccs_and_label, [file], [tf.float32, tf.float32]),
    num_parallel_calls=tf.data.AUTOTUNE)
ds = ds.shuffle(buffer_size=100)
num_instances = len(ds)
num_train = int(num_instances * 0.8)
num_val = int(num_instances * 0.2)
train_ds = ds.take(num_train)
val_ds = ds.skip(num_train)
batch_size = 64
train_ds = train_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
model = models.Sequential([
    layers.Input(shape=(36, 125, 1)),
    layers.Conv2D(8, 5, activation="relu"),
    layers.MaxPool2D(2),
    layers.Dropout(0.2),
    layers.Conv2D(16, 5, activation="relu"),
    layers.MaxPool2D(2),
    layers.Dropout(0.2),
    layers.Conv2D(200, 5, activation="relu"),
    layers.MaxPool2D(2),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(1024, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(7, activation="softmax")
])
model.summary()
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"]
)
EPOCHS = 10
# ----> "as_list()..." error raised when calling model.fit()
cnn_with_combined_mfcc_history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS)
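For reference, the usual remedy here (a sketch, assuming the shapes above are correct): tf.numpy_function erases static shape information, so every dataset element comes back with an unknown TensorShape, which is exactly what model.fit trips over. Re-attaching the shapes in a follow-up map restores them:
def restore_shapes(mfccs, label):
    # tf.numpy_function strips static shapes; declare them again explicitly
    mfccs = tf.ensure_shape(mfccs, (36, 125, 1))
    label = tf.ensure_shape(label, (7,))
    return mfccs, label

ds = ds.map(restore_shapes, num_parallel_calls=tf.data.AUTOTUNE)
It is also worth checking that the saved arrays really are float32 (e.g. np.load(file_path).astype(np.float32)), since the map declares tf.float32 outputs.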

Dimensions must be equal, but are 2 and 3 for node binary_crossentropy/mul

I was checking the code I found here, the example at Multivariate Multi-Step LSTM Models -> Multiple Input Multi-Step Output.
I altered the code and used binary_crossentropy and sigmoid activation for the last layer.
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out - 1
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)
# define input sequence
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
out_seq = array([in_seq1[i]+in_seq2[i] for i in range(len(in_seq1))])
# convert to [rows, columns] structure
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))
# horizontally stack columns
dataset = hstack((in_seq1, in_seq2, out_seq))
# choose a number of time steps
n_steps_in, n_steps_out = 3, 3
# convert into input/output
X, y = split_sequences(dataset, n_steps_in, n_steps_out)
n_features = X.shape[2]
# define model
model = Sequential()
model.add(LSTM(5, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# fit model
model.fit(X, y, epochs=20, verbose=0, batch_size=1)
The above code runs fine. But when I try to change n_steps_in and n_steps_out and use, for example, n_steps_in, n_steps_out = 3, 2, it gives:
ValueError: Dimensions must be equal, but are 2 and 3 for '{{node binary_crossentropy/mul}} = Mul[T=DT_FLOAT](binary_crossentropy/Cast, binary_crossentropy/Log)' with input shapes: [1,2], [1,3].
Why does this error come up, and how can I overcome it?
This is because your network is built to output 3D sequences of shape (None, 3, 1), while your targets have shape (None, 2, 1).
The best and most automatic way to handle this situation correctly is to build an encoder-decoder structure. Below is an example:
from keras.layers import RepeatVector

model = Sequential()
model.add(LSTM(5, activation='relu', return_sequences=False,
               input_shape=(n_steps_in, n_features)))  # ENCODER
model.add(RepeatVector(n_steps_out))
model.add(LSTM(5, activation='relu', return_sequences=True))  # DECODER
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=20, batch_size=1)
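A quick sanity check before fitting is to compare the model's output shape with the target shape:
print(model.output_shape)  # (None, n_steps_out, 1)
print(y.shape)             # (n_samples, n_steps_out)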

error: The model expects 3 input arrays, but only received one array. Found: array with shape (10, 20, 50, 50, 1)

main_model = Sequential()
main_model.add(Conv3D(32, 3, 3, 3, input_shape=(20, 50, 50, 1)))
main_model.add(Activation('relu'))
main_model.add(MaxPooling3D(pool_size=(2, 2, 2)))
main_model.add(Conv3D(64, 3, 3, 3))
main_model.add(Activation('relu'))
main_model.add(MaxPooling3D(pool_size=(2, 2, 2)))
main_model.add(Dropout(0.8))
main_model.add(Flatten())
# lower features model - CNN2
lower_model1 = Sequential()
lower_model1.add(Conv3D(32, 3, 3, 3, input_shape=(20, 50, 50, 1)))
lower_model1.add(Activation('relu'))
lower_model1.add(MaxPooling3D(pool_size=(2, 2, 2)))
lower_model1.add(Dropout(0.8))
lower_model1.add(Flatten())
# lower features model - CNN3
lower_model2 = Sequential()
lower_model2.add(Conv3D(32, 3, 3, 3, input_shape=(20, 50, 50, 1)))
lower_model2.add(Activation('relu'))
lower_model2.add(MaxPooling3D(pool_size=(2, 2, 2)))
lower_model2.add(Dropout(0.8))
lower_model2.add(Flatten())
merged_model = Merge([main_model, lower_model1, lower_model2], mode='concat')
final_model = Sequential()
final_model.add(merged_model)
final_model.add(Dense(1024, init='normal'))
final_model.add(Activation('relu'))
final_model.add(Dropout(0.5))
final_model.add(Dense(2, init='normal'))
final_model.add(Activation('softmax'))
final_model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
train = train_data[-10:]
test = train_data[-2:]
X = np.array([i[0] for i in train]).reshape(-1, 20, 50, 50, 1)
Y = [i[1] for i in train]
test_x = np.array([i[0] for i in test]).reshape(-1, 20, 50, 50, 1)
test_y = [i[1] for i in test]
final_model.fit(np.array(X), np.array(Y),
                validation_data=(np.array(test_x), np.array(test_y)),
                batch_size=batch_size, nb_epoch=nb_epoch,
                validation_split=0.2, shuffle=True, verbose=1)
I'm using 50x50 images grouped in chunks of 20 frames, which is why my numpy array is 20x50x50.
I'm using Sequential models for a multi-scale 3D CNN network (the 1st and 2nd models above). I don't know why I'm getting this kind of result:
val_acc and val_loss stay the same in every epoch.
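As for the error itself: the merged model declares three inputs, so fit() has to receive three arrays. A minimal sketch, assuming all three branches are meant to read the same volume (keeping the Keras 1 style API the code already uses):
final_model.fit(
    [X, X, X], np.array(Y),
    validation_data=([test_x, test_x, test_x], np.array(test_y)),
    batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True, verbose=1,
)
Separately, Dense(2) with a softmax is normally paired with categorical_crossentropy and one-hot labels rather than binary_crossentropy; a loss/activation mismatch like that can contribute to val_acc and val_loss staying flat across epochs.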