Multiple inputs of keras model with tf.data.Dataset.from_generator in Tensorflow 2 - tensorflow

I am trying to implement a model in keras that will have multiple inputs:
image (200x200)
some numbers (1x50)
three 1d signals (1x50000, 2x100000)
To feed that model, I want to write a generator to use with tf.data.Dataset.from_generator. From the docs of from_generator, its not clear to me how I should provide its parameters output_types, output_shapes. Can anyone help me with this?

I had a similar issue, and it took me many tries to get the structure right for those inputs. Here's an example of a network with 3 inputs and 2 outputs, complete to the .fit call.
The following works in tensorflow 2.1.0
import tensorflow as tf
import numpy as np
def generator(N=10):
"""
Returns tuple of (inputs,outputs) where
inputs = (inp1,inp2,inp2)
outputs = (out1,out2)
"""
dt=np.float32
for i in range(N):
inputs = (np.random.rand(N,3,3,1).astype(dt),
np.random.rand(N,3,3,1).astype(dt),
np.random.rand(N,3,3,1).astype(dt))
outputs = (np.random.rand(N,3,3,1).astype(dt),
np.random.rand(N,3,3,1).astype(dt))
yield inputs,outputs
# Create dataset from generator
types = ( (tf.float32,tf.float32,tf.float32),
(tf.float32,tf.float32) )
shapes = (([None,3,3,1],[None,3,3,1],[None,3,3,1]),
([None,3,3,1],[None,3,3,1]))
data = tf.data.Dataset.from_generator(generator,
output_types=types,
output_shapes=shapes
)
# Define a model
inp1 = tf.keras.Input(shape=(3,3,1),name='inp1')
inp2 = tf.keras.Input(shape=(3,3,1),name='inp2')
inp3 = tf.keras.Input(shape=(3,3,1),name='inp3')
out1 = tf.keras.layers.Conv2D(1,kernel_size=3,padding='same')(inp1)
out2 = tf.keras.layers.Conv2D(1,kernel_size=3,padding='same')(inp2)
model = tf.keras.Model(inputs=[inp1,inp2,inp3],outputs=[out1,out2])
model.compile(loss=['mse','mse'])
# Train
model.fit(data)

So assuming you have a generator that is similar to this mock:
def dummy_generator():
number_of_records = 100
for i in range(100):
an_image = tf.random.uniform((200,200,3))
some_numbers = tf.random.uniform((50,))
signal1 = tf.random.uniform((50000,))
signal2 = tf.random.uniform((100000,))
signal3 = tf.random.uniform((100000,))
yield an_image, some_numbers, signal1, signal2, signal3
each record is of datatype float32 so the output types are easy:
out_types = (tf.float32, tf.float32, tf.float32, tf.float32, tf.float32)
for the output shapes we just list the shapes in the same order:
out_shapes = ((200,200,3), (50,), (50000,), (100000,), (100000,))
so now we can just call from_generator:
ds = tf.data.Dataset.from_generator(dummy_generator,
output_types=out_types,
output_shapes=out_shapes)

model.fit([input_1, input_2, input_3], y, epochs=EPOCHS)
You got to have n(3 in the case above) input layers in your model.

Related

How do you fit a tf.Dataset to a Keras Autoencoder Model when the Dataset has been generated using TFX?

Problem
As the title suggests I have been trying to create a pipeline for training an Autoencoder model using TFX. The problem I'm having is fitting the tf.Dataset returned by the DataAccessor.tf_dataset_factory object to the Autoencoder.
Below I summarise the steps I've taken through this project, and have some Questions at the bottom if you wish to skip the background information.
Intro
TFX Pipeline
The TFX components I have used so far have been:
CsvExampleGenerator (the dataset has 82 columns, all numeric, and the sample csv has 739 rows)
StatisticsGenerator / SchemaGenerator, the schema has been edited as is now loaded in using an Importer
Transform
Trainer (this is the component I am currently having problems with)
Model
The model that I am attempting to train is based off of the example laid out here https://www.tensorflow.org/tutorials/generative/autoencoder. However, my model is being trained on tabular data, searching for anomalous results, as opposed to image data.
As I have tried a couple of solutions I have tried using both the Keras.layers and Keras.model format for defining the model and I outline both below:
Subclassing Keras.Model
class Autoencoder(keras.models.Model):
def __init__(self, features):
super(Autoencoder, self).__init__()
self.encoder = tf.keras.Sequential([
keras.layers.Dense(82, activation = 'relu'),
keras.layers.Dense(32, activation = 'relu'),
keras.layers.Dense(16, activation = 'relu'),
keras.layers.Dense(8, activation = 'relu')
])
self.decoder = tf.keras.Sequential([
keras.layers.Dense(16, activation = 'relu'),
keras.layers.Dense(32, activation = 'relu'),
keras.layers.Dense(len(features), activation = 'sigmoid')
])
def call(self, x):
inputs = [keras.layers.Input(shape = (1,), name = f) for f in features]
dense = keras.layers.concatenate(inputs)
encoded = self.encoder(dense)
decoded = self.decoder(encoded)
return decoded
Subclassing Keras.Layers
def _build_keras_model(features: List[str]) -> tf.keras.Model:
inputs = [keras.layers.Input(shape = (1,), name = f) for f in features]
dense = keras.layers.concatenate(inputs)
dense = keras.layers.Dense(32, activation = 'relu')(dense)
dense = keras.layers.Dense(16, activation = 'relu')(dense)
dense = keras.layers.Dense(8, activation = 'relu')(dense)
dense = keras.layers.Dense(16, activation = 'relu')(dense)
dense = keras.layers.Dense(32, activation = 'relu')(dense)
outputs = keras.layers.Dense(len(features), activation = 'sigmoid')(dense)
model = keras.Model(inputs = inputs, outputs = outputs)
model.compile(
optimizer = 'adam',
loss = 'mae'
)
return model
TFX Trainer Component
For creating the Trainer Component I have been mainly following the implementation details laid out here: https://www.tensorflow.org/tfx/guide/trainer
As well as following the default penguins example: https://www.tensorflow.org/tfx/tutorials/tfx/penguin_simple#write_model_training_code
run_fn defintion
def run_fn(fn_args: tfx.components.FnArgs) -> None:
tft_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(
file_pattern = fn_args.train_files,
data_accessor = fn_args.data_accessor,
tf_transform_output = tft_output,
batch_size = fn_args.train_steps
)
eval_dataset = _input_fn(
file_pattern = fn_args.eval_files,
data_accessor = fn_args.data_accessor,
tf_transform_output = tft_output,
batch_size = fn_args.custom_config['eval_batch_size']
)
# model = Autoencoder(
# features = fn_args.custom_config['features']
# )
model = _build_keras_model(features = fn_args.custom_config['features'])
model.compile(optimizer = 'adam', loss = 'mse')
model.fit(
train_dataset,
steps_per_epoch = fn_args.train_steps,
validation_data = eval_dataset,
validation_steps = fn_args.eval_steps
)
...
_input_fn definition
def _apply_preprocessing(raw_features, tft_layer):
transformed_features = tft_layer(raw_features)
return transformed_features
def _input_fn(
file_pattern,
data_accessor: tfx.components.DataAccessor,
tf_transform_output: tft.TFTransformOutput,
batch_size: int) -> tf.data.Dataset:
"""
Generates features and label for tuning/training.
Args:
file_pattern: List of paths or patterns of input tfrecord files.
data_accessor: DataAccessor for converting input to RecordBatch.
tf_transform_output: A TFTransformOutput.
batch_size: representing the number of consecutive elements of returned
dataset to combine in a single batch
Returns:
A dataset that contains features where features is a
dictionary of Tensors.
"""
dataset = data_accessor.tf_dataset_factory(
file_pattern,
tfxio.TensorFlowDatasetOptions(batch_size = batch_size),
tf_transform_output.transformed_metadata.schema
)
transform_layer = tf_transform_output.transform_features_layer()
def apply_transform(raw_features):
return _apply_preprocessing(raw_features, transform_layer)
return dataset.map(apply_transform).repeat()
This differs from the _input_fn example given above as I was following the example in the next tfx tutorial found here: https://www.tensorflow.org/tfx/tutorials/tfx/penguin_tft#run_fn
Also for reference, there is no Target within the example data so there is no label_key to be passed to the tfxio.TensorFlowDatasetOptions object.
Error
When trying to run the Trainer component using a TFX InteractiveContext object I receive the following error.
ValueError: No gradients provided for any variable: ['dense_460/kernel:0', 'dense_460/bias:0', 'dense_461/kernel:0', 'dense_461/bias:0', 'dense_462/kernel:0', 'dense_462/bias:0', 'dense_463/kernel:0', 'dense_463/bias:0', 'dense_464/kernel:0', 'dense_464/bias:0', 'dense_465/kernel:0', 'dense_465/bias:0'].
From my own attempts to solve this I believe the problem lies in the way that an Autoencoder is trained. From the Autoencoder example linked here https://www.tensorflow.org/tutorials/generative/autoencoder the data is fitted like so:
autoencoder.fit(x_train, x_train,
epochs=10,
shuffle=True,
validation_data=(x_test, x_test))
therefore it stands to reason that the tf.Dataset should also mimic this behaviour and when testing with plain Tensor objects I have been able to recreate the error above and then solve it when adding the target to be the same as the training data in the .fit() function.
Things I've Tried So Far
Duplicating Train Dataset
model.fit(
train_dataset,
train_dataset,
steps_per_epoch = fn_args.train_steps,
validation_data = eval_dataset,
validation_steps = fn_args.eval_steps
)
Raises error due to Keras not accepting a 'y' value when a dataset is passed.
ValueError: `y` argument is not supported when using dataset as input.
Returning a dataset that is a tuple with itself
def _input_fn(...
dataset = data_accessor.tf_dataset_factory(
file_pattern,
tfxio.TensorFlowDatasetOptions(batch_size = batch_size),
tf_transform_output.transformed_metadata.schema
)
transform_layer = tf_transform_output.transform_features_layer()
def apply_transform(raw_features):
return _apply_preprocessing(raw_features, transform_layer)
dataset = dataset.map(apply_transform)
return dataset.map(lambda x: (x, x))
This raises an error where the keys from the features dictionary don't match the output of the model.
ValueError: Found unexpected keys that do not correspond to any Model output: dict_keys(['feature_string', ...]). Expected: ['dense_477']
At this point I switched to using the keras.model Autoencoder subclass and tried to add output keys to the Model using an output which I tried to create dynamically in the same way as the inputs.
def call(self, x):
inputs = [keras.layers.Input(shape = (1,), name = f) for f in x]
dense = keras.layers.concatenate(inputs)
encoded = self.encoder(dense)
decoded = self.decoder(encoded)
outputs = {}
for feature_name in x:
outputs[feature_name] = keras.layers.Dense(1, activation = 'sigmoid')(decoded)
return outputs
This raises the following error:
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
I've been looking into solving this issue but am no longer sure if the data is being passed correctly and am beginning to think I'm getting side-tracked from the actual problem.
Questions
Has anyone managed to get an Autoencoder working when connected via TFX examples?
Did you alter the tf.Dataset or handled the examples in a different way to the _input_fn demonstrated?
So I managed to find an answer to this and wanted to leave what I found here in case anyone else stumbles onto a similar problem.
It turns out my feelings around the error were correct and the solution did indeed lie in how the tf.Dataset object was presented.
This can be demonstrated when I ran some code which simulated the incoming data using randomly generated tensors.
tensors = [tf.random.uniform(shape = (1, 82)) for i in range(739)]
# This gives us a list of 739 tensors which hold 1 value for 82 'features' simulating the dataset I had
dataset = tf.data.Dataset.from_tensor_slices(tensors)
dataset = dataset.map(lambda x : (x, x))
# This returns a dataset which marks the training set and target as the same
# which is what the Autoecnoder model is looking for
model.fit(dataset ...)
Following this I proceeded to do the same thing with the dataset returned by the _input_fn. Given that the tfx DataAccessor object returns a features_dict however I needed to combine the tensors in that dict together to create a single tensor.
This is how my _input_fn looks now:
def create_target_values(features_dict: Dict[str, tf.Tensor]) -> tuple:
value_tensor = tf.concat(list(features_dict.values()), axis = 1)
return (features_dict, value_tensor)
def _input_fn(
file_pattern,
data_accessor: tfx.components.DataAccessor,
tf_transform_output: tft.TFTransformOutput,
batch_size: int) -> tf.data.Dataset:
"""
Generates features and label for tuning/training.
Args:
file_pattern: List of paths or patterns of input tfrecord files.
data_accessor: DataAccessor for converting input to RecordBatch.
tf_transform_output: A TFTransformOutput.
batch_size: representing the number of consecutive elements of returned
dataset to combine in a single batch
Returns:
A dataset that contains (features, target_tensor) tuple where features is a
dictionary of Tensors, and target_tensor is a single Tensor that is a concatenated tensor of all the
feature values.
"""
dataset = data_accessor.tf_dataset_factory(
file_pattern,
tfxio.TensorFlowDatasetOptions(batch_size = batch_size),
tf_transform_output.transformed_metadata.schema
)
dataset = dataset.map(lambda x: create_target_values(features_dict = x))
return dataset.repeat()

evaluating two inputs and one output model tensorflow

I am trying to evaluate a model with 2 inputs and 1 output, each input goes to separate pretrained model and then the output from both the models get averaged. I am using the same data for both the inputs.
test_dir = 'D:\Graduation_project\Damage type not collected'
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,)
test_set = test_datagen.flow_from_directory(test_dir,
class_mode = 'categorical',
batch_size = 16,
target_size=(150,150))
test_set1 = test_datagen.flow_from_directory(test_dir,
class_mode = 'categorical',
batch_size = 16,
target_size=(150,150))
loading first model and renaming the layers
def load_dense_model():
densenet = tf.keras.models.load_model('D:\Graduation_project\saved models\damage_type_model.h5', compile=False)
for i, layer in enumerate(densenet.layers):
layer._name = 'Densenet_layer' + str(i)
return densenet
loading second model
def load_vgg19_model():
vgg19 = tf.keras.models.load_model('D:\Graduation_project\saved models\damage_type_VGG19.h5', compile=False)
return vgg19
creating ensemble model
def ensamble_model(first_model, second_model):
densenet = first_model()
vgg19 = second_model()
output_1 = densenet.get_layer('Densenet_layer613')
output_2 = vgg19.get_layer('dense_4')
avg = tf.keras.layers.Average()([output_1.output, output_2.output])
model = Model(inputs=[densenet.input, vgg19.input], outputs=avg)
return model
METRICS = [
'accuracy',
tf.metrics.TruePositives(name='tp'),
tf.metrics.FalsePositives(name='fp'),
tf.metrics.TrueNegatives(name='tn'),
tf.metrics.FalseNegatives(name='fn'),
tf.metrics.Precision(name='precision'),
tf.metrics.Recall(name='recall'),
tfa.metrics.F1Score(name='F1_Score', num_classes=5),
tfa.metrics.MultiLabelConfusionMatrix(num_classes=5)
]
model = ensamble_model(load_dense_model, load_vgg19_model)
compiling and evaluating the model
model.compile(optimizer = 'adam' , loss ='binary_crossentropy',
metrics = 'accuracy')
model.evaluate({'Densenet_layer0':test_set1, 'input_2':test_set})
evaluate() fails to run
ValueError: Failed to find data adapter that can handle input: (<class 'dict'> containing {"<class 'str'>"} keys and {"<class 'tensorflow.python.keras.preprocessing.image.DirectoryIterator'>"} values), <class 'NoneType'>
My guess is that your model complaining because you are feeding a dict/list of iterators that yield an image each, instead of feeding an iterator that yields the image twice (once for each model).
What would happen if you wrap your DirectoryIterator on a generator that can feed the data correctly?
def gen_itertest(test_dir):
test_set = test_datagen.flow_from_directory(test_dir,
class_mode = 'categorical',
batch_size = 16,
target_size=(150,150))
for i in range(len(test_set)):
x = test_set.next()
yield [x[0], x[0]], x[1] # Twice the input, only once the label
and then you can feed this to the evaluate
testset = gen_itertest('D:\Graduation_project\Damage type not collected')
result = model.evaluate(testset)
I am not sure this will work but because you haven't provide us with a minimal, reproducible example, I am not going to do one to test it.
Try calling the evaluate() like this:
result = model.evaluate(x=[test_set1, test_set])
Then you could get the name of the metrics doing something like this:
dict(zip(model.metrics_names, result))

tf.keras.backend.function for transforming embeddings inside tf.data.dataset

I am trying to use the output of a neural network to transform data inside tf.data.dataset. Specifically, I am using a Delta-Encoder to manipulate embeddings inside the tf.data pipeline. In so doing, however, I get the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
I have searched the dataset pipeline page and stack overflow, but I could not find something that addresses my question. In the code below I am using an Autoencoder, as it yields an identical error with more concise code.
The offending part seems to be
[[x,]] = tf.py_function(Auto_Func, [x], [tf.float32])
inside
tf_auto_transform.
num_embeddings = 100
input_dims = 1000
embeddings = np.random.normal(size = (num_embeddings, input_dims)).astype(np.float32)
target = np.zeros(num_embeddings)
#creating Autoencoder
inp = Input(shape = (input_dims,), name ='input')
hidden = Dense(10, activation = 'relu', name = 'hidden')(inp)
out = Dense(input_dims, activation = 'relu', name='output')(hidden)
auto_encoder = tf.keras.models.Model(inputs =inp, outputs=out)
Auto_Func = tf.keras.backend.function(inputs = Autoencoder.get_layer(name='input').input,
outputs = Autoencoder.get_layer(name='output').input )
#Autoencoder transform for dataset.map
def tf_auto_transform(x, target):
x_shape = x.shape
##tf.function
#def func(x):
# return tf.py_function(Auto_Func, [x], [tf.float32])
#[[x,]] = func(x)
[[x,]] = tf.py_function(Auto_Func, [x], [tf.float32])
x.set_shape(x_shape)
return x, target
def get_dataset(X,y, batch_size = 32):
train_ds = tf.data.Dataset.from_tensor_slices((X, y))
train_ds = train_ds.map(tf_auto_transform)
train_ds = train_ds.batch(batch_size)
return train_ds
dataset = get_dataset(embeddings, target, 2)
The above code yields the following error:
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
I tried to eliminate the error by running the commented out section of the tf_auto_transform function, but the error persisted.
SideNote: While it is true that the Delta encoder paper has code, it is written in tf 1.x. I am trying to use tf 2.x with the tf functional API instead. Thank you for your help!
At the risk of outing myself as a n00b, the answer is to switch the order of the map and batch functions. I am trying to apply a neural network to make some changes on data. tf.keras models take batches as input, not individual samples. By batching the data first, I can run batches through my nn.
def get_dataset(X,y, batch_size = 32):
train_ds = tf.data.Dataset.from_tensor_slices((X, y))
#The changed order
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.map(tf_auto_transform)**strong text**
return train_ds
It really is that simple.

Tensorflow Federated : Why my iterative Process unable to train rounds

I write a code with TFF from my own dataset, all the code run correctly except
this line
In train_data, I make 4 dataset, loaded with tf.data.Dataset, they have the type "DatasetV1Adapter"
def client_data(n):
ds = source.create_tf_dataset_for_client(source.client_ids[n])
return ds.repeat(10).map(map_fn).shuffle(500).batch(20)
federated_train_data = [client_data(n) for n in range(4)]
batch = tf.nest.map_structure(lambda x: x.numpy(), iter(train_data[0]).next())
def model_fn():
model = tf.keras.models.Sequential([
.........
return tff.learning.from_compiled_keras_model(model, batch)
all this run correctly and I get trainer and state:
trainer = tff.learning.build_federated_averaging_process(model_fn)
Except, When I would to begin training and round with this code:
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
I can't. error comes! So, from where can be the error? from type of dataset? or the way that I make my data federated?
As verified in the comments above, adding a take(N) call for some finite integer N in the client_data function should solve this problem. The issue is that TFF will reduce over all the elements in the dataset you pass it. If you have an infinite dataset, this means "keep running the reduction forever". N here should represent "how much data an individual client has", and can really be anything of your choosing.
here is my code, I use Tensorflow v2.1.0 and tff 0.12.0
img_height = 200
img_width = 200
num_classes = 2
batch_size = 10
input_shape = (img_height, img_width, 3)
img_gen = tf.keras.preprocessing.image.ImageDataGenerator()
gen0 = img_gen.flow_from_directory(par1_train_data_dir,(200, 200),'rgb', batch_size=10)
ds_par1 = tf.data.Dataset.from_generator(gen
output_types=(tf.float32, tf.float32),
output_shapes=([None,img_height,img_width,3], [None,num_classes])
)
ds_par2 = tf.data.Dataset.from_generator(gen0
output_types=(tf.float32, tf.float32),
output_shapes=([None,img_height,img_width,3], [None,num_classes])
)
dataset_dict={}
dataset_dict['1'] = ds_par1
dataset_dict['2'] = ds_par2
def create_tf_dataset_for_client_fn(client_id):
return dataset_dict[client_id]
source = tff.simulation.ClientData.from_clients_and_fn(['1','2'],create_tf_dataset_for_client_fn)
def client_data(n):
ds = source.create_tf_dataset_for_client(source.client_ids[n])
return ds
train_data = [client_data(n) for n in range(1)]
images, labels = next(img_gen.flow_from_directory(par1_train_data_dir,batch_size=batch_size,target_size=(img_height,img_width)))
sample_batch = (images,labels)
def create_compiled_keras_model():
.....
def model_fn():
keras_model = create_compiled_keras_model()
return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, train_data)
print('round 1, metrics={}'.format(round_num, metrics))

keras split Input for multiple "mini-units"

Trying to implement this article.
Edit1: found one error, my output size is 10 and not 1. (one number per each sentence, there are 10 sentences per document)
Edit2: I got another error that involves the batch size. when I make it 10 the model trains (!!!!). but I think it's not the right way... the error I had given batch size 3 is
Edit 3 Solved!! stuff with sizes + the fact the BIDIRECTIONAL returns different stuff from LSTM so I need to concat myself. Will put right code in answer.
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
[[metrics_8/acc/Mean_1/_5481]]
(1) Invalid argument: Incompatible shapes: [10] vs. [3]
[[{{node training_5/Adam/gradients/loss_8/dense_61_loss/mul_grad/BroadcastGradientArgs}}]]
0 successful operations.
0 derived errors ignored.
The target is extractive document summarization.
Link to colab with code
What they do is (you can see in the picture at page 3)
100 units of BI_LSTM + Attention for each sentence of the document.
Concat those and insert them to 1 BI_LSTM + Attention to get document embeddings.
Use document embeddings + hidden states from the LSTM to get some features.
Classify according to features
After a lot of struggle with keras low level api, I got a simple version to work.
What I did was to get the already sentence embedding and just do the last LSTM.
Or get words embedding of a sentence and make the small unit of sentence LSTM to work.
Now I am trying to put every thing together but can not get the sizes to fit.
My input size is
number_of_document * sentences_in_document * words_in_sentence * word_embedding
In the code I set those to be 20 * 10 * 50 * 100
(10 sentence in document is for everything to run faster for now..).
My output is
10 * 1 meaning for each sentence I get 1/0 if it's part of the document summary.
(I have not yet did the features extraction part, I just use another dense layer to give me probabilities instead..)
I think the problem is with this part of the code
X_doc = Lambda(lambda x: x[:,t, :, :])(X)
The code with sample data
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import keras
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import tensorflow as tf
from keras import backend as K
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
return_sequences=False, name='bidirectional_' + str(i)), merge_mode='concat'))
densors_1.append(Dense(densor1_output_size, activation = "tanh"))
densors_2.append(Dense(densor2_output_size, activation = "softmax"))
def invoke_sentence(sentence_matrix, index):
if index==0:
print(type(sentence_matrix))
print(tf.shape(sentence_matrix))
Ys = bi_lstms[index](sentence_matrix)
attention_middle = densors_1[index](Ys)
output = densors_2[index](attention_middle)
if index==0:
print(f'Ys shape is {Ys.shape}')
print(f'attention_middle shape is {attention_middle.shape}')
print(f'output shape is {output.shape}')
return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
"""
Arguments:
words_in_sentence -- Tx -- length of the input sequence - max words per sentence
sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
lstm_hidden_size -- hidden state size of the Bi-LSTM
word_embeddings_size -- size of the word embeddings
model_output_size -- size of each sentence label (1 or 0)
Returns:
model -- Keras model instance
"""
sentences_embeddings = []
X = Input(shape=(sentences_in_doc , words_in_sentence, word_embeddings_size), name= 'X')
for t in range(Ty):
X_doc = Lambda(lambda x: x[:,t, :, :])(X)
print(type(X_doc))
print(X_doc)
print(X_doc.shape)
sentences_embeddings.append(invoke_sentence(X_doc, t))
sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=0))(sentences_embeddings)
Ys = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc , lstm_hidden_size*2),
return_sequences=False, name='bidirectional_document'),
merge_mode='concat')(sentences_embeddings_stacked)
attention_middle = Dense(densor1_output_size, activation = "tanh")(Ys)
document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)
outputs = Dense(model_output_size, activation = "softmax")(document_embedding)
# compute_features(document_embeddings, sentences_embeddings, ys)
model = Model(inputs=X, outputs=outputs)
return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=2, epochs=1)
# Sizes
num_of_training_examples = 20
words_in_sentence = 50 # max words per sentence
sentences_in_doc = 10
model_output_size = 10
word_embeddings_size = 100
lstm_hidden_size = 200
densor1_output_size = 400
densor2_output_size = 400
# Data
x_train = np.random.rand(num_of_training_examples, sentences_in_doc, words_in_sentence, word_embeddings_size)
y_train= np.random.randint(2, size=(num_of_training_examples, sentences_in_doc))
print(x_train.shape)
print(y_train.shape)
# Initialize arrays
inputs = []
bi_lstms = []
densors_1 =[]
densors_2 = []
for i in range(sentences_in_doc):
bi_lstms.append(Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(words_in_sentence, word_embeddings_size),
return_sequences=True, return_state=True, name='bidirectional_' + str(i))))
densors_1.append(Dense(densor1_output_size, activation = "tanh",name='senteence_dense_tanh' + str(i)))
densors_2.append(Dense(densor2_output_size, activation = "softmax",name='senteence_dense_softmax' + str(i)))
def invoke_sentence(sentence_matrix, index):
if index==0:
print(type(sentence_matrix))
print(tf.shape(sentence_matrix))
lstm, forward_h, forward_c, backward_h, backward_c = bi_lstms[index](sentence_matrix)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
attention_middle = densors_1[index](state_h)
output = densors_2[index](attention_middle)
if index==0:
print(f'lstm shape is {lstm.shape}')
print(f'state_h shape is {state_h.shape}')
print(f'state_c shape is {state_c.shape}')
print(f'attention_middle shape is {attention_middle.shape}')
print(f'output shape is {output.shape}')
return output
def model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size):
"""
Arguments:
words_in_sentence -- Tx -- length of the input sequence - max words per sentence
sentences_in_doc --Ty -- length of the output sequence - number of sentences per document
lstm_hidden_size -- hidden state size of the Bi-LSTM
word_embeddings_size -- size of the word embeddings
model_output_size -- size of each sentence label (1 or 0)
Returns:
model -- Keras model instance
"""
sentences_embeddings = []
X = Input(shape=(sentences_in_doc, words_in_sentence, word_embeddings_size), name= 'X')
for t in range(sentences_in_doc):
X_doc = Lambda(lambda x: x[:, t,:, :])(X)
if(t==0):
print("X_doc")
print(type(X_doc))
print(X_doc)
print(X_doc.shape)
sentence_embedding = invoke_sentence(X_doc, t)
sentences_embeddings.append(sentence_embedding)
if(t==0):
print("sentence_embedding")
print(type(sentence_embedding))
print(sentence_embedding)
print(sentence_embedding.shape)
sentences_embeddings_stacked = Lambda(lambda x: tf.stack(x, axis=1))(sentences_embeddings)
print("sentences_embeddings_stacked")
print(type(sentences_embeddings_stacked))
print(sentences_embeddings_stacked)
print(sentences_embeddings_stacked.shape)
doc_lstm, doc_forward_h, doc_forward_c, doc_backward_h, doc_backward_c = Bidirectional(LSTM(units = lstm_hidden_size, input_shape=(sentences_in_doc, lstm_hidden_size*2),
return_sequences=True, return_state=True, name='bidirectional_document'),
merge_mode='concat')(sentences_embeddings_stacked)
doc_state_h = Concatenate()([doc_forward_h, doc_backward_h])
doc_state_c = Concatenate()([doc_forward_c, doc_backward_c])
print(f'doc_lstm shape is {doc_lstm.shape}')
print(f'doc_state_h shape is {doc_state_h.shape}')
print(f'doc_state_c shape is {doc_state_c.shape}')
attention_middle = Dense(densor1_output_size, activation = "tanh")(doc_state_h)
document_embedding = Dense(densor2_output_size, activation = "softmax")(attention_middle)
print(f'document_embedding shape is {document_embedding.shape}')
# my_layer = MyLayer(input_shape=((400), (10,400), (10,400)), output_dim=2)
# custom_output = my_layer([document_embedding, sentences_embeddings_stacked, doc_state_h])
# print(f'custom_output shape is {custom_output.shape}')
outputs = Dense(model_output_size, activation = "softmax")(document_embedding)
model = Model(inputs=X, outputs=outputs)
return model
model = model(words_in_sentence, sentences_in_doc, lstm_hidden_size, word_embeddings_size, model_output_size)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x = x_train, y = y_train, batch_size=5, epochs=1)