I am trying to create hierarchical attention in TensorFlow 2.0 using the AdditiveAttention Keras layer. The error I get:
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("question_input:0", shape=(None, None), dtype=float32) at layer "question_input". The following previous layers were accessed without issue: []
Can someone please tell me what I am doing wrong?
def get_text_model(self, embedding):
print("Text Input")
text_input = Input(shape=(None,), name="text_input")
text_embedding = embedding(text_input)
cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(text_embedding)
output = cnn_1d
model = Model(text_input, output)
return model
def get_sentence_attention_model(self, sentence_input, encoded_question, sentence_model):
encoded_sentence = sentence_model(sentence_input)
sentence_attention = AdditiveAttention()([encoded_sentence, encoded_question])
output = Concatenate()([sentence_attention, encoded_question])
model = Model(sentence_input, output)
return model
def get_section_model(self, encoded_question, sentence_model):
section_input = Input(shape=(None, None), name="section_input")
section_encoded = TimeDistributed(sentence_model)([self.question_input, section_input])
cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(section_encoded)
output = cnn_1d
section_attention_output = AdditiveAttention()([output, encoded_question])
model = Model(section_input, section_attention_output)
return model
def get_document_model(self, encoded_question, section_model):
document_input = Input(shape=(None, None, None), name="document_input")
document_encoded = TimeDistributed(section_model)(document_input)
cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(document_encoded)
document_attention = AdditiveAttention()([cnn_1d, encoded_question])
model = Model(document_input, document_attention)
return model
def get_model(self):
self.vocabulary_size = self.vectorizer.get_vocabulary_size()
self.embedding_matrix = self.vectorizer.get_embedding_matrix()
embedding = Embedding(self.vocabulary_size, self.embedding_size, mask_zero=True, trainable=True,
weights=None if self.embedding_matrix is None else [self.embedding_matrix])
self.question_input = Input(shape=(None,), name="question_input")
self.sentence_input = Input(shape=(None,), name="sentence_input")
self.question_model = self.get_text_model(embedding)
self.sentence_model = self.get_text_model(embedding)
self.encoded_question = self.question_model(self.question_input)
self.sentence_attention_model = self.get_sentence_attention_model(self.sentence_input, self.encoded_question, self.sentence_model)
self.section_model = self.get_section_model(self.encoded_question, self.sentence_attention_model)
self.document_model = self.get_document_model(self.encoded_question, self.section_model)
optimizer = Adadelta()
loss_metrics = "binary_crossentropy"
self.document_model.compile(loss=loss_metrics, optimizer=optimizer, metrics=[loss_metrics])
self.document_model.summary()
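For context on what the error message means (a toy sketch, not a fix of the full hierarchy): in the Keras functional API, every Input that an output tensor depends on must be listed in Model(inputs=..., outputs=...). In the code above, section_attention_output and document_attention both depend on encoded_question (and therefore on question_input), yet only section_input or document_input is declared, which is the most likely source of the disconnect. A minimal illustration of the pattern, with made-up layer sizes:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model

question_input = Input(shape=(8,), name="question_input")
text_input = Input(shape=(8,), name="text_input")
encoded_question = Dense(16)(question_input)
encoded_text = Dense(16)(text_input)
combined = Concatenate()([encoded_text, encoded_question])

# Model(text_input, combined) would raise "Graph disconnected", because
# `combined` also depends on question_input, which would not be declared.
model = Model([text_input, question_input], combined)
model.summary()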
Related
I have two functional Keras models at the same level (same input and output shape); one of them is pre-trained. I would like to combine them horizontally and then retrain the whole model: the pre-trained branch keeps its weights, while the other is initialized randomly. How can I combine them horizontally by adding them as branches (not concatenating)?
def define_model_a(input_shape, initializer, outputs = 1):
input_layer = Input(shape=(input_shape))
# first path
path10 = input_layer
path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias = True, kernel_initializer=initializer)(path10)
path12 = Lambda(lambda x: abs(x))(path11)
output = Add()([path10, path12])
define_model_a = Model(inputs=input_layer, outputs=output)
define_model_a._name = 'model_a'
return define_model_a
def define_model_b(input_shape, initializer, outputs = 1):
input_layer = Input(shape=(input_shape))
# first path
path10 = input_layer
path11 = Conv1D(filters=1, kernel_size=3, strides=1, padding="same", use_bias = True, kernel_initializer=initializer)(path10)
path12 = ReLU()(path11)
path13 = Dense(1, use_bias = True)(path12)
output = path13
define_model_b = Model(inputs=input_layer, outputs=output)
define_model_b._name = 'model_b'
return define_model_b
def define_merge_interpretation()
????
????
output = Add()(model_a, model_b)
model = Model(inputs=input_layer, outputs=output)
return model
initializer = tf.keras.initializers.HeNormal()
model_a = define_model_a(input_shape, initializer, outputs = 1)
model_b = define_model_b(input_shape, initializer, outputs = 1)
model_a.load_weights(load_path)
merge_interpretation = def merge_interprettation( )
history = merge_interpretation.fit(......
As a reference, I am looking for a final structure like the one shown in the image, but with some pre-trained branches.
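For reference, one common pattern (a sketch only, assuming both branches really do produce tensors of the same shape) is to create one shared Input, call both existing models on it, and Add their outputs; the pre-trained branch keeps its loaded weights and both branches stay trainable:
from tensorflow.keras.layers import Input, Add
from tensorflow.keras.models import Model

def define_merge_interpretation(model_a, model_b, input_shape):
    # One shared input feeds both pre-built branches; their outputs are summed.
    input_layer = Input(shape=input_shape)
    out_a = model_a(input_layer)   # pre-trained branch, weights already loaded
    out_b = model_b(input_layer)   # randomly initialized branch
    output = Add()([out_a, out_b])
    return Model(inputs=input_layer, outputs=output)

# merge_interpretation = define_merge_interpretation(model_a, model_b, input_shape)
# history = merge_interpretation.fit(...)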
Consider the following model:
class FractalNeuralNetwork(tf.keras.Model):
def __init__(self, class_number):
super(FractalNeuralNetwork, self).__init__()
self.box_counting_patches = [BoxCountingPatch(box_size) for box_size in range(3, 41 + 1, 2)]
self.chebyshev = ChebyshevBinaryPatch()
self.euclidean = EuclideanBinaryPatch()
self.manhattan = ManhattanBinaryPatch()
self.percolation_c = PercolationC()
self.percolation_m = PercolationM()
self.percolation_q = PercolationQ()
self.probability = ProbabilityMatrix()
self.fractal_dimension = FractalDimension()
self.lacunarity = Lacunarity()
self.assemble = AssembleFractalImage()
self.resize = tf.keras.layers.Resizing(width=224, height=224)
self.rescale = tf.keras.layers.Rescaling(scale=1./255)
self.mobilenet_v2 = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4",
output_shape=[1280],
trainable=False)
self.combine = tf.keras.layers.Add()
self.score = tf.keras.layers.Dense(class_number, activation='softmax')
def call(self, inputs):
inputs = tf.ensure_shape(inputs, self.ensure_input_shape)
box_counting_patches = [box_counting_patch(inputs) for box_counting_patch in self.box_counting_patches]
chebyshev = self.chebyshev(inputs=box_counting_patches)
euclidean = self.euclidean(inputs=box_counting_patches)
manhattan = self.manhattan(inputs=box_counting_patches)
percolation_c = self.percolation_c(inputs=[chebyshev, euclidean, manhattan])
percolation_m = self.percolation_m(inputs=[chebyshev, euclidean, manhattan])
percolation_q = self.percolation_q(inputs=[chebyshev, euclidean, manhattan])
probability = self.probability(inputs=[chebyshev, euclidean, manhattan])
fractal_dimension = self.fractal_dimension(inputs=probability)
lacunarity = self.lacunarity(inputs=probability)
fractal_output = self.assemble(
inputs=[
fractal_dimension,
lacunarity,
percolation_c,
percolation_m,
percolation_q
]
)
fractal_output = self.resize(fractal_output)
fractal_output = self.rescale(fractal_output)
fractal_output = self.mobilenet_v2(fractal_output)
original_output = self.rescale(inputs)
original_output = self.mobilenet_v2(original_output)
combined_output = self.combine([fractal_output, original_output])
output = self.score(combined_output)
return output
None of the custom layers here are trainable; they just perform calculations that extract fractal features from images.
The model is trained with the following code:
model = FractalNeuralNetwork(
class_number=CLASS_NUMBER
)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(
training_set,
validation_data=validation_set,
epochs=1
)
The first batch comes with the normal shape (None, 224, 224, 3), but the second comes as (None, None, None, None), and that breaks my model. Why does this happen?
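One pattern worth checking (a sketch, not a confirmed diagnosis): a fully unknown shape like (None, None, None, None) usually means the tf.data pipeline feeding fit() has lost its static shape information, for example after a tf.py_function or certain map steps, so the model is retraced with unknown shapes on later batches. Assuming training_set and validation_set yield (image_batch, label_batch) pairs of 224x224x3 images, reasserting the shape at the end of the pipeline looks like this:
import tensorflow as tf

def restore_shapes(images, labels):
    # Reassert the static shape that earlier map / py_function steps may have dropped.
    images = tf.ensure_shape(images, (None, 224, 224, 3))
    return images, labels

training_set = training_set.map(restore_shapes)
validation_set = validation_set.map(restore_shapes)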
Here is part of a model I am working on. Being new to computing gradients in TensorFlow, I got confused when I found that all the gradient values come out as 0. Here is the code:
class A:
def __init__(self, inputA_dim, inputB_dim):
self.inputA_dim = (35, 35, 1)
self.inputB_dim = 2
self.model = self.nn_model()
self.opt = tf.keras.optimizers.Adam()
# print(self.model.summary())
def nn_model(self):
inputA = Input(self.inputA_dim)
conv1 = Conv2D(10, 3, padding="same", activation="relu")(inputA)
pool1 = MaxPool2D(padding='same')(conv1)
conv2 = Conv2D(10, 3, padding="same", activation="relu")(pool1)
pool2 = MaxPool2D(padding='same')(conv2)
conv3 = Conv2D(10, 3, padding="same", activation="relu")(pool2)
pool3 = MaxPool2D(padding='same')(conv3)
flatten = Flatten()(pool3)
s2 = Dense(32, activation="relu")(flatten)
s3 = Dense(32, activation="relu")(s2)
s4 = Dense(2, activation="relu")(s3)
inputB = Input((self.inputB_dim,))
a1 = Dense(2, activation="relu")(inputB)
c1 = concatenate([s2, a1], axis=-1)
c2 = Dense(4, activation="relu")(c1)
outputs = Dense(1, activation="linear")(c2)
return tf.keras.Model([inputA, inputB], outputs)
def predict(self, inputs):
return self.model.predict(inputs)
def gradients(self, inputA, inputB):
inputB = tf.convert_to_tensor(inputB)
with tf.GradientTape() as tape:
tape.watch(inputB)
values = self.model([inputA, inputB])
values = tf.squeeze(values)
g = tape.gradient(values, inputB)
print(g)
return g
Later I found there is another method called jacobian, which I also tried here; it still gives 0s as gradient values. Can anyone tell me what to do? Thanks.
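One thing worth ruling out (a diagnostic sketch, not a definitive answer): inputB reaches the output only through a1 = Dense(2, activation="relu")(inputB). If both of those ReLU units are inactive for the inputs being tested, the gradient with respect to inputB is exactly 0, regardless of the rest of the network. Naming that layer and probing its activations makes this easy to check; the name inputB_branch below is hypothetical and assumes the Dense call is changed to Dense(2, activation="relu", name="inputB_branch"):
import tensorflow as tf

a = A(inputA_dim=(35, 35, 1), inputB_dim=2)
# Sub-model that exposes the activations of the branch fed by inputB.
probe = tf.keras.Model(a.model.inputs, a.model.get_layer("inputB_branch").output)

inputA_batch = tf.random.normal((4, 35, 35, 1))
inputB_batch = tf.random.uniform((4, 2))
print(probe([inputA_batch, inputB_batch]))
# All zeros here would mean the ReLU units are dead; trying a linear or
# LeakyReLU activation on that branch is one way to test the hypothesis.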
I'm trying to run a classification simulation in tff, but I'm getting this error:
TypeError: Unable to interpret an argument of type tensorflow.python.data.ops.dataset_ops.PrefetchDataset as a TFF value.
Here is the code I'm using:
client_lr = 1e-3
server_lr = 1e-1
NUM_ROUNDS = 200
NUM_EPOCHS = 5
BATCH_SIZE = 2048
EPOCHS = 400
TH = 0.5
def base_model():
return Sequential([
Dense(256, activation='relu', input_shape=(x_train.shape[-1],)),
Dropout(0.5),
Dense(256, activation='relu'),
Dropout(0.5),
Dense(256, activation='relu'),
Dropout(0.5),
Dense(1, activation='sigmoid'),
])
client_train_dataset = collections.OrderedDict()
for i in range(1, total_clients+1):
client_name = "client_" + str(i)
start = samples_per_set * (i-1)
end = samples_per_set * i
data = collections.OrderedDict((('y', y_train[start:end]), ('x', x_train[start:end])))
client_train_dataset[client_name] = data
train_dataset = tff.simulation.FromTensorSlicesClientData(client_train_dataset)
sample_dataset = train_dataset.create_tf_dataset_for_client(train_dataset.client_ids[0])
sample_element = next(iter(sample_dataset))
PREFETCH_BUFFER = 10
SHUFFLE_BUFFER = samples_per_set
def preprocess(dataset):
def batch_format_fn(element):
return collections.OrderedDict(
x=element['x'],
y=tf.reshape(element['y'], [-1, 1]))
return dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)
preprocessed_sample_dataset = preprocess(sample_dataset)
sample_batch = tf.nest.map_structure(lambda x: x.numpy(), next(iter(preprocessed_sample_dataset)))
def make_federated_data(client_data, client_ids):
return [preprocess(client_data.create_tf_dataset_for_client(x)) for x in client_ids]
federated_train_data = make_federated_data(train_dataset, train_dataset.client_ids)
def model_tff():
model = base_model()
return tff.learning.from_keras_model(
model,
input_spec=preprocessed_sample_dataset.element_spec,
loss=tf.keras.losses.BinaryCrossentropy(),
metrics=[
tfa.metrics.F1Score(num_classes=1, threshold=TH),
keras.metrics.Precision(name="precision", thresholds=TH),
keras.metrics.Recall(name="recall", thresholds=TH)
])
iterative_process = tff.learning.build_federated_averaging_process(
model_tff,
client_optimizer_fn=lambda: optimizers.Adam(learning_rate=client_lr),
server_optimizer_fn=lambda: optimizers.SGD(learning_rate=server_lr))
state = iterative_process.initialize()
federated_model = None
for round_num in range(1, NUM_ROUNDS+1):
state, tff_metrics = iterative_process.next(state, federated_train_data) # THE ERROR IS HERE
federated_model = base_model()
federated_model.compile(optimizer=optimizers.Adam(learning_rate=client_lr),
loss=tf.keras.losses.BinaryCrossentropy(),
metrics=[
tfa.metrics.F1Score(num_classes=1, threshold=TH),
keras.metrics.Precision(name="precision", thresholds=TH),
keras.metrics.Recall(name="recall", thresholds=TH)
])
state.model.assign_weights_to(model=federated_model)
federated_result = federated_model.evaluate(x_val, y_val, verbose=1, return_dict=True)
federated_test = federated_model.evaluate(x_test, y_test, verbose=1, return_dict=True)
I'm using this credit card dataset: https://www.kaggle.com/mlg-ulb/creditcardfraud
federated_train_data is a list of <PrefetchDataset shapes: OrderedDict([(x, (None, 29)), (y, (None, 1))]), types: OrderedDict([(x, tf.float64), (y, tf.int64)])> objects, just like in the Federated Learning for Image Classification tutorial on the TensorFlow Federated website.
This might be issue #918. Does this only occur when running in Google Colab? What version of TFF is being used?
Commit 4e57386 is believed to have fixed this; it is now part of the tensorflow-federated-nightly pip package.
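A quick way to check the installed version and, if needed, move to the nightly build (a sketch; run the pip line in a shell or Colab cell):
import tensorflow_federated as tff
print(tff.__version__)

# pip install --quiet --upgrade tensorflow-federated-nightly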
I think I am setting up my batches wrong. If I run with a generated dataset it runs fine, but with my own data I get an error.
If I take out the encoder (max pooling) and decoder (UpSampling2D) layers, I don't get an error.
Input size: (304, 228, 1)
Generated: RUNS
import tensorflow as tf
from tensorflow.keras import layers
from natsort import natsorted
from tensorflow.keras.models import Model
BATCH_SIZE = 4
EPOCHS = 20
LEARNING_RATE = 1e-4
RESET_TRAINING = True
INPUT_CHANNELS = 1
OUTPUT_CHANNELS = 1
LOSS_TYPE = tf.keras.losses.SparseCategoricalCrossentropy()
img_size = (304, 228)
# configure cuda
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
class UnetModel(Model):
def __init__(self, *args, **kwargs):
super().__init__(UnetModel, *args, **kwargs)
# -- Encoder -- #
# Block encoder 1
input_shape = (img_size[0], img_size[1], 1)
# If you want to know more about why we are using `he_normal`:
# https://stats.stackexchange.com/questions/319323/whats-the-difference-between-variance-scaling-initializer-and-xavier-initialize/319849#319849
# Or the excellent fastai course:
# https://github.com/fastai/course-v3/blob/master/nbs/dl2/02b_initializing.ipynb
initializer = 'he_normal'
inputs = layers.Input(shape=input_shape)
print("input shape ", input_shape)
conv_enc_1 = layers.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer=initializer)(inputs)
conv_enc_1 = layers.Conv2D(64, 3, activation = 'relu', padding='same', kernel_initializer=initializer)(conv_enc_1)
# Block encoder 2
max_pool_enc_2 = layers.MaxPooling2D(pool_size=(2, 2))(conv_enc_1)
conv_enc_2 = layers.Conv2D(128, 5, activation = 'relu', padding = 'same', kernel_initializer = initializer)(max_pool_enc_2)
conv_enc_2 = layers.Conv2D(128, 5, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_enc_2)
# Block decoder 1
up_dec_4 = layers.Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = initializer)(layers.UpSampling2D(size = (2,2))(conv_enc_2))
merge_dec_4 = layers.concatenate([conv_enc_1, up_dec_4], axis = 3)
conv_dec_4 = layers.Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(merge_dec_4)
conv_dec_4 = layers.Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_dec_4)
conv_dec_4 = layers.Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = initializer)(conv_dec_4)
# -- Decoder -- #
output = layers.Conv2D(1, 1, activation = 'softmax')(conv_dec_4)
self.model = tf.keras.Model(inputs = inputs, outputs = output)
def call(self, x):
return self.model(x)
model = UnetModel()
model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss = LOSS_TYPE, metrics= [tf.keras.metrics.get('accuracy')])
dataset_debug = tf.data.Dataset.from_tensor_slices((tf.random.normal(shape = (BATCH_SIZE, img_size[0], img_size[1], 1)), tf.random.normal(shape = (BATCH_SIZE, img_size[0], img_size[1], 1)))).batch(BATCH_SIZE)
history = model.fit(dataset_debug, epochs=EPOCHS, shuffle=True)
Does NOT run
Here I am splitting the filenames into training and validation sets using train_test_split and reading in the images in the parse_img_input function:
# takes image filenames of uint8 and normalizes to 0-1 range
def parse_img_input(img_file, img_file_out):
print("img file ", img_file)
def _parse_input(img_file, img_file_out):
# get img image
d_filepath = img_file.numpy().decode()
d_image_decoded = tf.image.decode_jpeg(
tf.io.read_file(d_filepath), channels=1)
d_image = tf.cast(d_image_decoded, tf.float32) / 255.0
# get img image
d_filepath_out = img_file_out.numpy().decode()
d_image_decoded_out = tf.image.decode_jpeg(
tf.io.read_file(d_filepath_out), channels=1)
d_image_out = tf.cast(d_image_decoded_out, tf.float32) / 255.0
# add channel dimension
d_image = tf.expand_dims(d_image, -1)
d_image_out = tf.expand_dims(d_image_out, -1)
return d_image, d_image_out
return tf.py_function(_parse_input,
inp=[img_file, img_file_out],
Tout=[tf.float32, tf.float32])
# depth_files_in, depth_files_out are lists of filenames
# split input data into train, test sets
X_train_file, X_test_file, y_train_file, y_test_file = train_test_split(depth_files_in, depth_files_out,
test_size=0.2,
random_state=0)
dataset_train = tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
dataset_train = dataset_train.map(parse_img_input)
dataset_test = tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
dataset_test = dataset_test.map(parse_img_input)
history = model.fit(dataset_train, epochs=EPOCHS, shuffle=True, batch_size = BATCH_SIZE, validation_data= dataset_test)
F tensorflow/stream_executor/cuda/cuda_dnn.cc:535] Check failed: cudnnSetTensorNdDescriptor(handle_.get(), elem_type, nd, dims.data(), strides.data()) == CUDNN_STATUS_SUCCESS (3 vs. 0)batch_descriptor: {count: 228 feature_map_count: 64 spatial: 152 0 value_min: 0.000000 value_max: 0.000000 layout: BatchDepthYX}
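A few observations on the file-based pipeline, offered as a sketch rather than a confirmed fix: (1) the dataset is never batched, and fit() does not batch a tf.data.Dataset for you, so unbatched images reach the Conv2D layers; (2) tf.py_function returns tensors with unknown static shape, which Keras then cannot check; (3) decode_jpeg(..., channels=1) already produces a trailing channel axis, so the extra expand_dims calls add a second one. Assuming the decoded images really are 304x228 single-channel and the expand_dims lines are removed from _parse_input, the pipeline could be restructured like this:
import tensorflow as tf

def set_shapes(img, img_out, shape=(304, 228, 1)):
    # tf.py_function outputs have no static shape; restore it so Keras can validate.
    img.set_shape(shape)
    img_out.set_shape(shape)
    return img, img_out

dataset_train = (tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
                 .map(parse_img_input)
                 .map(set_shapes)
                 .batch(BATCH_SIZE))
dataset_test = (tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
                .map(parse_img_input)
                .map(set_shapes)
                .batch(BATCH_SIZE))

history = model.fit(dataset_train, epochs=EPOCHS, validation_data=dataset_test)
The zero spatial dimension in the cuDNN descriptor (spatial: 152 0) is consistent with a mis-shaped, unbatched tensor being pooled down to nothing.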