Add a custom layer in TensorFlow to preprocess text (lowercase, remove special characters, remove numbers, etc.)

I have the following function that preprocesses a sentence, and I would like to use it inside my TensorFlow model as a layer.
import re
from unidecode import unidecode

def preprocess(sentence):
    sentence = str(sentence)
    sentence = sentence.lower()  # lowercase
    sentence = sentence.replace('{html}', "")  # delete html from sentences
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', sentence)
    rem_url = re.sub(r'http\S+', '', cleantext)  # remove webpages
    rem_num = re.sub('[0-9]+', '', rem_url)  # remove numbers
    rem_email = re.sub(r'[A-Za-z0-9]*@[A-Za-z]*\.?[A-Za-z0-9]*', "", rem_num)  # remove email
    rem_accents = unidecode(rem_email)  # remove accents (unidecode takes just the string)
    sentence_preprocessed = re.sub(r'[^\w\s]', '', rem_accents)  # remove special characters
    return sentence_preprocessed
I have tried to use it as a Lambda layer, but the model is not learning anything.
tokenizer_layer = hub.KerasLayer(tfhub_handle_preprocess, name='tokenizer')
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocess_input = tf.keras.layers.Lambda(preprocess, name="lambda_layer")(text_input)
    tokenized_inputs = tokenizer_layer(preprocess_input)
    outputs = encoder(tokenized_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.2)(net)
    net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
    return tf.keras.Model(text_input, net)
How could I change my code in order to make it work?
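A note on why the Lambda approach fails: the function above is plain Python string processing, so when Keras traces the graph, str(sentence) just stringifies the symbolic tensor and the cleanup never touches the actual data. A graph-compatible equivalent has to be built from tf.strings ops. Below is a minimal sketch of that idea (my rewrite, not the poster's solution; tf.strings.regex_replace uses RE2 syntax, and there is no direct in-graph equivalent of unidecode's accent stripping):

import tensorflow as tf

def preprocess_tf(text):
    # operates on a batch of tf.string tensors inside the graph
    s = tf.strings.lower(text)                        # lowercase
    s = tf.strings.regex_replace(s, r'\{html\}', '')  # delete the {html} marker
    s = tf.strings.regex_replace(s, r'<[^>]*>', '')   # strip html tags
    s = tf.strings.regex_replace(s, r'http\S+', '')   # remove webpages
    s = tf.strings.regex_replace(s, r'[0-9]+', '')    # remove numbers
    s = tf.strings.regex_replace(s, r'[^\w\s]', '')   # remove special characters
    return s

text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
clean_input = tf.keras.layers.Lambda(preprocess_tf, name='clean_text')(text_input)

The rest of build_classifier_model would then consume clean_input in place of preprocess_input.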

Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

I was approaching an NLP sequence classification problem (3 classes) using huggingface transformers (funnel-transformer/large) and TensorFlow.
First I created LASER embeddings like this:
from laserembeddings import Laser
laser = Laser()
df = pd.read_csv("mycsv.csv")
embeds = laser.embed_sentences(df['text'].values, lang='en')
write_pickle_to_file('train.pkl', embeds )
Part 1: TensorFlow version
For data preparation I use code like below:
df['text'] = temp['column1'] + tokenizer.sep_token + temp['column2'] + tokenizer.sep_token + temp['column3']

def encode_text(texts):
    enc_di = tokenizer.batch_encode_plus(
        texts,
        padding='max_length',  # pad_to_max_length is deprecated; padding='max_length' already covers it
        truncation=True,
        return_token_type_ids=True,
        max_length=cfg.max_len
    )
    return [np.asarray(enc_di['input_ids'], dtype=np.int64),
            np.asarray(enc_di['attention_mask'], dtype=np.int64),
            np.asarray(enc_di['token_type_ids'], dtype=np.int64)]
Then inside the training function:
x_train = encode_text(df.text.to_list())
train_ds = (
    tf.data.Dataset
    .from_tensor_slices((
        {
            "input_ids": x_train[0],
            "input_masks": x_train[1],
            "input_segments": x_train[2],
            "lasers": np.array(train[laser_columns].values, dtype=np.float32)  # laser_columns contains all the laser embedded columns
        },
        tf.one_hot(df["label"].to_list(), 3)  # 3 classes
    ))
    .repeat()
    .shuffle(2048)
    .batch(cfg.batch_size)
    .prefetch(AUTO)
)
I add the LASER embedding to my model like this:
def create_model():
    transformer = transformers.TFAutoModel.from_pretrained(cfg.pretrained, config=config, from_pt=True)
    max_len = 512
    # transformer
    input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
    input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
    input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
    sequence_output = transformer(input_ids, attention_mask=input_masks, token_type_ids=input_segments)[0]
    cls_token = sequence_output[:, 0, :]
    # lasers
    lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers")  # n_lasers = 1024
    lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)
    x = tf.keras.layers.Concatenate()([cls_token, lasers_output])
    x = tf.keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(2048, activation='tanh')(x)
    x = tf.keras.layers.Dropout(0.1)(x)
    out = tf.keras.layers.Dense(3, activation='softmax')(x)
    model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
    model.compile(Adam(lr=1e-5), loss=losses.CategoricalCrossentropy(),
                  metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')])
    return model
Now my question is: how do we do the same with PyTorch for the exact same problem and the same dataset?
Part 2: PyTorch version
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
def __init__(self,df, max_length, tokenizer, training=True):
self.df = df
self.max_len = max_length
self.tokenizer = tokenizer
self.column1 = self.df['column1'].values
self.column2 = self.df['column2'].values
self.column3= self.df['column3'].values
self.column4= self.df['column4'].values
self.training = training
if self.training:
self.targets = self.df['label'].values
def __len__(self):
return len(self.df)
def __getitem__(self, index):
column1 = self.column1[index]
column2= self.column2[index]
column3= self.column3[index]
text0 = self.column4[index]
text1 = column1 + ' ' + column2+ ' ' + column3
inputs = self.tokenizer.encode_plus(
text1 ,
text0 ,
truncation = True,
add_special_tokens = True,
return_token_type_ids = True,
is_split_into_words=False,
max_length = self.max_len
)
samples = {
'input_ids': inputs['input_ids'],
'attention_mask': inputs['attention_mask'],
}
if 'token_type_ids' in inputs:
samples['token_type_ids'] = inputs['token_type_ids']
if self.training:
samples['target'] = self.targets[index]
return samples
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
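For context, the dataset and the padding collator are typically wired together through a DataLoader. A usage sketch, where CONFIG['max_length'] and CONFIG['batch_size'] are assumed entries alongside the CONFIG['tokenizer'] used above:

from torch.utils.data import DataLoader

train_dataset = myDataset(df, max_length=CONFIG['max_length'], tokenizer=CONFIG['tokenizer'])
dataloader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'],
                        shuffle=True, collate_fn=collate_fn)  # pads each batch to its longest sequence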
class myModel(nn.Module):
    def __init__(self, model_name):
        super(myModel, self).__init__()
        self.model = AutoModel.from_pretrained(model_name)
        if True:
            print("using gradient_checkpoint...")
            self.model.gradient_checkpointing_enable()
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": 0.0,
                "layer_norm_eps": 1e-7,
                "add_pooling_layer": False,
                "attention_probs_dropout_prob": 0.0,
            }
        )
        self.fc = nn.Linear(self.config.hidden_size, 3)

    def forward(self, ids, mask):
        out = self.model(input_ids=ids, attention_mask=mask, output_hidden_states=False)
        out = out[0][:, 0, :]
        outputs = self.fc(out)
        return outputs
And in the train and validation loop I have code like this:
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
    ids = data['input_ids'].to(device, dtype=torch.long)
    mask = data['attention_mask'].to(device, dtype=torch.long)
    targets = data['target'].to(device, dtype=torch.long)
    batch_size = ids.size(0)
    optimizer.zero_grad()
    # forward pass with `autocast` context manager
    with autocast(enabled=True):
        outputs = model(ids, mask)
        loss = loss_fct(outputs, targets)
I would like to know where and how in my huggingface PyTorch pipeline I can use the LASER embeddings that I created earlier and used in the TensorFlow huggingface model.
I want to concatenate the LASER embeddings with the funnel transformer's simple CLS token output and train the transformer with the LASER embeddings as an extra feature, exactly as I did in the TensorFlow example. The TensorFlow implementation with the concatenated LASER embeddings posted above works well; I just want to do the same in the PyTorch implementation. Do you know how to modify my PyTorch code to make it work? Your help is highly appreciated, thanks in advance.
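For what it's worth, one way to mirror the TensorFlow graph in PyTorch is to give the model a third forward argument for the LASER vector and concatenate it with the CLS embedding before the classifier head. This is a sketch under my own assumptions (each batch exposes the 1024-dim LASER vector, e.g. as data['lasers']; the hidden sizes copy the TF model above), not a drop-in answer:

import torch
import torch.nn as nn
from transformers import AutoModel

class myLaserModel(nn.Module):
    def __init__(self, model_name, n_lasers=1024, n_classes=3):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.laser_dense = nn.Linear(n_lasers, n_lasers)  # mirrors the tanh Dense over lasers
        hidden = self.model.config.hidden_size
        self.dense = nn.Linear(hidden + n_lasers, 2048)   # mirrors the 2048 tanh Dense
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(2048, n_classes)

    def forward(self, ids, mask, lasers):
        out = self.model(input_ids=ids, attention_mask=mask)
        cls_token = out[0][:, 0, :]                       # CLS embedding, as in the TF model
        lasers_out = torch.tanh(self.laser_dense(lasers))
        x = torch.cat([cls_token, lasers_out], dim=1)
        x = self.dropout(x)
        x = torch.tanh(self.dense(x))
        x = self.dropout(x)
        return self.fc(x)                                 # raw logits

In the training loop the extra tensor travels with the batch, e.g. lasers = data['lasers'].to(device, dtype=torch.float) and then outputs = model(ids, mask, lasers). Note the head returns raw logits rather than softmax probabilities, because nn.CrossEntropyLoss applies the softmax internally (unlike the TF model, which ends in a softmax paired with CategoricalCrossentropy).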

How to build a custom question-answering head when using hugginface transformers?

Using the TFBertForQuestionAnswering.from_pretrained() function, we get a predefined head on top of BERT together with a loss function that is suitable for this task.
My question is how to create a custom head without relying on TFAutoModelForQuestionAnswering.from_pretrained().
I want to do this because there is no place where the architecture of the head is explained clearly. By reading the code here we can see the architecture they are using, but I can't be sure I understand their code 100%.
Starting from How to Fine-tune HuggingFace BERT model for Text Classification is good. However, it covers only the classification task, which is much simpler.
'start_positions' and 'end_positions' are created following this tutorial.
So far, I've got the following:
train_dataset
# Dataset({
# features: ['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'],
# num_rows: 99205
# })
train_dataset.set_format(type='tensorflow', columns=['input_ids', 'token_type_ids', 'attention_mask'])
features = {x: train_dataset[x] for x in ['input_ids', 'token_type_ids', 'attention_mask']}
labels = [train_dataset[x] for x in ['start_positions', 'end_positions']]
labels = np.array(labels).T
tfdataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(16)
input_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='input_ids')
token_type_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='token_type_ids')
attention_mask = tf.keras.layers.Input((256,), dtype=tf.int32, name='attention_mask')
bert = TFAutoModel.from_pretrained("bert-base-multilingual-cased")
output = bert([input_ids, token_type_ids, attention_mask]).last_hidden_state
output = tf.keras.layers.Dense(2, name="qa_outputs")(output)
model = tf.keras.models.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=output)
num_train_epochs = 3
num_train_steps = len(tfdataset) * num_train_epochs
optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01
)
def qa_loss(labels, logits):
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE
    )
    start_loss = loss_fn(labels[0], logits[0])
    end_loss = loss_fn(labels[1], logits[1])
    return (start_loss + end_loss) / 2.0

model.compile(
    loss=qa_loss,  # compile must reference the custom loss defined above
    optimizer=optimizer
)
model.fit(tfdataset, epochs=num_train_epochs)
And I am getting the following error:
ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(2,) and logits.shape=(256, 2)
It is complaining about the shape of the labels. This should not happen, since I am using the SparseCategoricalCrossentropy loss.
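(A side note on where those shapes come from: Keras passes the loss the whole batch, so labels has shape (batch_size, 2) and logits has shape (batch_size, 256, 2). labels[0] and logits[0] therefore select the first sample, pairing a (2,) label with (256, 2) logits, which is exactly the reported mismatch. A sketch of a loss that splits along the intended axes instead, assuming that label layout:)

def qa_loss(labels, logits):
    # labels: (batch, 2) start/end token indices; logits: (batch, seq_len, 2)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    start_loss = loss_fn(labels[:, 0], logits[:, :, 0])  # classify the start token over seq_len positions
    end_loss = loss_fn(labels[:, 1], logits[:, :, 1])    # classify the end token
    return (start_loss + end_loss) / 2.0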
For future reference: I actually found a solution, which is just to edit the TFBertForQuestionAnswering class itself. For example, I added an additional layer in the following code, trained the model as usual, and it worked.
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from transformers.modeling_tf_utils import TFQuestionAnsweringLoss, get_initializer, input_processing
from transformers.modeling_tf_outputs import TFQuestionAnsweringModelOutput
from transformers import BertConfig
class MY_TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss):
    # names with a '.' represent the authorized unexpected/missing layers when a TF model is loaded from a PT model
    _keys_to_ignore_on_load_unexpected = [
        r"pooler",
        r"mlm___cls",
        r"nsp___cls",
        r"cls.predictions",
        r"cls.seq_relationship",
    ]

    def __init__(self, config: BertConfig, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
        # This is the dense layer I added
        self.my_dense = tf.keras.layers.Dense(
            units=config.hidden_size,
            kernel_initializer=get_initializer(config.initializer_range),
            name="my_dense",
        )
        self.qa_outputs = tf.keras.layers.Dense(
            units=config.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="qa_outputs",
        )

    def call(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        start_positions=None,
        end_positions=None,
        training=False,
        **kwargs,
    ):
        r"""
        start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Positions outside of the sequence
            are not taken into account for computing the loss.
        """
        inputs = input_processing(
            func=self.call,
            config=self.config,
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            start_positions=start_positions,
            end_positions=end_positions,
            training=training,
            kwargs_call=kwargs,
        )
        outputs = self.bert(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            token_type_ids=inputs["token_type_ids"],
            position_ids=inputs["position_ids"],
            head_mask=inputs["head_mask"],
            inputs_embeds=inputs["inputs_embeds"],
            output_attentions=inputs["output_attentions"],
            output_hidden_states=inputs["output_hidden_states"],
            return_dict=inputs["return_dict"],
            training=inputs["training"],
        )
        sequence_output = outputs[0]
        # You also have to add it here
        my_logits = self.my_dense(inputs=sequence_output)
        logits = self.qa_outputs(inputs=my_logits)
        start_logits, end_logits = tf.split(value=logits, num_or_size_splits=2, axis=-1)
        start_logits = tf.squeeze(input=start_logits, axis=-1)
        end_logits = tf.squeeze(input=end_logits, axis=-1)
        loss = None
        if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
            labels = {"start_position": inputs["start_positions"]}
            labels["end_position"] = inputs["end_positions"]
            loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
        if not inputs["return_dict"]:
            output = (start_logits, end_logits) + outputs[2:]
            return ((loss,) + output) if loss is not None else output
        return TFQuestionAnsweringModelOutput(
            loss=loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
        return TFQuestionAnsweringModelOutput(
            start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
        )
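For completeness, the customized class loads a checkpoint exactly like the stock one. A usage sketch, reusing the multilingual checkpoint from earlier in the question:

my_model = MY_TFBertForQuestionAnswering.from_pretrained("bert-base-multilingual-cased")

Because the class mixes in TFQuestionAnsweringLoss, batches that include start_positions and end_positions get their loss computed inside call, so no hand-written Keras loss is needed with this route.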

In TensorFlow, for custom layers that need arguments at instantiation, does the get_config method need overriding?

Ubuntu - 20.04,
Tensorflow - 2.2.0,
Tensorboard - 2.2.1
I have read that one needs to reimplement the get_config method in order for a custom layer to be serializable.
I have a custom layer that accepts arguments in its __init__. It uses another custom layer that consumes arguments in its __init__ as well. I can:
Without Tensorboard callbacks:
1. Use them in a model, both in eager mode and in graph form
2. Run tf.saved_model.save, and it executes without a glitch
3. Load the model saved in 2. above using tf.saved_model.load
4. Call model(input) on the loaded model; I can also call call_and_return_all_conditional_losses(input), and they run right as well
With Tensorboard callbacks:
All of the above work (I can .fit, save, load, predict from the loaded model etc.), except that while running fit I get:
WARNING:tensorflow:Model failed to serialize as JSON. Ignoring... Layer PREPROCESS_MONSOON has arguments in `__init__` and therefore must override `get_config`.
Pasting the entire code here so it can be run end to end; you just need TensorFlow 2 installed. Please delete/add the callbacks (only the Tensorboard callback is there) in .fit to see the two behaviors mentioned above.
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers as l
from tensorflow import keras as k
import numpy as np
##making empty directories
import os
os.makedirs('r_data',exist_ok=True)
os.makedirs('r_savedir',exist_ok=True)
#Preparing the dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train_ = pd.DataFrame(x_train.reshape(60000,-1),columns = ['col_'+str(i) for i in range(28*28)])
x_test_ = pd.DataFrame(x_test.reshape(10000,-1),columns = ['col_'+str(i) for i in range(28*28)])
x_train_['col_cat1'] = [np.random.choice(['a','b','c','d','e','f','g','h','i']) for i in range(x_train_.shape[0])]
x_test_['col_cat1'] = [np.random.choice(['a','b','c','d','e','f','g','h','i','j']) for i in range(x_test_.shape[0])]
x_train_['col_cat2'] = [np.random.choice(['a','b','c','d','e','f','g','h','i']) for i in range(x_train_.shape[0])]
x_test_['col_cat2'] = [np.random.choice(['a','b','c','d','e','f','g','h','i','j']) for i in range(x_test_.shape[0])]
x_train_[np.random.choice([True,False],size = x_train_.shape,p=[0.05,0.95]).reshape(x_train_.shape)] = np.nan
x_test_[np.random.choice([True,False],size = x_test_.shape,p=[0.05,0.95]).reshape(x_test_.shape)] = np.nan
x_train_.to_csv('r_data/x_train.csv',index=False)
x_test_.to_csv('r_data/x_test.csv',index=False)
pd.DataFrame(y_train).to_csv('r_data/y_train.csv',index=False)
pd.DataFrame(y_test).to_csv('r_data/y_test.csv',index=False)
#**THE MAIN LAYER THAT WE ARE TALKING ABOUT**
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column
import os
class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'keys': self.keys,
    #         'keys_all': self.keys_all,
    #     })
    #     return config

    def build(self, input_shape):
        def create_moving_mean_vars():
            return tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)
        self.moving_means_total = {t: create_moving_mean_vars() for t in self.keys}
        self.layer_global_counter = tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)

    def call(self, inputs, training=True):
        null_cols = {k: tf.math.is_finite(inputs[k]) for k in self.keys}
        current_means = {}

        def compute_update_current_means(t):
            current_mean = tf.math.divide_no_nan(
                tf.reduce_sum(tf.where(null_cols[t], inputs[t], 0.), axis=0),
                tf.reduce_sum(tf.cast(tf.math.is_finite(inputs[t]), tf.float32), axis=0))
            self.moving_means_total[t].assign_add(current_mean)
            return current_mean

        if training:
            current_means = {t: compute_update_current_means(t) for t in self.keys}
            outputs = {t: tf.where(null_cols[t], inputs[t], current_means[t]) for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
            self.layer_global_counter.assign_add(1.)
        else:
            outputs = {t: tf.where(null_cols[t], inputs[t], (self.moving_means_total[t] / self.layer_global_counter))
                       for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
        return outputs
class PREPROCESS_MONSOON(layers.Layer):
    def __init__(self, cat_cols_with_unique_values, num_cols):
        '''cat_cols_with_unique_values: (dict) {'col_cat':[unique_values_list]}
        num_cols: (list) [num_cols_name_list]'''
        super().__init__()
        self.cat_cols = cat_cols_with_unique_values
        self.num_cols = num_cols

    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'cat_cols': self.cat_cols,
    #         'num_cols': self.num_cols,
    #     })
    #     return config

    def build(self, input_shape):
        self.ntd = NUM_TO_DENSE(self.num_cols)
        self.num_colnames = self.ntd.keys_all
        self.ctd = {k: layers.DenseFeatures(
                        feature_column.embedding_column(
                            feature_column.categorical_column_with_vocabulary_list(k, v),
                            tf.cast(tf.math.ceil(tf.math.log(tf.cast(len(self.cat_cols[k]), tf.float32))), tf.int32).numpy()))
                    for k, v in self.cat_cols.items()}
        self.cat_colnames = [i for i in self.cat_cols]
        self.dense_colnames = self.num_colnames + self.cat_colnames

    def call(self, inputs, training=True):
        dense_num_d = self.ntd(inputs, training=training)
        dense_cat_d = {k: self.ctd[k](inputs) for k in self.cat_colnames}
        dense_num = tf.stack([dense_num_d[k] for k in self.num_colnames], axis=1)
        dense_cat = tf.concat([dense_cat_d[k] for k in self.cat_colnames], axis=1)
        dense_all = tf.concat([dense_num, dense_cat], axis=1)
        return dense_all
##Inputs
label_path = 'r_data/y_train.csv'
data_path = 'r_data/x_train.csv'
max_epochs = 100
batch_size = 32
shuffle_seed = 42
##Creating layer inputs
dfs = pd.read_csv(data_path,nrows=1)
cdtypes_x = dfs.dtypes
nc = list(dfs.select_dtypes(include=[int,float]).columns)
oc = list(dfs.select_dtypes(exclude=[int,float]).columns)
cdtypes_y = pd.read_csv(label_path,nrows=1).dtypes
dfc = pd.read_csv(data_path,usecols=oc)
ccwuv = {i:list(pd.Series(dfc[i].unique()).dropna()) for i in dfc.columns}
preds_name = pd.read_csv(label_path,nrows=1).columns
##creating datasets
dataset = tf.data.experimental.make_csv_dataset(
    'r_data/x_train.csv', batch_size, column_names=cdtypes_x.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
labels = tf.data.experimental.make_csv_dataset(
    'r_data/y_train.csv', batch_size, column_names=cdtypes_y.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
dataset = tf.data.Dataset.zip((dataset, labels))
##CREATING NETWORK
p = PREPROCESS_MONSOON(cat_cols_with_unique_values=ccwuv,num_cols=nc)
indict = {}
for i in nc:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.float32)
for i in ccwuv:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.string)
x = p(indict)
x = l.BatchNormalization()(x)
x = l.Dense(10,activation='relu',name='dense_1')(x)
predictions = l.Dense(10,activation=None,name=preds_name[0])(x)
model = k.Model(inputs=indict,outputs=predictions)
##Compiling model
model.compile(optimizer=k.optimizers.Adam(),
              loss=k.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['sparse_categorical_accuracy'])
##callbacks
log_dir = './tensorboard_dir/no_config'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
## Fit model on training data
history = model.fit(dataset,
                    batch_size=64,
                    epochs=30,
                    steps_per_epoch=5,
                    validation_split=0.,
                    callbacks=[tensorboard_callback])
#saving the model
tf.saved_model.save(model,'r_savedir')
#loading the model
model = tf.saved_model.load('r_savedir')
##Predicting on loaded model
for i in dataset:
    print(model(i[0], training=False))
    break
I have commented out the part of the code where I override get_config in my custom layers; if you comment it back in, the warning about the layers not being serializable goes away.
Question:
Do I or do I not need to override the get_config method in order to make a custom layer that accepts arguments in __init__ serializable?
Thank you in advance for the help.

You must add get_config to your code:
def get_config(self):
    config = super().get_config()
    return config
The NUM_TO_DENSE class must be like this
class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        return config
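One caveat, my addition rather than the answerer's: the bare override above is enough to silence the warning, but it does not record the constructor arguments, so Keras cannot rebuild the layer from its config. A version that round-trips the __init__ argument, in the spirit of the commented-out code in the question, would be:

class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols, **kwargs):
        super().__init__(**kwargs)
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        config.update({'num_cols': self.keys})  # store the __init__ argument under its own name
        return config

Since the default from_config passes the config dict to __init__ as keyword arguments, storing num_cols (rather than derived attributes like keys_all) lets NUM_TO_DENSE.from_config(layer.get_config()) reconstruct the layer.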

Expected to see 3 array(s), but instead got the following list of 1 arrays:

I am trying to train a triplet loss model using fit_generator. It requires three inputs and no output, so I have a function that generates hard triplets. The output from the triplets generator has a shape of (3, 5, 279), which is 3 inputs (anchor, positive and negative) for 5 batches and a total of 279 features. When I run fit_generator it throws this error: "the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 3 array(s), but instead got the following list of 1 arrays", even though I have passed a list of three arrays. The code is below. It works when I use fit; however, I want to always call the generator function to generate my triplets as my batches. Thanks in advance; this has taken me three days.
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer

def load_data():
    path = "arrhythmia_data.txt"
    f = open(path, "r")
    data = []
    # remove line breaker, comma separate and store in array
    for line in f:
        line = line.replace('\n', '').replace('?', '0')
        line = line.split(",")
        data.append(line)
    f.close()
    data = np.array(data).astype(np.float64)
    #print(data.shape)

    # create the class labels for input data
    Y_train = data[:, -1:]
    train = data[:, :-1]
    normaliser = preprocessing.MinMaxScaler()
    train = normaliser.fit_transform(train)
    val = train[320:, :]
    train = train[:320, :]

    # create one hot encoding of the class labels of the data and separate them into train and test data
    lb = LabelBinarizer()
    encode = lb.fit_transform(Y_train)
    nb_classes = int(len(encode[0]))
    # one_hot_labels = keras.utils.to_categorical(labels, num_classes=10) could also be used for one hot encoding
    Y_val_e = encode[320:, :]
    Y_train_e = encode[:320, :]
    print(Y_train_e[0])
    print(np.argmax(Y_train_e[0]))
    val_in = []
    train_in = []

    # grouping and sorting the input data based on label id or name
    for n in range(nb_classes):
        images_class_n = np.asarray([row for idx, row in enumerate(train) if np.argmax(Y_train_e[idx]) == n])
        train_in.append(images_class_n)
        images_class_n = np.asarray([row for idx, row in enumerate(val) if np.argmax(Y_val_e[idx]) == n])
        val_in.append(images_class_n)
    #print(train_in[0].shape)
    return train_in, val_in, Y_train_e, Y_val_e, nb_classes

train_in, val, Y_train, Y_val, nb_classes = load_data()
input_shape = (train_in[0].shape[1],)
def build_network(input_shape, embeddingsize):
    '''
    Define the neural network to learn image similarity
    Input :
        input_shape : shape of input images
        embeddingsize : vector size used to encode our picture
    '''
    #in_ = Input(train.shape)
    net = Sequential()
    net.add(Dense(128, activation='relu', input_shape=input_shape))
    net.add(Dense(128, activation='relu'))
    net.add(Dense(256, activation='relu'))
    net.add(Dense(4096, activation='sigmoid'))
    net.add(Dense(embeddingsize, activation=None))
    # Force the encoding to live on the d-dimensional hypersphere
    net.add(Lambda(lambda x: K.l2_normalize(x, axis=-1)))
    return net
class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        anchor, positive, negative = inputs
        p_dist = K.sum(K.square(anchor - positive), axis=-1)
        n_dist = K.sum(K.square(anchor - negative), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss
def build_model(input_shape, network, margin=0.2):
    '''
    Define the Keras Model for training
    Input :
        input_shape : shape of input images
        network : Neural network to train, outputting embeddings
        margin : minimal distance between Anchor-Positive and Anchor-Negative for the loss function (alpha)
    '''
    # Define the tensors for the three input images
    anchor_input = Input(input_shape, name="anchor_input")
    positive_input = Input(input_shape, name="positive_input")
    negative_input = Input(input_shape, name="negative_input")

    # Generate the encodings (feature vectors) for the three images
    encoded_a = network(anchor_input)
    encoded_p = network(positive_input)
    encoded_n = network(negative_input)

    # TripletLoss Layer
    loss_layer = TripletLossLayer(alpha=margin, name='triplet_loss_layer')([encoded_a, encoded_p, encoded_n])

    # Connect the inputs with the outputs
    network_train = Model(inputs=[anchor_input, positive_input, negative_input], outputs=loss_layer)

    # return the model
    return network_train
def get_batch_random(batch_size, X):
    # note: takes the grouped data X directly, matching how it is called in get_batch_hard below
    m = X[0].shape[1]  # number of features
    # initialize result
    triplets = [np.zeros((batch_size, m)) for i in range(3)]
    for i in range(batch_size):
        # Pick one random class for anchor
        anchor_class = np.random.randint(0, nb_classes)
        nb_sample_available_for_class_AP = X[anchor_class].shape[0]
        # Pick two different random pics for this class => A and P. You can use the same anchor as P if there is only one element for the anchor class
        if nb_sample_available_for_class_AP <= 1:
            continue
        [idx_A, idx_P] = np.random.choice(nb_sample_available_for_class_AP, size=2, replace=False)
        # Pick another class for N, different from anchor_class
        negative_class = (anchor_class + np.random.randint(1, nb_classes)) % nb_classes
        nb_sample_available_for_class_N = X[negative_class].shape[0]
        # Pick a random pic for this negative class => N
        idx_N = np.random.randint(0, nb_sample_available_for_class_N)
        triplets[0][i, :] = X[anchor_class][idx_A, :]
        triplets[1][i, :] = X[anchor_class][idx_P, :]
        triplets[2][i, :] = X[negative_class][idx_N, :]
    return np.array(triplets)
def get_batch_hard(draw_batch_size, hard_batchs_size, norm_batchs_size, network, s="train"):
    if s == 'train':
        X = train_in
    else:
        X = val
    #m, features = X[0].shape
    #while True:
    # Step 1 : pick a random batch to study
    studybatch = get_batch_random(draw_batch_size, X)
    # Step 2 : compute the loss with the current network : d(A,P)-d(A,N). The alpha parameter is omitted here since we only want to order the triplets
    studybatchloss = np.zeros((draw_batch_size))
    # Compute embeddings for anchors, positives and negatives
    A = network.predict(studybatch[0])
    P = network.predict(studybatch[1])
    N = network.predict(studybatch[2])
    # Compute d(A,P)-d(A,N)
    studybatchloss = np.sum(np.square(A - P), axis=1) - np.sum(np.square(A - N), axis=1)
    # Sort by distance (highest first) and take the hardest triplets
    selection = np.argsort(studybatchloss)[::-1][:hard_batchs_size]
    # Draw other random samples from the batch
    selection2 = np.random.choice(np.delete(np.arange(draw_batch_size), selection), norm_batchs_size, replace=False)
    selection = np.append(selection, selection2)
    triplets = np.array([studybatch[0][selection, :], studybatch[1][selection, :], studybatch[2][selection, :]])
    yield triplets  # note: this yields a single (3, n, features) array, not three separate arrays
network = build_network(input_shape, embeddingsize=10)
hard = get_batch_hard(5, 4, 1, network, s="train")
network_train = build_model(input_shape, network)
optimizer = Adam(lr=0.00006)
network_train.compile(loss=None, optimizer=optimizer)
#this works
#history = network_train.fit(hard, epochs=100, steps_per_epoch=1, verbose=2)
history = network_train.fit_generator(hard, epochs=10, steps_per_epoch=16, verbose=2)
# error: the list of Numpy arrays that you are passing to your model is not the size the model
# expected. Expected to see 3 array(s), but instead got the following list of 1 arrays:
I think that's because in your generator you are yielding the 3 input arrays in one list; you need to yield the 3 arrays independently:
triplet_1 = studybatch[0][selection, :]
triplet_2 = studybatch[1][selection, :]
triplet_3 = studybatch[2][selection, :]
yield [triplet_1, triplet_2, triplet_3]
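Two further details worth checking (my observations, not part of the fix above): fit_generator pulls steps_per_epoch * epochs batches, so the commented-out while True must come back or the generator is exhausted after a single yield, and keeping the mining inside that loop re-selects hard triplets against the current network on every step. A sketch of the generator's overall shape under those assumptions:

def get_batch_hard(draw_batch_size, hard_batchs_size, norm_batchs_size, network, s="train"):
    X = train_in if s == 'train' else val
    while True:  # fit_generator needs an endless stream of batches
        studybatch = get_batch_random(draw_batch_size, X)
        # ... hard-mining exactly as above, producing `selection` ...
        yield [studybatch[0][selection, :],
               studybatch[1][selection, :],
               studybatch[2][selection, :]]  # three arrays, one per model input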

FailedPreconditionError: FailedPr...onError()

I get a FailedPreconditionError when running a session.
My network has two different parts: a pretrained network and a newly added recognition network.
The pretrained model is used to extract features, and the features are used to train again for recognition.
In my code, the pre-trained model is loaded first.
graph = tf.Graph()
with graph.as_default():
    input_data, input_labels, input_boxes = input_train_data.input_fn()
    input_boxes = tf.reshape(input_boxes, [input_boxes.shape[0] * 2, -1])  # convert from Nx8 to 2Nx4
    # build model and loss
    net = Net(input_data, is_training=False)
    f_saver = tf.train.Saver(max_to_keep=1000, write_version=tf.train.SaverDef.V2, save_relative_paths=True)
sess_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
if FLAGS.gpu_memory_fraction < 0:
    sess_config.gpu_options.allow_growth = True
elif FLAGS.gpu_memory_fraction > 0:
    sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction
session = tf.Session(graph=graph, config=sess_config)
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
f_saver.restore(session, config.train.init_checkpoint)
f_saver restores the pre-trained model.
Then the feature conv5_3 is extracted and fed into the recognition network.
conv5_3 = net.end_points['conv5_3']
with tf.variable_scope("Recognition"):
    global_step_rec = tf.Variable(0, name='global_step_rec', trainable=False)
    # Pass through recognition net
    r_net = regnet.ConstructRecNet(conv5_3)
    conv7_7 = r_net.end_points['pool7']
    # implement ROI Pooling
    # input boxes must be in x1, y1, x2, y2
    h_fmap = tf.dtypes.cast(tf.shape(conv7_7)[1], tf.float32)
    w_fmap = tf.dtypes.cast(tf.shape(conv7_7)[2], tf.float32)
    # remap boxes at input images to feature maps
    #input_boxes = input_boxes / tf.constant([config.train.input_shape[0], config.train.input_shape[0],
    #                                         config.train.input_shape[0], config.train.input_shape[0]], dtype=tf.float32)  # Normalize with image size first
    remap_boxes = tf.matmul(input_boxes, tf.diag([w_fmap, h_fmap, w_fmap, h_fmap]))
    # put first column with image indexes
    rows = tf.expand_dims(tf.range(remap_boxes.shape[0]), 1) / 2
    add_index = tf.concat([tf.cast(rows, tf.float32), remap_boxes], -1)
    index = tf.not_equal(tf.reduce_sum(add_index[:, 4:], axis=1), 0)
    remap_boxes = tf.gather_nd(add_index, tf.where(index))
    remap_boxes = tf.dtypes.cast(remap_boxes, tf.int32)
    prob = roi_pooling(conv7_7, remap_boxes, pool_height=1, pool_width=28)
    # Get features for CTC training
    prob = tf.transpose(prob, (1, 0, 2))  # prepare for CTC
    data_length = tf.fill([tf.shape(prob)[1]], tf.shape(prob)[0])  # input seq length, batch size
    ctc = tf.py_func(CTCUtils.compute_ctc_from_labels, [input_labels], [tf.int64, tf.int64, tf.int64])
    ctc_labels = tf.to_int32(tf.SparseTensor(ctc[0], ctc[1], ctc[2]))
    predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(prob, data_length, merge_repeated=False, beam_width=10)[0][0])
    tf.sparse_tensor_to_dense(predictions, default_value=-1, name='d_predictions')
    tf.reduce_mean(tf.edit_distance(predictions, ctc_labels, normalize=False), name='error_rate')
    loss = tf.reduce_mean(tf.compat.v1.nn.ctc_loss(inputs=prob, labels=ctc_labels, sequence_length=data_length, ctc_merge_repeated=True), name='loss')
    learning_rate = tf.train.piecewise_constant(global_step_rec, [150000, 200000],
                                                [config.train.learning_rate, 0.1 * config.train.learning_rate, 0.01 * config.train.learning_rate])
    opt_loss = tf.contrib.layers.optimize_loss(loss, global_step_rec, learning_rate, config.train.opt_type,
                                               config.train.grad_noise_scale, name='train_step')
tf.global_variables_initializer()  # note: this only creates the init op; it is never passed to session.run here
I can run the session up to the feature extraction conv5_3, but I can't run the ops under Recognition; I get FailedPreconditionError: FailedPr...onError(). What could be the problem?
graph.finalize()
with tf.variable_scope("Recognition"):
    for i in range(config.train.steps):
        input_data_, input_labels_, input_boxes_ = session.run([input_data, input_labels, input_boxes])
        conv5_3_ = session.run([conv5_3])  # can run this line
        global_step_rec_ = session.run([global_step_rec])  # got FailedPreconditionError: FailedPr...onError() at this line
        conv7_7_ = session.run([conv7_7])
        h_fmap_ = session.run([h_fmap])
Now it works. Since my graph has two parts, I needed to initialize them separately:
(1) First, collect the pre-trained model's variables and initialize them from the checkpoint with a tf.train.Saver.
(2) Then initialize the remaining add-in layers using tf.global_variables_initializer().
My code is as follows.
# Initialization
# Initialize the pre-trained model first.
# Since we need to restore the pre-trained model and initialize the respective variables in this current graph:
# (1) make a variable list for the checkpoint
# (2) initialize a saver for the variable list
# (3) then restore
from tensorflow.python import pywrap_tensorflow

# (1)
def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors):
    varlist = []
    reader = pywrap_tensorflow.NewCheckpointReader(file_name)
    if all_tensors:
        var_to_shape_map = reader.get_variable_to_shape_map()
        for key in sorted(var_to_shape_map):
            print(key)
            varlist.append(key)
    return varlist

varlist = print_tensors_in_checkpoint_file(file_name=config.train.init_checkpoint, all_tensors=True, tensor_name=None)

# (2) prepare the list of variables by calling tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
#countcheckpt_vars = 0
#for n in tf.get_default_graph().as_graph_def().node:
#    print(n.name)
#for op in tf.get_default_graph().get_operations():
#    print(str(op.name))
#for var in zip(variables):
#    countcheckpt_vars = countcheckpt_vars + 1

# (3)
loader = tf.train.Saver(variables[:46])  # since I need to initialize only 46 variables from global variables
tf.logging.info('Initialize from: ' + config.train.init_checkpoint)
sess_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
if FLAGS.gpu_memory_fraction < 0:
    sess_config.gpu_options.allow_growth = True
elif FLAGS.gpu_memory_fraction > 0:
    sess_config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction
session = tf.Session(graph=graph, config=sess_config)
loader.restore(session, config.train.init_checkpoint)

Then initialize the rest of the variables:

init = tf.global_variables_initializer()
session.run(init)
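A caveat on this solution (my note, not the author's): tf.global_variables_initializer() runs the initializer of every global variable, so running it after loader.restore also re-initializes the 46 restored variables and wipes the checkpoint weights. Either run the global initializer before the restore, or initialize only the variables the checkpoint did not cover:

# initialize only the add-in recognition variables, leaving the restored ones untouched
remaining_vars = [v for v in tf.global_variables() if v not in variables[:46]]
session.run(tf.variables_initializer(remaining_vars))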