AttributeError: 'list' object has no attribute 'model_dir' - tensorflow

I'm running a wide_deep.py script for linear regression in tensorflow.
I have cloned the models directory also as a part of process.
But i'm getting a error like AttributeError: 'list' object has no attribute 'model_dir'.
If I hard code this particular variable, I m getting other errors as AttributeError: 'list' object has no attribute 'data_dir' and so on .
Code:
"""Example code for TensorFlow Wide & Deep Tutorial using tf.estimator API."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import shutil
from absl import app as absl_app
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper
from official.utils.misc import model_helpers
_CSV_COLUMNS = [
'age', 'workclass', 'fnlwgt', 'education', 'education_num',
'marital_status', 'occupation', 'relationship', 'race', 'gender',
'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
'income_bracket'
]
_CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
[0], [0], [0], [''], ['']]
_NUM_EXAMPLES = {
'train': 32561,
'validation': 16281,
}
LOSS_PREFIX = {'wide': 'linear/', 'deep': 'dnn/'}
def define_wide_deep_flags():
"""Add supervised learning flags, as well as wide-deep model type."""
flags_core.define_base()
flags.adopt_module_key_flags(flags_core)
flags.DEFINE_enum(
name="model_type", short_name="mt", default="wide_deep",
enum_values=['wide', 'deep', 'wide_deep'],
help="Select model topology.")
flags_core.set_defaults(data_dir='/tmp/census_data',
model_dir='/tmp/census_model',
train_epochs=40,
epochs_between_evals=2,
batch_size=40)
def build_model_columns():
"""Builds a set of wide and deep feature columns."""
# Continuous columns
age = tf.feature_column.numeric_column('age')
education_num = tf.feature_column.numeric_column('education_num')
capital_gain = tf.feature_column.numeric_column('capital_gain')
capital_loss = tf.feature_column.numeric_column('capital_loss')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')
education = tf.feature_column.categorical_column_with_vocabulary_list(
'education', [
'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',
'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',
'5th-6th', '10th', '1st-4th', 'Preschool', '12th'])
marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
'marital_status', [
'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',
'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])
relationship = tf.feature_column.categorical_column_with_vocabulary_list(
'relationship', [
'Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried',
'Other-relative'])
workclass = tf.feature_column.categorical_column_with_vocabulary_list(
'workclass', [
'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',
'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])
# To show an example of hashing:
occupation = tf.feature_column.categorical_column_with_hash_bucket(
'occupation', hash_bucket_size=1000)
# Transformations.
age_buckets = tf.feature_column.bucketized_column(
age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
# Wide columns and deep columns.
base_columns = [
education, marital_status, relationship, workclass, occupation,
age_buckets,
]
crossed_columns = [
tf.feature_column.crossed_column(
['education', 'occupation'], hash_bucket_size=1000),
tf.feature_column.crossed_column(
[age_buckets, 'education', 'occupation'], hash_bucket_size=1000),
]
wide_columns = base_columns + crossed_columns
deep_columns = [
age,
education_num,
capital_gain,
capital_loss,
hours_per_week,
tf.feature_column.indicator_column(workclass),
tf.feature_column.indicator_column(education),
tf.feature_column.indicator_column(marital_status),
tf.feature_column.indicator_column(relationship),
# To show an example of embedding
tf.feature_column.embedding_column(occupation, dimension=8),
]
return wide_columns, deep_columns
def build_estimator(model_dir, model_type):
"""Build an estimator appropriate for the given model type."""
wide_columns, deep_columns = build_model_columns()
hidden_units = [100, 75, 50, 25]
# Create a tf.estimator.RunConfig to ensure the model is run on CPU, which
# trains faster than GPU for this model.
run_config = tf.estimator.RunConfig().replace(
session_config=tf.ConfigProto(device_count={'GPU': 0}))
if model_type == 'wide':
return tf.estimator.LinearClassifier(
model_dir=model_dir,
feature_columns=wide_columns,
config=run_config)
elif model_type == 'deep':
return tf.estimator.DNNClassifier(
model_dir=model_dir,
feature_columns=deep_columns,
hidden_units=hidden_units,
config=run_config)
else:
return tf.estimator.DNNLinearCombinedClassifier(
model_dir=model_dir,
linear_feature_columns=wide_columns,
dnn_feature_columns=deep_columns,
dnn_hidden_units=hidden_units,
config=run_config)
def input_fn(data_file, num_epochs, shuffle, batch_size):
"""Generate an input function for the Estimator."""
assert tf.gfile.Exists(data_file), (
'%s not found. Please make sure you have run data_download.py and '
'set the --data_dir argument to the correct path.' % data_file)
def parse_csv(value):
print('Parsing', data_file)
columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
features = dict(zip(_CSV_COLUMNS, columns))
labels = features.pop('income_bracket')
return features, tf.equal(labels, '>50K')
# Extract lines from input files using the Dataset API.
dataset = tf.data.TextLineDataset(data_file)
if shuffle:
dataset = dataset.shuffle(buffer_size=_NUM_EXAMPLES['train'])
dataset = dataset.map(parse_csv, num_parallel_calls=5)
# We call repeat after shuffling, rather than before, to prevent separate
# epochs from blending together.
dataset = dataset.repeat(num_epochs)
dataset = dataset.batch(batch_size)
return dataset
def export_model(model, model_type, export_dir):
"""Export to SavedModel format.
Args:
model: Estimator object
model_type: string indicating model type. "wide", "deep" or "wide_deep"
export_dir: directory to export the model.
"""
wide_columns, deep_columns = build_model_columns()
if model_type == 'wide':
columns = wide_columns
elif model_type == 'deep':
columns = deep_columns
else:
columns = wide_columns + deep_columns
feature_spec = tf.feature_column.make_parse_example_spec(columns)
example_input_fn = (
tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))
model.export_savedmodel(export_dir, example_input_fn)
def run_wide_deep(flags_obj):
"""Run Wide-Deep training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
"""
# Clean up the model directory if present
shutil.rmtree(flags_obj.model_dir, ignore_errors=True)
model = build_estimator(flags_obj.model_dir, flags_obj.model_type)
train_file = os.path.join(flags_obj.data_dir, 'adult.data')
test_file = os.path.join(flags_obj.data_dir, 'adult.test')
# Train and evaluate the model every `flags.epochs_between_evals` epochs.
def train_input_fn():
return input_fn(
train_file, flags_obj.epochs_between_evals, True, flags_obj.batch_size)
def eval_input_fn():
return input_fn(test_file, 1, False, flags_obj.batch_size)
loss_prefix = LOSS_PREFIX.get(flags_obj.model_type, '')
train_hooks = hooks_helper.get_train_hooks(
flags_obj.hooks, batch_size=flags_obj.batch_size,
tensors_to_log={'average_loss': loss_prefix + 'head/truediv',
'loss': loss_prefix + 'head/weighted_loss/Sum'})
# Train and evaluate the model every `flags.epochs_between_evals` epochs.
for n in range(flags_obj.train_epochs // flags_obj.epochs_between_evals):
model.train(input_fn=train_input_fn, hooks=train_hooks)
results = model.evaluate(input_fn=eval_input_fn)
# Display evaluation metrics
print('Results at epoch', (n + 1) * flags_obj.epochs_between_evals)
print('-' * 60)
for key in sorted(results):
print('%s: %s' % (key, results[key]))
if model_helpers.past_stop_threshold(
flags_obj.stop_threshold, results['accuracy']):
break
# Export the model
if flags_obj.export_dir is not None:
export_model(model, flags_obj.model_type, flags_obj.export_dir)
def main(_):
run_wide_deep(flags.FLAGS)
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
define_wide_deep_flags()
absl_app.run(main)

Hunter, I tried to run without hardcoding but still faced issues with attributes , so I tried to hard code to avoid this .
But, The issue is resolved now.
I cloned the directory again and instead of copying the wide_deep.py to another directory and run from there(which I was doing before), I ran directly from the same directory and now it is working fine.

Related

How to concat laserembeddings with huggingface funnel transformers simple CLS output for NLP sequence classification task?

i was approaching NLP sequence classification problem (3 classes) using huggingface transformers (funnel-transformer/large) and tensorflow.
first i created laserembedding like this :
from laserembeddings import Laser
laser = Laser()
df = pd.read_csv("mycsv.csv")
embeds = laser.embed_sentences(df['text'].values, lang='en')
write_pickle_to_file('train.pkl', embeds )
part 1 : Tensorflow version
for data preparation i use code like below :
df['text']=temp['column1']+tokenizer.sep_token+temp['column2']+tokenizer.sep_token+temp['column3']
def encode_text(texts):
enc_di = tokenizer.batch_encode_plus(
texts,
padding='max_length',
truncation=True,
return_token_type_ids=True,
pad_to_max_length=True,
max_length=cfg.max_len
)
return [np.asarray(enc_di['input_ids'], dtype=np.int64),
np.asarray(enc_di['attention_mask'], dtype=np.int64),
np.asarray(enc_di['token_type_ids'], dtype=np.int64)]
then inside training function :
x_train = encode_text(df.text.to_list())
train_ds = (
tf.data.Dataset
.from_tensor_slices((
{
"input_ids": x_train[0],
"input_masks": x_train[1],
"input_segments": x_train[2],
"lasers": np.array( train[laser_columns].values, dtype=np.float32 ) #laser_columns contains all the laser embedded columns
},
tf.one_hot(df["label"].to_list(), 3) #3 class
))
.repeat()
.shuffle(2048)
.batch(cfg.batch_size)
.prefetch(AUTO)
)
i add laser embedding in my model like this :
def create_model():
transformer = transformers.TFAutoModel.from_pretrained(cfg.pretrained,config=config,from_pt=True)
max_len=512
# transformer
input_ids = Input(shape=(max_len,), dtype="int32", name="input_ids")
input_masks = Input(shape=(max_len,), dtype="int32", name="input_masks")
input_segments = Input(shape=(max_len,), dtype="int32", name="input_segments")
sequence_output = transformer(input_ids, attention_mask=input_masks, token_type_ids=input_segments)[0]
cls_token = sequence_output[:, 0, :]
# lasers
lasers = Input(shape=(n_lasers,), dtype=tf.float32, name="lasers") #n_lasers = 1024
lasers_output = tf.keras.layers.Dense(n_lasers, activation='tanh')(lasers)
x = tf.keras.layers.Concatenate()([cls_token, lasers_output])
x = tf.keras.layers.Dropout(0.1)(x)
x = tf.keras.layers.Dense(2048, activation='tanh')(x)
x = tf.keras.layers.Dropout(0.1)(x)
out = tf.keras.layers.Dense(3, activation='softmax')(x)
model = Model(inputs=[input_ids, input_masks, input_segments, lasers], outputs=out)
model.compile(Adam(lr=1e-5), loss=losses.CategoricalCrossentropy(), metrics=["acc", metrics.CategoricalCrossentropy(name='xentropy')])
return model
now my question is, how do we do the same with pytorch for exact same problem and same dataset?
part 2 : pytorch version
df = pd.read_csv("mytrain.csv")
class myDataset(Dataset):
def __init__(self,df, max_length, tokenizer, training=True):
self.df = df
self.max_len = max_length
self.tokenizer = tokenizer
self.column1 = self.df['column1'].values
self.column2 = self.df['column2'].values
self.column3= self.df['column3'].values
self.column4= self.df['column4'].values
self.training = training
if self.training:
self.targets = self.df['label'].values
def __len__(self):
return len(self.df)
def __getitem__(self, index):
column1 = self.column1[index]
column2= self.column2[index]
column3= self.column3[index]
text0 = self.column4[index]
text1 = column1 + ' ' + column2+ ' ' + column3
inputs = self.tokenizer.encode_plus(
text1 ,
text0 ,
truncation = True,
add_special_tokens = True,
return_token_type_ids = True,
is_split_into_words=False,
max_length = self.max_len
)
samples = {
'input_ids': inputs['input_ids'],
'attention_mask': inputs['attention_mask'],
}
if 'token_type_ids' in inputs:
samples['token_type_ids'] = inputs['token_type_ids']
if self.training:
samples['target'] = self.targets[index]
return samples
collate_fn = DataCollatorWithPadding(tokenizer=CONFIG['tokenizer'])
class myModel(nn.Module):
def __init__(self, model_name):
super(myModel, self).__init__()
self.model = AutoModel.from_pretrained(model_name)
if(True):
print("using gradient_checkpoint...")
self.model.gradient_checkpointing_enable()
self.config = AutoConfig.from_pretrained(model_name)
self.config.update(
{
"output_hidden_states": True,
"hidden_dropout_prob": 0.0,
"layer_norm_eps": 1e-7,
"add_pooling_layer": False,
"attention_probs_dropout_prob":0.0,
}
)
self.fc = nn.Linear(self.config.hidden_size, 3)
def forward(self, ids, mask):
out = self.model(input_ids=ids,attention_mask=mask,output_hidden_states=False)
out = out[0][:, 0, :]
outputs = self.fc(out)
return outputs
and in train and validation loop i have code like this :
bar = tqdm(enumerate(dataloader), total=len(dataloader))
for step, data in bar:
ids = data['input_ids'].to(device, dtype = torch.long)
mask = data['attention_mask'].to(device, dtype = torch.long)
targets = data['target'].to(device, dtype=torch.long)
batch_size = ids.size(0)
optimizer.zero_grad()
# forward pass with `autocast` context manager
with autocast(enabled=True):
outputs = model(ids, mask)
loss = loss_fct(outputs, targets)
i would like to know where and how in my huggingface pytorch pipeline i can use the laserembedding that i created earlier and used in tensorflow huggingface model?
i would like to concat laserembeddings with funnel transformer's simple CLS token output and train the transformers model with laser embed as extra feature in pytorch implementation exactly like i did in tensorflow example,do you know how to modify my pytorch code to make it working in pytorch? the tensorflow implementation with laserembedding concatenated above that i have posted here works good,i just wanted to do the same in pytorch implementation,,your help is highly appreciated,thanks in advance

tensorflow Exception encountered when calling layer (type CategoryEncoding)

I'm trying to code a layer to interface between a data set (numerical and categorical features) so it can be fed into a model.
I can't understand the error I get when it comes to categorical columns.
ValueError: Exception encountered when calling layer (type CategoryEncoding).
When output_mode is not 'int', maximum supported output rank is 2. Received
output_mode multi_hot and input shape (10, 7, 1), which would result in output rank 3.
From what I understand, the batch size should not have been counted in, but it is. And that seems to break.
Note that reproducing with only numerical features works fine.
Thank you for your help.
import tensorflow as tf
import pandas as pd
import numpy as np
# Simulate a data set of categorical and numerical values
# Configure simulation specifications: {feature: number of unique categories or None for numerical}
theSimSpecs = {'Cat1': 54, 'Cat2': 2, 'Cat3': 4, 'Num1': None, 'Num2': None}
# theSimSpecs = {'Num1': None, 'Num2': None}
# batch size and timesteps
theBatchSz, theTimeSteps = 10, 4
# Creation of the dataset as pandas.DataFrame
theDFs = []
for theFeature, theUniques in theSimSpecs.items():
if theUniques is None:
theDF = pd.DataFrame(np.random.random(size=theBatchSz * theTimeSteps), columns=[theFeature])
else:
theDF = pd.DataFrame(np.random.randint(low=0, high=theUniques, size=theBatchSz * theTimeSteps),
columns=[theFeature]).astype('category')
theDFs.append(theDF)
theDF = pd.concat(theDFs, axis=1)
# code excerpt
# inventory of the categorical features' values ( None for the numerical)
theCatCodes = {theCol: (theDF[theCol].unique().tolist() if str(theDF[theCol].dtypes) == "category" else None)
for theCol in theDF.columns}
# Creation of the batched tensorflow.data.Dataset
theDS = tf.data.Dataset.from_tensor_slices(dict(theDF))
theDS = theDS.window(size=theTimeSteps, shift=1, stride=1, drop_remainder=True)
theDS = theDS.flat_map(lambda x: tf.data.Dataset.zip(x))
theDS = theDS.batch(batch_size=theTimeSteps, drop_remainder=True)
theDS = theDS.batch(batch_size=theBatchSz, drop_remainder=True)
# extracting one batch
theBatch = next(iter(theDS))
tf.print(theBatch)
# Creation of the components for the interface layer
theFeaturesInputs = {}
theFeaturesEncoded = {}
for theFeature, theCodes in theCatCodes.items():
if theCodes is None: # Pass-through for numerical features
theNumInput = tf.keras.layers.Input(shape=[], dtype=tf.float32, name=theFeature)
theFeaturesInputs[theFeature] = theNumInput
theFeatureExp = tf.expand_dims(input=theNumInput, axis=-1)
theFeaturesEncoded[theFeature] = theFeatureExp
else: # Process for categorical features
theCatInput = tf.keras.layers.Input(shape=[], dtype=tf.int64, name=theFeature)
theFeaturesInputs[theFeature] = theCatInput
theFeatureExp = tf.expand_dims(input=theCatInput, axis=-1)
theEncodingLayer = tf.keras.layers.CategoryEncoding(num_tokens=theSimSpecs[theFeature], name=f"{theFeature}_enc",
output_mode="multi_hot", sparse=False)
theFeaturesEncoded[theFeature] = theEncodingLayer(theFeatureExp)
theStackedInputs = tf.concat(tf.nest.flatten(theFeaturesEncoded), axis=1)
theModel = tf.keras.Model(inputs=theFeaturesInputs, outputs=theStackedInputs)
theOutput = theModel(theBatch)
tf.print(theOutput)

How to build a custom question-answering head when using hugginface transformers?

Using the TFBertForQuestionAnswering.from_pretrained() function, we get a predefined head on top of BERT together with a loss function that are suitable for this task.
My question is how to create a custom head without relying on TFAutoModelForQuestionAnswering.from_pretrained().
I want to do this because there is no place where the architecture of the head is explained clearly. By reading the code here we can see the architecture they are using, but I can't be sure I understand their code 100%.
Starting from How to Fine-tune HuggingFace BERT model for Text Classification is good. However, it covers only the classification task, which is much simpler.
'start_positions' and 'end_positions' are created following this tutorial.
So far, I've got the following:
train_dataset
# Dataset({
# features: ['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'],
# num_rows: 99205
# })
train_dataset.set_format(type='tensorflow', columns=['input_ids', 'token_type_ids', 'attention_mask'])
features = {x: train_dataset[x] for x in ['input_ids', 'token_type_ids', 'attention_mask']}
labels = [train_dataset[x] for x in ['start_positions', 'end_positions']]
labels = np.array(labels).T
tfdataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(16)
input_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='input_ids')
token_type_ids = tf.keras.layers.Input(shape=(256,), dtype=tf.int32, name='token_type_ids')
attention_mask = tf.keras.layers.Input((256,), dtype=tf.int32, name='attention_mask')
bert = TFAutoModel.from_pretrained("bert-base-multilingual-cased")
output = bert([input_ids, token_type_ids, attention_mask]).last_hidden_state
output = tf.keras.layers.Dense(2, name="qa_outputs")(output)
model = tf.keras.models.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=output)
num_train_epochs = 3
num_train_steps = len(tfdataset) * num_train_epochs
optimizer, schedule = create_optimizer(
init_lr=2e-5,
num_warmup_steps=0,
num_train_steps=num_train_steps,
weight_decay_rate=0.01
)
def qa_loss(labels, logits):
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True, reduction=tf.keras.losses.Reduction.NONE
)
start_loss = loss_fn(labels[0], logits[0])
end_loss = loss_fn(labels[1], logits[1])
return (start_loss + end_loss) / 2.0
model.compile(
loss=loss_fn,
optimizer=optimizer
)
model.fit(tfdataset, epochs=num_train_epochs)
And I am getting the following error:
ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(2,) and logits.shape=(256, 2)
It is complaining about the shape of the labels. This should not happen since I am using SparseCategoricalCrossentropy loss.
For future reference, I actually found a solution, which is just editing the TFBertForQuestionAnswering class itself. For example, I added an additional layer in the following code and trained the model as usual and it worked.
from transformers import TFBertPreTrainedModel
from transformers import TFBertMainLayer
from transformers.modeling_tf_utils import TFQuestionAnsweringLoss, get_initializer, input_processing
from transformers.modeling_tf_outputs import TFQuestionAnsweringModelOutput
from transformers import BertConfig
class MY_TFBertForQuestionAnswering(TFBertPreTrainedModel, TFQuestionAnsweringLoss):
# names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model
_keys_to_ignore_on_load_unexpected = [
r"pooler",
r"mlm___cls",
r"nsp___cls",
r"cls.predictions",
r"cls.seq_relationship",
]
def __init__(self, config: BertConfig, *inputs, **kwargs):
super().__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.bert = TFBertMainLayer(config, add_pooling_layer=False, name="bert")
# This is the dense layer I added
self.my_dense = tf.keras.layers.Dense(
units=config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
name="my_dense",
)
self.qa_outputs = tf.keras.layers.Dense(
units=config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name="qa_outputs",
)
def call(
self,
input_ids = None,
attention_mask = None,
token_type_ids = None,
position_ids = None,
head_mask = None,
inputs_embeds = None,
output_attentions = None,
output_hidden_states = None,
return_dict = None,
start_positions = None,
end_positions= None,
training = False,
**kwargs,
):
r"""
start_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
end_positions (`tf.Tensor` or `np.ndarray` of shape `(batch_size,)`, *optional*):
Labels for position (index) of the end of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
are not taken into account for computing the loss.
"""
inputs = input_processing(
func=self.call,
config=self.config,
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
start_positions=start_positions,
end_positions=end_positions,
training=training,
kwargs_call=kwargs,
)
outputs = self.bert(
input_ids=inputs["input_ids"],
attention_mask=inputs["attention_mask"],
token_type_ids=inputs["token_type_ids"],
position_ids=inputs["position_ids"],
head_mask=inputs["head_mask"],
inputs_embeds=inputs["inputs_embeds"],
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=inputs["return_dict"],
training=inputs["training"],
)
sequence_output = outputs[0]
# You also have to add it here
my_logits = self.my_dense(inputs=sequence_output)
logits = self.qa_outputs(inputs=my_logits)
start_logits, end_logits = tf.split(value=logits, num_or_size_splits=2, axis=-1)
start_logits = tf.squeeze(input=start_logits, axis=-1)
end_logits = tf.squeeze(input=end_logits, axis=-1)
loss = None
if inputs["start_positions"] is not None and inputs["end_positions"] is not None:
labels = {"start_position": inputs["start_positions"]}
labels["end_position"] = inputs["end_positions"]
loss = self.hf_compute_loss(labels=labels, logits=(start_logits, end_logits))
if not inputs["return_dict"]:
output = (start_logits, end_logits) + outputs[2:]
return ((loss,) + output) if loss is not None else output
return TFQuestionAnsweringModelOutput(
loss=loss,
start_logits=start_logits,
end_logits=end_logits,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
return TFQuestionAnsweringModelOutput(
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
)

LightGBM model returning the same score with different parameters

I'm trying to train a LightGBM model on the Kaggle Iowa housing dataset and I wrote a small script to randomly try different parameters within a given range. I'm not sure what's wrong with my code, but the script returns the same score with different parameters, which shouldn't be happening. I tried the same script with Catboost and it works as expected, so I'm guessing the issue is with LGBM.
The code:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from random import choice, randrange, uniform
complete_train = pd.read_csv(
"train.csv",
encoding = "UTF-8",
index_col = "Id")
complete_test = pd.read_csv(
"test.csv",
encoding = "UTF-8",
index_col = "Id")
def encode_impute(*datasets):
for dataset in datasets:
for column in dataset.columns:
dataset[
column].fillna(
-999,
inplace = True)
if dataset[
column].dtype == "object":
dataset[
column] = dataset[
column].astype("category", copy = False)
encode_impute(
complete_train,
complete_test)
X = complete_train.drop(
columns = "SalePrice")
y = complete_train[
"SalePrice"]
X_train, X_valid, y_train, y_valid = train_test_split(X, y)
def objective():
while True:
params = {
"boosting_type": choice(["gbdt", "goss", "dart", "rf"]),
"num_leaves": randrange(10000),
"learning_rate": uniform(0.01, 1),
"subsample_for_bin": randrange(100000000),
"min_data_in_leaf": randrange(100000000),
"reg_alpha": uniform(0, 1),
"reg_lambda": uniform(0, 1),
"feature_fraction": uniform(0, 1),
"bagging_fraction": uniform(0, 1),
"bagging_freq": randrange(1, 100)}
params["bagging_fraction"] = 1.0 if params[
"boosting_type"] == "goss" else params[
"bagging_fraction"]
model = LGBMRegressor().set_params(**params)
model.fit(X_train, y_train)
predictions = model.predict(X_valid)
error_rate = mean_absolute_error(
y_valid, predictions)
print(f"Score = {error_rate} with parameters: {params}","\n" *5)
objective()
Example of the output I'm getting:
Score = 55967.70375930444 with parameters: {'boosting_type': 'gbdt', 'num_leaves': 6455, 'learning_rate': 0.2479700848039991, 'subsample_for_bin': 83737077, 'min_data_in_leaf': 51951103, 'reg_alpha': 0.1856001984332697, 'reg_lambda': 0.7849262049058852, 'feature_fraction': 0.10550627738309537, 'bagging_fraction': 0.2613298736131875, 'bagging_freq': 96}
Score = 55967.70375930444 with parameters: {'boosting_type': 'dart', 'num_leaves': 9678, 'learning_rate': 0.28670432435369037, 'subsample_for_bin': 24246091, 'min_data_in_leaf': 559094, 'reg_alpha': 0.07261459695501371, 'reg_lambda': 0.8834743560240725, 'feature_fraction': 0.5361519020265366, 'bagging_fraction': 0.9120030047714073, 'bagging_freq': 10}
Score = 55967.70375930444 with parameters: {'boosting_type': 'goss', 'num_leaves': 4898, 'learning_rate': 0.09237499846487345, 'subsample_for_bin': 32620066, 'min_data_in_leaf': 71317820, 'reg_alpha': 0.9818297737748625, 'reg_lambda': 0.11638265354331834, 'feature_fraction': 0.4230342728468828, 'bagging_fraction': 1.0, 'bagging_freq': 64}
I would point out that that min_data_in_leaf parameter in all the options seems very high and I suspect that the model is not learning anything and just sending the average value of the response variable with only root node.

How can i use 38 classes instead of 1000 in model.predict decode predictions

i am founding an error in plant disease detection using resnet50 deep learning model every time it raises an error message in decode_predictions
error
expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 38)"
enter code here
model = ResNet50(weights='imagenet',include_top=False,classes=38)
try:
model = load_model('/content/drive/My
Drive/color/checkpoints/ResNet50_model_weights.h5')
print("model loaded")
except:
print("model not loaded")
img_path = '/content/drive/My Drive/color/test/0/appleblackrot188.jpg'
img = image.load_img(img_path, target_size=(300, 300))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
preds = model.predict(x)
print('Predicted:', decode_predictions(preds,top=3)[0])
You can try using the preprocesing function:
import tensorflow as tf
# Using the keras wrapper on tensorflow (it must be the same using just keras).
IMAGE = [] # From image source, i did it from the camera.
toPred = tf.keras.applications.resnet50.preprocess_input(np.expand_dims(tf.keras.preprocessing.image.img_to_array(IMAGE), axis=0))
Maybe that can help :)
decode_predictions works only for ImageNet (no. of classes = 1000). For these 38 classes of plants, you have to write your own decode predictions based on the ground truth label you've assigned for each plant.
First, you need an index JSON file and create a new decode_predictions function.
For example
This HAM10000 that has 7 classes and you need to split to each folder like this
then make an index JSON file like this
{
"0" : [
"AKIEC",
"akiec"
],
"1" : [
"BCC",
"bcc"
],
"2" : [
"BKL",
"bkl"
],
"3" : [
"DF",
"df"
],
"4" : [
"MEL",
"mel"
],
"5" : [
"NV",
"nv"
],
"6" : [
"VASC",
"vasc"
]
}
Then try this code
def decode_predictions(preds, top=4, class_list_path='/content/SKIN_DATA/HAM10000_index.json'):
if len(preds.shape) != 2 or preds.shape[1] != 7: # your classes number
raise ValueError('`decode_predictions` expects '
'a batch of predictions '
'(i.e. a 2D array of shape (samples, 1000)). '
'Found array with shape: ' + str(preds.shape))
index_list = json.load(open(class_list_path))
results = []
for pred in preds:
top_indices = pred.argsort()[-top:][::-1]
result = [tuple(index_list[str(i)]) + (pred[i],) for i in top_indices]
result.sort(key=lambda x: x[2], reverse=True)
results.append(result)
return results