Making custom activation function in tensorflow 2.0 - tensorflow

I am trying to create a custom tanh() activation function in tensorflow to work with a particular output range that I want. I want my network to output concentration multipliers, so I figured if the output of tanh() were negative it should return a value between 0 and 1, and if it were positive to output a value between 1 and 10.
Here is what I currently have
def output_activation(x):
    """Scale a tanh activation depending on the sign of ``x``.

    Intended ranges: roughly 1-10 when ``x >= 0`` and 0-1 when ``x < 0``.
    The branch is chosen with ``tf.cond`` using ``x >= 0`` as the predicate.
    """
    def scaled_branch():
        # Taken when the predicate is true: shifted tanh stretched by 10.
        return tf.math.tanh(x + 0.1) * 10

    def shifted_branch():
        # Taken when the predicate is false: tanh lifted by 1.
        return tf.math.tanh(x) + 1

    return tf.cond(x >= 0, scaled_branch, shifted_branch)
I believe this will work with a single value, but I want to output a vector of values, to which python throws a value error
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Tensors are immutable and, from my understanding, converting to a numpy array and back will slow down network training if I am on a GPU. What is the best way to get around this error but still keep the benefits of hardware acceleration?

I suggest you use tf.keras.backend.switch. Here is a dummy example:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras import backend as K
def output_activation(x):
    """Element-wise scaled tanh selected by the sign of ``x``.

    Where ``x >= 0`` the result is ``tanh(x + 0.1) * 10``; elsewhere it is
    ``tanh(x) + 1``. Both candidates are computed for the whole tensor and
    ``K.switch`` picks between them.
    """
    is_non_negative = x >= 0
    upper_values = tf.math.tanh(x + 0.1) * 10  # chosen where x >= 0
    lower_values = tf.math.tanh(x) + 1  # chosen where x < 0
    return K.switch(is_non_negative, upper_values, lower_values)
# Dummy data: 100 samples of 10 uniform-random features, with one uniform-random target each.
X = np.random.uniform(0,1, (100,10))
y = np.random.uniform(0,1, 100)
# Small functional model whose hidden Dense layer uses the custom activation.
inp = Input((10,))
x = Dense(8, activation=output_activation)(inp)
out = Dense(1)(x)
model = Model(inp, out)
# Compile with the Adam optimizer and mean-squared-error loss, then train for 3 epochs.
model.compile('adam', 'mse')
model.fit(X,y, epochs=3)
Here is the running notebook: https://colab.research.google.com/drive/1T_kRNUphJt9xTjiOheTgoIGpGDZaaRAg?usp=sharing

Related

Why am I obtaining high MAE values when constructing my deep neural network

I am trying to do hyperparameter optimization for a regression problem using a deep neural network.
I am getting slightly high MAE values (5-7) when in the literature this is around 0.9-5 (using the same dataset to train the NN).
Any idea of what I could improve? I can provide the dataset if needed, but it's very large.
X looks like this:
Y looks like this:
X is composed of 865432 rows=features and 134 columns=samples where rows are ordered by decreasing pearson correlation with variable Y which is the age of each sample (a float variable).
Each entry in X is a number between 0-1.
Since there are too many features for the number of samples I decided to take only the 40000 most important features. (Also because I don't know how to include feature selection within the training of the model).
Here is the loss vs epochs:
#!/usr/bin/env python3
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor
import keras.losses
from keras import backend as K
import keras_tuner as kt
from keras_tuner.tuners import RandomSearch
from sklearn.model_selection import cross_val_score
def dynamic_model(hp):
    """Build and compile a tunable fully connected regression network.

    Hyperparameters registered on ``hp``:
      * ``num_layers`` (int, 2-10): the loop below runs ``num_layers - 1``
        times, so together with the output layer the model holds
        ``num_layers`` Dense layers.
      * ``learning_rate`` (choice of 1e-2, 1e-3, 1e-4) for Adam.
      * per hidden block ``i``: ``units_{i}`` (100-400, step 100),
        ``dropout_{i}`` (0-0.5, step 0.1) and ``act_{i}``
        ('LeakyReLU' or 'PReLU').

    The input width ``CpG_num`` and the ``loss_function`` / ``metrics``
    used at compile time are read from module scope.

    Returns:
        The compiled Keras model.
    """
    # Hyperparameters that do not depend on the layer index.
    total_layers = hp.Int("num_layers", 2, 10)
    step_size = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

    net = keras.Sequential()
    net.add(keras.layers.InputLayer(input_shape=(CpG_num,)))

    # One Dense + Dropout pair per hidden block; indices start at 1.
    for block in range(1, total_layers):
        width = hp.Int(f"units_{block}", min_value=100, max_value=400, step=100)
        drop_fraction = hp.Float(f"dropout_{block}", 0, 0.5, step=0.1)
        nonlinearity = hp.Choice(f"act_{block}", ['LeakyReLU', 'PReLU'])
        hidden = keras.layers.Dense(
            units=width,
            kernel_initializer='normal',
            activation=nonlinearity,
        )
        net.add(hidden)
        net.add(keras.layers.Dropout(drop_fraction))

    # Single-unit output layer with no activation.
    net.add(keras.layers.Dense(units=1, kernel_initializer='normal'))

    adam = keras.optimizers.Adam(learning_rate=step_size)
    net.compile(optimizer=adam, loss=loss_function, metrics=metrics)
    return net
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2``, with ``r`` the Pearson correlation of the inputs.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        A scalar tensor: 0 when targets and predictions are perfectly
        (anti-)correlated, 1 when they are uncorrelated.
    """
    true_centered = y_true - K.mean(y_true)
    pred_centered = y_pred - K.mean(y_pred)

    r_num = K.sum(tf.multiply(true_centered, pred_centered))
    r_den = K.sqrt(
        tf.multiply(K.sum(K.square(true_centered)), K.sum(K.square(pred_centered)))
    )
    # A constant target or prediction vector has zero variance, which would
    # make this 0 / 0 = NaN. Clamp the denominator so r becomes 0 instead.
    r_den = K.maximum(r_den, K.epsilon())

    # Clip to the valid correlation range to absorb floating-point drift.
    r = K.clip(r_num / r_den, -1.0, 1.0)
    return 1 - K.square(r)
# Training loss, Keras metrics, and the scikit-learn style scoring name.
loss_function = 'mae'
metrics = ['mae', correlation_coefficient_loss]
scoring_function = 'neg_mean_absolute_error'
tuner_kind = 'random'
results_dir = '../tmp_files/Grid_Search_results/'+name+tuner_kind
hp = kt.HyperParameters()
if tuner_kind == 'random':
    tuner = kt.RandomSearch(
        dynamic_model,
        # Rank trials on validation MAE. search() receives validation_data,
        # and ranking on the training 'mae' would favour configurations
        # that merely fit the training set best.
        objective=kt.Objective('val_mae', direction='min'),
        overwrite=True,
        directory=results_dir,
        project_name='random_tuner_trials',
        max_trials=500,
    )
# Run the hyperparameter search, evaluating each trial on the validation split.
tuner.search(
    X1[name], Y1[name],
    epochs=50,
    validation_data=(X2[name], Y2[name]),
)
# Rebuild a fresh model from the best hyperparameters and train it for longer.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hps_model = dynamic_model(best_hps)
best_hps_model.fit(
    X1_train[name],
    y1_train[name],
    validation_data=(X2_train[name], y2_train[name]),
    epochs=500,
    batch_size=10,
)

Getting TypeError: sparse matrix length is ambiguous; use getnnz() or shape[0] while doing multi class classification

from sklearn.naive_bayes import CategoricalNB
from sklearn.datasets import make_multilabel_classification
# Generate a synthetic multilabel dataset; sparse output is requested for both X and y.
X, y = make_multilabel_classification(sparse = True, n_labels = 15,
return_indicator = 'sparse', allow_unlabeled = False)
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for testing, with a fixed random seed.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0)
I tried using X.todense() but the error is still raised.
# Convert only the feature matrices to dense form; y_train / y_test are left untouched.
X_train = X_train.todense()
X_test = X_test.todense()
Training on the dataset
from skmultilearn.adapt import MLkNN
from sklearn.metrics import accuracy_score
# Multi-label kNN classifier with k=20, fitted on the training split.
classifier = MLkNN(k=20)
classifier.fit(X_train, y_train)
predicting the output of trained dataset.
# Predict labels for the test split and score them against the true labels.
y_pred = classifier.predict(X_test)
accuracy_score(y_test,y_pred)
# NOTE(review): the len() calls below are presumably what raise
# "sparse matrix length is ambiguous" (y_pred / y_test look sparse) -- confirm.
np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1)
You are trying to get the length of a matrix, which is ambiguous:
len(y_pred)
Your matrix y_pred has the dimension (25,5), as seen with y_pred.shape.
So instead of len(y_pred), you could use y_pred.shape[0], which would return 25.
But then you will encounter a problem when you are using y_pred.reshape(y_pred.shape[0],1)
ValueError: cannot reshape array of size 125 into shape (25, 1)
(previously: y_pred.reshape(len(y_pred),1))
This error makes sense, because you are trying to reshape a matrix with 125 values into a matrix with only 25 values. You need to rethink your code here.

what's the meaning of 'input_length'?

The data has 4 timesteps, but the embedding's input_length=3, so what is the meaning of input_length?
from tensorflow import keras
import numpy as np
# One sequence holding four token ids.
data = np.array([[0,0,0,0]])
# Embedding declared with input_length=3, which differs from the four ids per sequence above.
emb = keras.layers.Embedding(input_dim=2, output_dim=3, input_length=3)
# Apply the layer directly to the array.
emb(data)
As per the official documentation here,
input_length: Length of input sequences, when it is constant. This
argument is required if you are going to connect Flatten then Dense
layers upstream (without it, the shape of the dense outputs cannot be
computed).
from tensorflow import keras
import numpy as np
model = keras.models.Sequential()
# input_length=4 matches the four ids per row of input_array below.
model.add(keras.layers.Embedding(input_dim=2, output_dim=3, input_length=4))
# the model will take as input an integer matrix of size (batch, input_length).
input_array = np.array([[0,0,0,0]])
model.compile('rmsprop', 'mse')
# Run a forward pass and print the embedded output.
output_array = model.predict(input_array)
print(output_array)
The above works fine, but if you change input_length to 3, you will get the error below:
ValueError: Error when checking input: expected embedding_input to
have shape (3,) but got array with shape (4,)

When is a random number generated in a Keras Lambda layer?

I would like to apply simple data augmentation (multiplication of the input vector by a random scalar) to a fully connected neural network implemented in Keras. Keras has nice functionality for image augmentation, but trying to use this seemed awkward and slow for my input (1-tensors), whose training data set fits in my computer's memory.
Instead, I imagined that I could achieve this using a Lambda layer, e.g. something like this:
# Ten-feature input fed through a scaling Lambda layer and two Dense layers.
x = Input(shape=(10,))
y = x
# Multiply the input by a factor drawn with random.uniform(0.5, 1.0).
# NOTE(review): random.uniform is not a Keras/TensorFlow op, so the factor is
# presumably fixed once rather than re-drawn during training -- confirm.
y = Lambda(lambda z: random.uniform(0.5,1.0)*z)(y)
y = Dense(units=5, activation='relu')(y)
y = Dense(units=1, activation='sigmoid')(y)
model = Model(x, y)
My question concerns when this random number will be generated. Will this fix a single random number for:
the entire training process?
each batch?
each training data point?
Using this will create a constant that will not change at all, because random.uniform is not a keras function. You defined this operation in the graph as constant * tensor and the factor will be constant.
You need random functions "from keras" or "from tensorflow". For instance, you can take K.random_uniform((1,), 0.5, 1.).
This will be changed per batch. You can test it by training this code for a lot of epochs and see the loss changing.
from keras.layers import *
from keras.models import Model
from keras.callbacks import LambdaCallback
import numpy as np
# Single-feature model whose only layer multiplies the input by a
# backend-generated uniform random value of shape (1,).
ins = Input((1,))
outs = Lambda(lambda x: K.random_uniform((1,))*x)(ins)
model = Model(ins,outs)
# Predict three times on the same input of ones so the printed outputs can be compared.
print(model.predict(np.ones((1,1))))
print(model.predict(np.ones((1,1))))
print(model.predict(np.ones((1,1))))
# Compile with Adam and mean absolute error, then fit on 100000 samples of ones.
model.compile('adam','mae')
model.fit(np.ones((100000,1)), np.ones((100000,1)))
If you want it to change for each training sample, then get a fixed batch size and generate a tensor with random numbers for each sample: K.random_uniform((batch_size,), .5, 1.).
You should probably get better performance if you do it in your own generator and model.fit_generator(), though:
class MyGenerator(keras.utils.Sequence):
    """Batch generator that rescales every input batch by one random factor.

    Each call to ``__getitem__`` draws a single scalar uniformly from
    ``[minRand, maxRand)`` and multiplies the whole input batch by it; the
    outputs are returned unchanged.

    Args:
        inputs: Indexable array of input samples (fancy indexing and
            slicing are used, e.g. a NumPy array).
        outputs: Array of targets aligned with ``inputs``.
        batchSize: Number of samples per batch.
        minRand: Lower bound of the random scale factor.
        maxRand: Upper bound of the random scale factor.
    """

    def __init__(self, inputs, outputs, batchSize, minRand, maxRand):
        # Let the base class set up whatever state it needs.
        super().__init__()
        self.inputs = inputs
        self.outputs = outputs
        self.batchSize = batchSize
        self.minRand = minRand
        self.maxRand = maxRand

    def on_epoch_end(self):
        """Shuffle inputs and outputs with the same permutation."""
        indices = np.random.permutation(len(self.inputs))
        self.inputs = self.inputs[indices]
        self.outputs = self.outputs[indices]

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division without floats.
        return -(-len(self.inputs) // self.batchSize)

    def __getitem__(self, i):
        """Return batch ``i`` as ``(scaled_inputs, outputs)``."""
        start = i * self.batchSize
        end = start + self.batchSize
        # Use np.random, like the shuffle above, so one NumPy seed controls
        # all randomness here and no extra `random` import is required.
        factor = np.random.uniform(self.minRand, self.maxRand)
        x = self.inputs[start:end] * factor
        y = self.outputs[start:end]
        return x, y

How to correct this custom loss function for keras with tensorflow?

I want to write a custom loss function that would penalize underestimation of positive target values with weights. It would work like mean square error, with the only difference that square errors in said case would get multiplied with a weight greater than 1.
I wrote it like this:
def wmse(ground_truth, predictions):
    """Mean squared error that weights some errors by 100.

    An element's squared error is multiplied by 100 when the prediction is
    below the target and the target is positive; every other element keeps
    weight 1. Uses NumPy operations only.
    """
    residuals = np.subtract(ground_truth, predictions)
    squared = np.square(residuals)

    # Start from unit weights, then raise the penalised positions.
    penalties = np.ones_like(squared)
    underestimated = predictions < ground_truth
    positive_target = np.sign(ground_truth) > 0
    penalties[np.logical_and(underestimated, positive_target)] = 100

    return np.mean(np.multiply(squared, penalties))
However, when I supply it to my Sequential model in keras with tensorflow as backend:
model.compile(loss=wmse,optimizer='rmsprop')
I get the following error:
raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed.
TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
The traceback points to this line in wmse:
weights[np.logical_and(predictions < ground_truth, np.sign(ground_truth) > 0)] = 100
I have never worked with keras nor tensorflow until now, so I'd appreciate if someone helped me to adapt this loss function to keras/tensorflow framework. I tried to replace np.logical_and with tensorflow.logical_and, but to no avail, the error is still there.
As @nuric mentioned, you have to implement your loss using only Keras / TensorFlow operations with derivatives, as these frameworks won't be able to back-propagate through other operations (like NumPy ones).
A Keras only implementation could look like this:
from keras import backend as K
def wmse(ground_truth, predictions):
    """Weighted mean squared error built from Keras backend operations.

    Squared errors are multiplied by 100 where the prediction is below the
    target and the target is positive; elsewhere the weight is 1.

    Returns:
        The mean of the weighted squared errors.
    """
    residual = ground_truth - predictions
    squared = residual ** 2

    unit_weights = K.ones_like(squared)
    is_underestimate = K.less(predictions, ground_truth)
    is_positive_target = K.greater(K.sign(ground_truth), 0)

    # Select the heavier weight only where both conditions hold.
    penalties = K.switch(
        is_underestimate & is_positive_target,
        unit_weights * 100,
        unit_weights,
    )
    return K.mean(squared * penalties)
# Worked example with integer targets and predictions.
gt = K.constant([-2, 2, 1, -1, 3], dtype="int32")
pred = K.constant([-2, 1, 1, -1, 1], dtype="int32")
# wmse returns one scalar tensor, so bind it to a single name; unpacking it
# into two names (weights, loss) fails.
loss = wmse(gt, pred)
sess = K.get_session()
print(loss.eval(session=sess))
# 100