How to create a custom conditional activation function - tensorflow

I want to create custom activation function in TF2. The math is like this:
def sqrt_activation(x):
if x >= 0:
return tf.math.sqrt(x)
else:
return -tf.math.sqrt(-x)
The problem is that I can't compare x with 0 since x is a tensor. How to achieve this functionality?

You can skip the comparison by doing,
def sqrt_activation(x):
return tf.math.sign(x)*tf.math.sqrt(tf.abs(x))

YOu need to use tf backend functions and convert your code as follows:
import tensorflow as tf
#tf.function
def sqrt_activation(x):
zeros = tf.zeros_like(x)
pos = tf.where(x >= 0, tf.math.sqrt(x), zeros)
neg = tf.where(x < 0, -tf.math.sqrt(-x), zeros)
return pos + neg
note that this function check all tensor to meet on those conditions ergo returning the pos + neg line

Related

Converting a fully connected neural network with variable number of hidden layers from tensorflow to pytorch

I recently started learning pytorch and I am trying to convert a part of a large script including coding a MLP with variable number of hidden layers from Tensorflow to pytorch.
import tensorflow as tf
### Base neural network
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = {'w':[], 'b':[]}
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params['w'].append(tf.Variable(tf.random_normal([n_in, n_out], stddev=std)))
params['b'].append(tf.Variable(tf.mul(bias_init, tf.ones([n_out,]))))
return params
def mlp(X, params):
h = [X]
for w,b in zip(params['w'][:-1], params['b'][:-1]):
h.append( tf.nn.relu( tf.matmul(h[-1], w) + b ) )
#h.append( tf.nn.tanh( tf.matmul(h[-1], w) + b ) )
return tf.matmul(h[-1], params['w'][-1]) + params['b'][-1]
def compute_nll(x, x_recon_linear):
return tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(x_recon_linear, x), reduction_indices=1, keep_dims=True)
def gauss_cross_entropy(mean_post, std_post, mean_prior, std_prior):
d = (mean_post - mean_prior)
d = tf.mul(d,d)
return tf.reduce_sum(-tf.div(d + tf.mul(std_post,std_post),(2.*std_prior*std_prior)) - tf.log(std_prior*2.506628), reduction_indices=1, keep_dims=True)
how could I write down similarly weights and bias variables and attach them in each hidden layer in pytorch?
how could I convert gauss_cross_entropy and compute_nll
functions as well (finding equivalent syntax)?
Are these two codes compatible?
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as func
from torch.distributions import Normal, Categorical, Independent
from copy import
device = "cpu"
if torch.cuda.is_available():
device = "cuda:0"
if torch.cuda.device_count() > 1:
net = nn.DataParallel(net)
net.to(device)
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = {'w':[], 'b':[]}
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params['w'].append(torch.tensor(Normal([n_in, n_out], torch.tensor([std])) ,requires_grad=True))
params['b'].append(torch.tensor(torch.mul(bias_init, torch.ones([n_out,])),requires_grad=True))
return params
def mlp(X, params):
h = [X]
for w,b in zip(params['w'][:-1], params['b'][:-1]):
h.append( torch.nn.ReLU( tf.matmul(h[-1], w) + b ) )
return torch.matmul(h[-1], params['w'][-1]) + params['b'][-1]
def compute_nll(x, x_recon_linear):
return torch.sum(func.binary_cross_entropy_with_logits(x_recon_linear, x), reduction_indices=1, keep_dims=True)
def gauss_cross_entropy(mu_post, sigma_post, mu_prior, sigma_prior):
d = (mu_post - mu_prior)
d = torch.mul(d,d)
return torch.sum(-torch.div(d + torch.mul(sigma_post,sigma_post),(2.*sigma_prior*sigma_prior)) - torch.log(sigma_prior*2.506628), reduction_indices=1, keep_dims=True)
What is the substitute function for tf.placeholder in pytorch? For instance here:
class VAE(object):
def __init__(self, hyperParams):
self.X = tf.placeholder("float", [None, hyperParams['input_d']])
self.prior = hyperParams['prior']
self.K = hyperParams['K']
self.encoder_params = self.init_encoder(hyperParams)
self.decoder_params = self.init_decoder(hyperParams)
and also how should I change tf.shape in this line: tf.random_normal(tf.shape(self.sigma[-1]))
How could I write down similar weights and bias variables and attach them in each hidden layer in PyTorch?
An easier way to define those is to create a list containing the params as (weight, bias) tuples:
def init_mlp(layer_sizes, std=.01, bias_init=0.):
params = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
params.append([
nn.init.normal_(torch.empty(n_in, n_out)).requires_grad_(True),
torch.empty(n_out).fill_(bias_init).requires_grad_(True)])
return params
Above I define my parameters as 'empty' (created with uninitialized data) tensors with torch.empty. I have used in-place functions such as nn.init.normal_ (there are many others available) and torch.Tensor.fill_ to fill the tensor with an arbitrary value (maybe it is .mul_(bias_init) you are looking for, based on your TensorFlow sample?).
For the inference code, you don't actually need to store the intermediate layer results:
def mlp(x, params):
for i, (W, b) in enumerate(params):
x = x#W + b
if i < len(params) - 1:
x = torch.relu(x)
return x
How could I convert gauss_cross_entropy and compute_nll functions as well (finding equivalent syntax)?
You can use PyTorch functions and mathematical operators to define your logic. For compute_loss you were using the built-in, which actually does not require summation after it, by default the losses of the batch elements are averaged.
def compute_loss(y_pred, y_true):
return F.binary_cross_entropy_with_logits(y_pred, y_true)
What is the substitute function for tf.placeholder in Pytorch?
You don't have placeholders in PyTorch, you compute your outputs explicitly using PyTorch operators, then you should be able to backpropagate through those operators and get the gradients for each parameter.
How should I change tf.shape in this line: tf.random_normal(tf.shape(self.sigma[-1]))
Function tf.shape returns the shape of the tensor, in PyTorch you call torch.Tensor.shape or by calling torch.Tensor.size: i.e. self.sigma[-1].shape or self.sigma[-1].size().

Get logits of a trained Keras model [duplicate]

I am building a deconvolution network. I would like to add a layer to it which is the reverse of a softmax. I tried to write a basic python function that returns the inverse of a softmax for a given matrix and put that in a tensorflow Lambda and add it to my model.
I have no error but when I doing a predict I only have 0 at the exit. When I don't add this layer to my network I have output something other than zeros. This therefore justifies that they are due to my inv_softmax function which is bad.
Can you enlighten me how to proceed?
I define my funct as this :
def inv_softmax(x):
C=0
S = np.zeros((1,1,10)) #(1,1,10) is the shape of the datas that my layer will receive
try:
for j in range(np.max(np.shape(x))):
C+=np.exp(x[0,0,j])
for i in range(np.max(np.shape(x))):
S[0,0,i] = np.log(x[0,0,i]+C
except ValueError:
print("ValueError in inv_softmax")
pass
S = tf.convert_to_tensor(S,dtype=tf.float32)
return S
I add it to my network as :
x = ...
x = layers.Lambda(lambda x : inv_softmax(x),name='inv_softmax',output_shape=[1,1,10])(x)
x = ...
If you need more of my code or others informations ask me please.
Try this:
import tensorflow as tf
def inv_softmax(x, C):
return tf.math.log(x) + C
import math
input = tf.keras.layers.Input(shape=(1,10))
x = tf.keras.layers.Lambda(lambda x : inv_softmax(x, math.log(10.)),name='inv_softmax')(input)
model = tf.keras.Model(inputs=input, outputs=x)
a = tf.zeros([1, 1, 10])
a = tf.nn.softmax(a)
a = model(a)
print(a.numpy())
Thanks it works !
I put :
import keras.backend as K
def inv_softmax(x,C):
return K.log(x)+K.log(C)

Tensorflow: Random selection of masks

I know that this stackoverflow thread already gives some nice examples about conditionals in tensorflow, but I'm still struggling how to solve my issue of randomly selecting among several different masks in tensorflow.
Right now I can only select between two mask tensors a and b:
rand_num = tf.random_uniform([], minval=0, maxval=2.0, dtype=tf.float32, seed=None)
def if_true():
return b
def if_false():
return a
mask_sel = tf.cond(tf.less(rand_num , tf.constant(1.0)),if_true,if_false)
(I still find it weird that one needs to define these two helper functions, but not using them weirdly throws an error.)
Now the question: Lets say I have 4 mask tensors (a,b,c,d) or more to randomly select, what would be the best way to do that in tensorflow?
In python that would be
rand_num = np.random.uniform(low=0,high=4.0)
if (rand_num < 1.0):
mask_sel = a
elif(rand_num < 2.0):
mask_sel = b
elif(rand_num < 3.0):
mask_sel = c
else
mask_sel = d
About the helper functions, they are useful because they allow tensorflow to know which operations will run under each condition, this way it can optimize by running only the selected branch and ignoring the other. Operations outside the helper functions but used by any of them will always be run before tf.cond runs.
The other options is to use tf.select; you won't need the helper functions here but it will always evaluate both sides before running tf.select which can be inefficient if you don't need to.
Now for the main problem 'selecting from more than 2 tesnors', you can use multiple options:
1- Recursively nesting tf.cond operations:
def select_from_list(selector, tensor_list):
length = len(tensor_list)
if length == 0:
raise ValueError('List is empty')
elif length == 1:
return tensor_list[0]
else:
half = length // 2
return tf.cond(tf.less(selector, float(half)), lambda: select_from_list(selector, tensor_list[:half]), lambda: select_from_list(selector - half, tensor_list[half:]))
2- Using tf.case:
def select_from_list(selector, tensor_list):
length = len(tensor_list)
if length == 0:
raise ValueError('List is empty')
elif length == 1:
return tensor_list[0]
else:
def fn(tensor):
return lambda: tensor
pred_fn_pairs = [(tf.less(selector, float(i+1)), fn(tensor)) for i, tensor in enumerate(tensor_list)]
return tf.case(pred_fn_pairs, default=lambda:tensor_list[-1])
You can test any of them using:
def test(selector, value_list, sess):
return select_from_list(float(selector), [tf.constant(value) for value in value_list]).eval(session = sess)
sess = tf.Session()
test(3.5, [4,2,6,7,5], sess)
This should return 7

How to use maxout activation function in tensorflow?

I want to use maxout activation function in tensorflow, but I don't know which function should use.
I sent a pull request for maxout, here is the link:
https://github.com/tensorflow/tensorflow/pull/5528
Code is as follows:
def maxout(inputs, num_units, axis=None):
shape = inputs.get_shape().as_list()
if axis is None:
# Assume that channel is the last dimension
axis = -1
num_channels = shape[axis]
if num_channels % num_units:
raise ValueError('number of features({}) is not a multiple of num_units({})'
.format(num_channels, num_units))
shape[axis] = -1
shape += [num_channels // num_units]
outputs = tf.reduce_max(tf.reshape(inputs, shape), -1, keep_dims=False)
return outputs
Here is how it works:
I don't think there is a maxout activation but there is nothing stopping yourself from making it yourself. You could do something like the following.
with tf.variable_scope('maxout'):
layer_input = ...
layer_output = None
for i in range(n_maxouts):
W = tf.get_variable('W_%d' % d, (n_input, n_output))
b = tf.get_variable('b_%d' % i, (n_output,))
y = tf.matmul(layer_input, W) + b
if layer_output is None:
layer_output = y
else:
layer_output = tf.maximum(layer_output, y)
Note that this is code I just wrote in my browser so there may be syntax errors but you should get the general idea. You simply perform a number of linear transforms and take the maximum across all the transforms.
How about this code?
This seems to work in my test.
def max_out(input_tensor,output_size):
shape = input_tensor.get_shape().as_list()
if shape[1] % output_size == 0:
return tf.transpose(tf.reduce_max(tf.split(input_tensor,output_size,1),axis=2))
else:
raise ValueError("Output size or input tensor size is not fine. Please check it. Reminder need be zero.")
I refer the diagram in the following page.
From version 1.4 on you can use tf.contrib.layers.maxout.
Maxout is a layer such that it calculates N*M output for a N*1 input, and then it returns the maximum value across the column, i.e., the final output has shape N*1 as well. Basically it uses multiple linear fittings to mimic a complex function.

if statement in numpy array always needs vectorization

The if statement in this function, works without vectorization?
def K(T0,z,v):
for i in range(len(T0)-1):
GDens[i+1]=(Dens[i+1]-Dens[i])/(z[i+1]-z[i])
for i in range(len(T0)):
B[i]=(((ws/Dens0)*k0)**2)*np.exp(-2*alfa*z[i])-((g/Dens0)*GDens[i])
for i in range(len(T0)):
if B[i]>0:
kz[i]=((0.05*h1)**2)*np.sqrt(B[i])+kmin
else:
kz[i]=kmin
kfinal=kz
return kfinal
I'm not sure what you're asking but this is how you can vectorize this code:
GDens = np.zeros_like(z)
GDens[:-1] = (Dens[1:] - Dens[:-1]) / (z[1:] - z[:-1])
B = (((ws/Dens0)*k0)**2)*np.exp(-2*alfa*z)-((g/Dens0)*GDens)
kz = np.where(B > 0, ((0.05*h1)**2)*np.sqrt(B)+kmin, kmin)