Converting a tensor to a numpy 2D array - numpy

from transformers import BertTokenizer, TFBertModel
import matplotlib.pyplot as plt
import tensorflow as tf
The code included below throws an error on the line:
features = bert_encoder([input_word_ids, input_mask, input_type_ids])[0][:,0,:].numpy()
The error is:
AttributeError: 'Tensor' object has no attribute 'numpy'
I am running this on a tensor flow version > 2.0 and tf.executing_eagerly() returns True
The dictionary items that I am retrieving information from before the numpy() operation are:
{
bert_encoder_output: <tf.Tensor 'strided_slice:0' shape=(None, 768) dtype=float32>,
embedding: <tf.Tensor 'tf_bert_model/Identity:0' shape=(None, 50, 768) dtype=float32>
}
TPU Session set up:
try:
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
strategy = tf.distribute.get_strategy() # for CPU and single GPU
print('Number of replicas:', strategy.num_replicas_in_sync)
Code:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
if (tf.executing_eagerly()):
print ("Yes")
tf.compat.v1.enable_eager_execution()
max_len = 50
def get_bert_encoder_output(printInputs = False):
model_inputs = {}
bert_encoder = TFBertModel.from_pretrained(model_name)
# Get Inputs
input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
input_type_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_type_ids")
# last hidden-state - the model output - is the first element of the output tuple
embedding = bert_encoder([input_word_ids, input_mask, input_type_ids])[0]
bert_encoder_output = (embedding[:,0,:])
model_inputs['input_word_ids'] = input_word_ids
model_inputs['input_mask'] = input_mask
model_inputs['input_type_ids'] = input_type_ids
model_inputs['bert_encoder_output'] = bert_encoder_output
model_inputs['embedding'] = embedding
if (tf.executing_eagerly()):
print ("Inside get_bert_encoder_output - Yes executing eagerly")
features = bert_encoder([input_word_ids, input_mask, input_type_ids])[0][:,0,:].numpy()
if (printInputs):
print (model_inputs)
print (features)
return (model_inputs)

Related

Create Multi-Output Model with KerasNLP (CLS + MLM)

I am trying to use Keras_NLP to pretrain a model through both Masked Language Modelling and using a proxy for the CLS token to classify certain labels. With just Masked Language Modelling, it works fine but when I try to make it multi-output and also classify labels using the CLS token it seems to be unable to work
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
!pip install -q keras-nlp
import keras_nlp
import numpy as np
#Creating Random Data
x = np.random.randint(3, 100, size = (1000, 140))
#Adding Int 2 at front of each vector to act as CLS
x_cls = np.hstack([np.full((1000,1),2), x])
y = np.random.randint(0, 3, size = (1000,1)) #Random Class for each x value
#Need data for actual problem in dataset format, so using it here
dataset = tf.data.Dataset.from_tensor_slices((x_cls, y)).batch(32)
#Keras Masker
masker = keras_nlp.layers.MLMMaskGenerator(99, 0.15, mask_token_id = 0, unselectable_token_ids= [0,1,2], mask_selection_length = 15)
def process(input, y):
outputs = masker(input)
features = {
"tokens": outputs["tokens"],
"mask_positions": outputs["mask_positions"],
}
labels = outputs["mask_ids"]
weights = outputs["mask_weights"]
return features, labels, weights , y
#Apply Mask to Dataset
dataset_mask = dataset.map(lambda x, y: process(x,y))
#Create Simple Encoder, for testing purposes
input = keras.Input(shape = (141,))
embedding = layers.Embedding(99, 20)(input)
x = layers.Dense(20, activation = "relu")(embedding)
encoder = keras.Model(inputs = input, outputs = x)
#MLM prediction + CLS prediction
inputs = {
"tokens": keras.Input(shape=(141,), dtype=tf.int32),
"mask_positions": keras.Input(shape=(15,), dtype=tf.int32),
}
encoded_tokens = encoder(inputs["tokens"])
outputs = keras_nlp.layers.MLMHead(vocabulary_size = 141, activation="softmax")(encoded_tokens, inputs["mask_positions"])
x = layers.Lambda(lambda x: x[:,0,:])(encoded_tokens)
#Use lambda layer to extract embedding from 1st dim, corresponds to CLS
output_2 = layers.Dense(3, activation = "softmax")(x)
pretraining_model = keras.Model(inputs, outputs = [outputs, output_2])
pretraining_model.compile(loss=["sparse_categorical_crossentropy" "sparse_categorical_crossentropy"], optimizer = "Adam",weighted_metrics=["sparse_categorical_accuracy"], jit_compile=True)
pretraining_model.fit(dataset_mask)
The error I get is
ValueError: Data is expected to be in format `x`, `(x,)`, `(x, y)`, or `(x, y, sample_weight)`, found: ({'tokens': <tf.Tensor 'data_1:0' shape=(None, 141) dtype=int64>, 'mask_positions': <tf.Tensor 'data:0' shape=(None, 15) dtype=int64>}, <tf.Tensor 'data_2:0' shape=(None, 15) dtype=int64>, <tf.Tensor 'data_3:0' shape=(None, 15) dtype=float32>, <tf.Tensor 'data_4:0' shape=(None, 1) dtype=int64>)
Any help would be appreciated!

Model.predict throwing TypeError: 'numpy.ndarray' object is not callable

I am new to Python and facing few issues while implementing Neural Networks on a Earthquake prediction problem.
There is very rare material availabale online to solve this issue using neural networks, so got struck.
Please support.
Model.predict throwing TypeError: 'numpy.ndarray' object is not callable.
enter link description here
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
train_data = pd.read_csv("C:\\Users\\rjraj\\Desktop\\mma\\ML & AI\\Project\\train_values.csv")
train_labels = pd.read_csv("C:\\Users\\rjraj\\Desktop\mma\\ML & AI\\Project\\train_labels.csv")
test_labels = pd.read_csv("C:\\Users\\rjraj\\Desktop\\mma\\ML & AI\\Project\\test_values.csv")
X_tr = train_data
X_te = test_labels
y_tr = train_labels['damage_grade'].values
# label encoding the categorical variables
label_encoding_columns=['land_surface_condition', 'foundation_type', 'roof_type',
'ground_floor_type', 'other_floor_type', 'position',
'plan_configuration', 'legal_ownership_status']
# label encoding categorical columns in train dataset
for i in label_encoding_columns:
X_tr[i]=X_tr[i].astype("category")
X_tr[i]=X_tr[i].cat.codes
# label encoding categorical columns in test dataset
for j in label_encoding_columns:
X_te[j]=X_te[j].astype("category")
X_te[j]=X_te[j].cat.codes
from sklearn.model_selection import train_test_split
X_train, X_test,y_train, y_test = train_test_split(X_tr,y_tr,test_size = 0.3,random_state = 42)
X_train.shape
(182420, 39)
X_test.shape
(78181, 39)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
MinMaxScaler()
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
#from tensorflow.keras.optimizers import Adam
model = Sequential()
model.add(Dense(4, activation = 'relu'))
model.add(Dense(4, activation = 'relu'))
model.add(Dense(1))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x=X_train, y=y_train, epochs=30)
model.evaluate(X_test,y_test, verbose = 0)
model.evaluate(X_train,y_train, verbose = 0)
test_pred = model.predict(X_test)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-74-82e9029ecb43> in <module>
----> 1 test_pred = model.predict(X_test)
TypeError: 'numpy.ndarray' object is not callable

Tensorflow nonsense reshape values

When using tensorflow.keras.layers.Reshape I've been getting strange errors. Where is it getting the 47409408 value from? 207936 corresponds to the correct size (69312*3).
A weird aspect is if I put a flatten layer before the reshape it works.
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 304, 228, 3) 30
_________________________________________________________________
reshape (Reshape) (None, 69312, 3) 0
=================================================================
Total params: 30
Trainable params: 30
Non-trainable params: 0
____________________________________
(0) Invalid argument: Input to reshape is a tensor with 207936 values, but the requested shape has 47409408
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image
from tensorflow.keras import datasets, layers, models, preprocessing
import os
from natsort import natsorted
from tensorflow.keras.models import Model
BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 1e-4
#jpegs with values from 0 to 255
img_dir = ".../normalized_imgs"
# .npy files of size (69312,3)
pts_dir = ".../normalized_pts"
img_files = [os.path.join(img_dir, f)
for f in natsorted(os.listdir(img_dir))]
pts_files = [os.path.join(pts_dir, f)
for f in natsorted(os.listdir(pts_dir))]
img = Image.open(img_files[0])
pts = np.load(pts_files[0])
def parse_img_input(img_file, pts_file):
def _parse_input(img_file, pts_file):
# get image
d_filepath = img_file.numpy().decode()
d_image_decoded = tf.image.decode_jpeg(tf.io.read_file(d_filepath), channels=1)
d_image = tf.cast(d_image_decoded, tf.float32) / 255.0
# get numpy data
pts_filepath = pts_file.numpy().decode()
pts = np.load(pts_filepath, allow_pickle= True)
print("d_image ",d_image.shape )
return d_image, pts
return tf.py_function(_parse_input,
inp=[img_file, pts_file],
Tout=[tf.float32, tf.float32])
class SimpleCNN(Model):
def __init__(self):
super(SimpleCNN, self).__init__()
input_shape = (img.size[0], img.size[1], 1)
self.model = model = models.Sequential()
model.add(tf.keras.Input(shape= input_shape))
model.add(layers.Conv2D(3, (3,3), padding='same'))
model.add(layers.Reshape((pts.shape[0], pts.shape[1])))
# split input data into train, test sets
X_train_file, X_test_file, y_train_file, y_test_file = train_test_split(img_files, pts_files,
test_size=0.2,
random_state=0)
model = SimpleCNN()
dataset_train = tf.data.Dataset.from_tensor_slices((X_train_file, y_train_file))
dataset_train = dataset_train.map(parse_img_input)
dataset_test = tf.data.Dataset.from_tensor_slices((X_test_file, y_test_file))
dataset_test = dataset_test.map(parse_img_input)
model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE), loss= tf.losses.MeanSquaredError(), metrics= [tf.keras.metrics.get('accuracy')])
model.fit(dataset_train, epochs=EPOCHS, shuffle=True, validation_data= dataset_test)

Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2

Before reshaping xtraindata and xtest data, I got error:
"Input 0 of layer sequential_10 is incompatible with the layer: : expected min_ndim=4, found ndim=2.". After reshaping xtraindata and xtestdata as (1400,24,24,1) and (600,24,24,1) in order. Then I got error like this:
"Incompatible shapes: [32,1] vs. [32,6,6,1]
[[node mean_squared_error/SquaredDifference (defined at C:\Users\User\Documents\car_person.py:188) ]] [Op:__inference_test_function_7945]
Function call stack:
test_function"
I cannot make evaluate function working on created model. What should I do in order to make test data compatible with model?
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import cv2
import pandas as pd
import tensorflow as tf
import itertools as it
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
except RuntimeError as e:
print(e)
#gpu_options=K.tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
path = "C:/Users/User/Desktop/tunel_data"
training_data=[]
def create_training_data(training_data, path):
categories = ["tunel_data_other", "tunel_data_car"]
for category in categories:
path=os.path.join(path, category)
for img in os.listdir(path):
print(img)
if category=="tunel_data_other":
class_num= 0
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('car'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
elif category=="tunel_data_car":
class_num = 1
#image=Image.open(img)
#new_image = image.resize((50, 50))
#new_image.save('person'+img.index())
#try:
image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)/255
new_array = cv2.resize(image_array, (24, 24))
print(new_array.shape)
training_data.append([new_array, class_num])
#except:
#pass
path = "C:/Users/User/Desktop/tunel_data"
return training_data
create_training_data(training_data, path)
x=[]
y=[]
for i in range(len(training_data)):
x.append(training_data[i][0])
y.append(training_data[i][1])
#print(x)
#print(y)
x = np.array(x).reshape(2000, 576)
"""
principle_features = PCA(n_components=250)
feature = principle_features.fit_transform(x)
"""
feature = x
label = y
feature_df = pd.DataFrame(feature)
#df = DataFrame (People_List,columns=['First_Name','Last_Name','Age'])
label_df = pd.DataFrame(label)
data = pd.concat([feature_df, label_df], axis=1).to_csv('complete.csv')
data = pd.read_csv("complete.csv")
data = data.sample(frac=1).reset_index(drop=True)
print(data)
x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.7, random_state=65)
xtraindata=pd.DataFrame(data=x_train[:,:])
xtestdata=pd.DataFrame(data=x_test[:,:])
print(xtraindata)
ytraindata=pd.DataFrame(data=y_train[:])
ytestdata=pd.DataFrame(data=y_test[:])
print(ytraindata)
xtraindata = np.asarray(xtraindata)
ytraindata = np.asarray(ytraindata)
xtestdata = np.asarray(xtestdata)
ytestdata = np.asarray(ytestdata)
x=np.asarray(x)
y=np.asarray(y)
xtraindata = xtraindata.reshape(1400,24,24,1)
xtestdata = xtestdata.reshape(600,24,24,1)
activation = ["tanh", "relu", "sigmoid", "softmax"]
input_size1 = range(10)
input_size2 = range(10)
k_scores = []
in_size = []
possible = list(it.permutations(activation, 4))
for c in possible:
for i in input_size1:
for a in input_size2:
model = tf.keras.Sequential([tf.keras.layers.Conv2D(256, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Conv2D(512, kernel_size=(3,3), padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
tf.keras.layers.Dense(250, activation=c[0]),
tf.keras.layers.Dense(i, activation=c[1]),
tf.keras.layers.Dense(a, activation=c[2]),
tf.keras.layers.Dense(1, activation=c[3])])
model.compile(optimizer='sgd', loss='mse')
val_loss = model.evaluate(xtestdata, ytestdata, verbose=1)
k_scores.append(val_loss)
in_size.append([i,a])
print(k_scores)
print("Best activation functions for each layer:", possible[(k_scores.index((min(k_scores)))) % len(possible)],
"/n Best input sizes:", "840", in_size[k_scores.index((min(k_scores)))][0], in_size[k_scores.index((min(k_scores)))][1], "1")
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][0]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][0], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][1]))
model.add(tf.keras.layers.Dense(in_size[k_scores.index((min(k_scores)))][1], activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][2]))
model.add(tf.keras.layers.Dense(1, activation=possible[(k_scores.index((min(k_scores)))) % len(possible)][3]))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "mse"])
model.fit(x, y, batch_size=16, epochs=5)
predictions = model.predict([x_test])
print(predictions)
print(predictions.shape)
output layer size is different. you want size (32, 1) but model's output is (32, 6, 6, 1)
insert Flatten() between MaxPooling2D and Dense() maybe this work's well.
and here is the tip. .evaluate method is only for trained model. you should use .fit first.

ResNet50 From keras gives different results for predict and output

I want to fine-tune the ResNet50 from Keras but first I found that given the same input, the prediction from ResNet50 is different from the output of the model. Actually, the value of the output seems to be 'random'. What am I doing wrong?
Thanks in advance!
Here it is my code:
import tensorflow as tf
from resnet50 import ResNet50
from keras.preprocessing import image
from imagenet_utils import preprocess_input
import numpy as np
from keras import backend as K
img_path = 'images/tennis_ball.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x_image = preprocess_input(x)
#Basic prediction
model_basic = ResNet50(weights='imagenet', include_top=False)
x_prediction = model_basic.predict(x_image)
#Using tensorflow to obtain the output
input_tensor = tf.placeholder(tf.float32, shape=[None, 224,224, 3], name='input_tensor')
model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)
x = model.output
# Tensorflow session
session = tf.Session()
session.run(tf.global_variables_initializer())
K.set_session(session)
feed_dict = {input_tensor: x_image, K.learning_phase(): 0}
# Obatin the output given the same input
x_output = session.run(x, feed_dict=feed_dict)
# Different results
print('Value of the prediction: {}'.format(x_prediction))
print('Value of the output: {}'.format(x_output))
Here it is an example of the logs:
Value of the prediction: [[[[ 1.26408589e+00 3.91489342e-02 8.43058806e-03 ...,
5.63185453e+00 4.49339962e+00 5.13037841e-04]]]]
Value of the output: [[[[ 2.62883282 2.20199227 9.46755123 ..., 1.24660134 1.98682189
0.63490123]]]]
The problem was that session.run(tf.global_variables_initializer()) initializes your parameters to random values.
The problem was solve by using:
session = K.get_session()
instead of:
session = tf.Session()
session.run(tf.global_variables_initializer())