TFX runs out of memory on 32GB of RAM with only a 232.9MB TFRecord file. Is there something wrong with my code? - tensorflow

I am trying to train a model that detects whether someone is wearing sunglasses, using TFX and a subset of the CelebA dataset (~26k images). I have written the images and labels to a TFRecord file that is 232.9 MB.
When I then go through the different components, I always run out of memory when running the Transform component. Is this normal? By the way, I am running this on a TPU runtime with 32 GB of RAM, as I am using Google Colab Pro.
If so, what would be the best way to overcome the problem? Should I just create many smaller TFRecords and pass them through the components one by one?
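For context on whether this is normal, a quick back-of-envelope calculation (assuming the ~26k images are decoded to 256x256x3 float32, as the transform code below does) suggests the decoded tensors alone approach the RAM limit:
# Rough estimate: the TFRecord stores compressed JPEGs, but the transform
# decodes them into float32 tensors of shape (256, 256, 3).
images = 26_000
bytes_per_image = 256 * 256 * 3 * 4      # float32 = 4 bytes per value
print(images * bytes_per_image / 2**30)  # ~19 GiB, before any extra copies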
Here is the code I have been using:
Code for writing to TFRecord:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np
from PIL import Image
import shutil
import random
import os
import io
import matplotlib.pyplot as plt
%matplotlib inline
from google.colab import drive
drive.mount('/content/gdrive')
%cd gdrive/MyDrive/Machine_Learning_stuff/celebs/
RAW_SUNGLASSES_DIR='./sunglasses_classifier/sunglasses_imgs/raw/'
SUNGLASSES_TFRECORD_DIR= './sunglasses_classifier/data/rec_sunglasses/sunglasses_full.tfrecords'
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def image_resize_to_byte_array(image: Image):
    imgByteArr = io.BytesIO()
    image = image.resize((256, 256))
    image.save(imgByteArr, format="jpeg")
    imgByteArr = imgByteArr.getvalue()
    return imgByteArr
#Remove any corrupted files and non-jpeg files
!find ${RAW_SUNGLASSES_DIR} -size 0 -exec rm {} +
!find ${RAW_SUNGLASSES_DIR} -type f ! -name "*.jpg" -exec rm {} +

image_labels = {}
for filename in os.listdir(RAW_SUNGLASSES_DIR + '1-sunglasses'):
    if '.jpg' in filename:
        file_path = os.path.join(RAW_SUNGLASSES_DIR, '1-sunglasses', filename)
        #print(file_path)
        image_labels[file_path] = 1
for filename in os.listdir(RAW_SUNGLASSES_DIR + 'no_sunglasses'):
    if '.jpg' in filename:
        file_path = os.path.join(RAW_SUNGLASSES_DIR, 'no_sunglasses', filename)
        #print(file_path)
        image_labels[file_path] = 0
# Create a dictionary with features that are relevant.
def image_example(image_string, label):
image_shape = tf.io.decode_jpeg(image_string).shape
feature = {
'label': _int64_feature(label),
'image_raw': _bytes_feature(image_string),
}
return tf.train.Example(features=tf.train.Features(feature=feature))
with tf.io.TFRecordWriter(SUNGLASSES_TFRECORD_DIR) as writer:
for filepath, label in image_labels.items():
image_bytes=image_resize_to_byte_array(Image.open(filepath,mode='r'))
#image_string = open(filepath, 'rb').read()
tf_example = image_example(image_bytes, label)
writer.write(tf_example.SerializeToString())
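If smaller shards do turn out to help, a minimal variation of the writer above could split the same examples across several files (a sketch; N_SHARDS and the file naming are arbitrary choices, and ImportExampleGen reads a whole directory anyway):
# Hypothetical sharded variant of the writer above: N_SHARDS smaller files
# instead of one ~233 MB record.
N_SHARDS = 10
items = list(image_labels.items())
for shard in range(N_SHARDS):
    path = ('./sunglasses_classifier/data/rec_sunglasses/'
            f'sunglasses-{shard:05d}-of-{N_SHARDS:05d}.tfrecords')
    with tf.io.TFRecordWriter(path) as writer:
        for filepath, label in items[shard::N_SHARDS]:  # round-robin assignment
            image_bytes = image_resize_to_byte_array(Image.open(filepath, mode='r'))
            writer.write(image_example(image_bytes, label).SerializeToString())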
Code for TFX pipeline:
import tensorflow as tf
from tensorflow import keras
#import tensorflow_datasets as tfds
import os
import pprint
#import tfx
from tfx.components import ImportExampleGen
from tfx.components import ExampleValidator
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Transform
from tfx.components import Tuner
from tfx.components import Trainer
from tfx.proto import example_gen_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
# Location of the pipeline metadata store
_pipeline_root = 'pipeline/'
# Directory of the raw data files
_data_root = './data/rec_sunglasses/'
from google.colab import drive
drive.mount('/content/gdrive')
%cd gdrive/MyDrive/Machine_Learning_stuff/celebs/sunglasses_classifier/
context = InteractiveContext(pipeline_root=_pipeline_root)
#ExampleGen
example_gen = ImportExampleGen(input_base=_data_root)
context.run(example_gen)
#StatisticsGen
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)
#SchemaGen
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True)
context.run(schema_gen)
#ExampleValidator
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'])
context.run(example_validator)
#Transform
_transform_module_file = 'sunglasses_transform.py'
%%writefile {_transform_module_file}
import tensorflow as tf
import tensorflow_transform as tft

# Keys
_LABEL_KEY = "label"
_IMAGE_KEY = "image_raw"

def _transformed_name(key):
    return key + '_xf'

def _image_parser(image_str):
    '''Converts the images to a float tensor.'''
    image = tf.image.decode_image(image_str, channels=3)
    image = tf.reshape(image, (256, 256, 3))
    image = tf.cast(image, tf.float32)
    return image

def _label_parser(label_id):
    '''Converts the labels to a float tensor.'''
    label = tf.cast(label_id, tf.float32)
    return label

def preprocessing_fn(inputs):
    """tf.transform's callback function for preprocessing inputs.

    Args:
      inputs: map from feature keys to raw not-yet-transformed features.
    Returns:
      Map from string feature key to transformed feature operations.
    """
    # Convert the raw image and labels to a float array
    #print(inputs)
    outputs = {
        _transformed_name(_IMAGE_KEY):
            tf.map_fn(
                _image_parser,
                tf.squeeze(inputs[_IMAGE_KEY], axis=1),
                dtype=tf.float32),
        _transformed_name(_LABEL_KEY):
            tf.map_fn(
                _label_parser,
                inputs[_LABEL_KEY],
                dtype=tf.float32)
    }
    # Scale the pixels from 0 to 1
    outputs[_transformed_name(_IMAGE_KEY)] = tft.scale_to_0_1(
        outputs[_transformed_name(_IMAGE_KEY)])
    return outputs
When I then run the code below, I always get a message after about 23 minutes stating that my runtime was restarted because I ran out of RAM.
# Ignore TF warning messages
tf.get_logger().setLevel('ERROR')

# Set up the Transform component
transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=os.path.abspath(_transform_module_file))

# Run the component
context.run(transform)

I think the bug is in Transform itself: I hit the OOM when passing a preprocessing_fn to Transform, but I do not hit it when passing the same preprocessing_fn to the standalone function below. Secondly, I face the OOM even when there is just a training split. When you use Transform, the Beam code is essentially abstracted away from you, so how can you override what it is doing? The only solution I see is to drop Transform and use something like the function below, the downside being that you lose all the ML Metadata and pipeline benefits, which are significant :(
import os
import tempfile

import apache_beam as beam
import tensorflow_transform.beam as tft_beam
from tfx_bsl.public import tfxio
# The location of RecordBatchToExamples may vary by tfx_bsl version;
# get_schema_from_transform is the author's own helper (not shown).
from tfx_bsl.coders.example_coder import RecordBatchToExamples

def transform_data(preprocessing_fn, source_path, source_tfrecord_prefix, experiment_path):
    schema = get_schema_from_transform(source_path)
    source_tfrecord_path = f"{source_path}/tfrecords/{source_tfrecord_prefix}*"
    with beam.Pipeline() as pipeline:
        with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
            tfrecord_tfxio = tfxio.TFExampleRecord(
                file_pattern=source_tfrecord_path, schema=schema)
            raw_data = (
                pipeline
                | 'ReadData' >> beam.io.ReadFromTFRecord(
                    file_pattern=source_tfrecord_path, coder=beam.coders.BytesCoder())
                | 'DecodeTrainData' >> tfrecord_tfxio.BeamSource())
            raw_dataset = (raw_data, tfrecord_tfxio.TensorAdapterConfig())
            transformed_dataset, transform_fn = (
                raw_dataset | tft_beam.AnalyzeAndTransformDataset(
                    preprocessing_fn, output_record_batches=True))
            transformed_data, _ = transformed_dataset
            transform_fn_output = os.path.join(experiment_path, 'transform_output')
            tfrecord_file_path_prefix = os.path.join(
                experiment_path, 'tfrecords', experiment_path)
            data_written = (
                transformed_data
                | 'EncodeTrainData' >> beam.FlatMapTuple(
                    lambda batch, _: RecordBatchToExamples(batch))
                | beam.io.WriteToTFRecord(tfrecord_file_path_prefix))
            _ = (transform_fn
                 | 'WriteTransformFn' >> tft_beam.WriteTransformFn(transform_fn_output))
My code that generates the OOM is as follows:
from tfx import v1 as tfx
import tensorflow_data_validation as tfdv
TRAIN_DATA = 'train_smallest.csv'
LABEL_NAME = 'fare_amount'
BATCH_SIZE=256
ORIGINAL_TFRECORD_PATH='./identity_transform/tfrecords/'
from tfx.components import ImportExampleGen
from tfx.components import StatisticsGen
from tfx.components import SchemaGen
from tfx.components import Transform
from tfx.v1 import proto
from tfx.proto import example_gen_pb2
output = proto.Output(
    split_config=example_gen_pb2.SplitConfig(splits=[
        proto.SplitConfig.Split(name='train', hash_buckets=3),
        proto.SplitConfig.Split(name='eval', hash_buckets=1)
    ]))
example_gen = ImportExampleGen(input_base=ORIGINAL_TFRECORD_PATH, output_config=output)
stats_options = tfdv.StatsOptions(label_feature=LABEL_NAME)
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'], stats_options=stats_options)
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True)
transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file='./preprocessing_fn.py')
components = [
    example_gen,
    statistics_gen,
    schema_gen,
    transform,
]
pipeline_name = 'pipeline'
pipeline_root = './pipeline'
metadata_path = './metadata/pipeline/metadata.db'
pipeline = tfx.dsl.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    metadata_connection_config=tfx.orchestration.metadata
        .sqlite_metadata_connection_config(metadata_path),
    components=components,
    enable_cache=False)
tfx.orchestration.LocalDagRunner().run(pipeline)
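Incidentally, even though Transform hides the Beam code, tfx.dsl.Pipeline accepts beam_pipeline_args that are forwarded to the underlying Beam runner, which gives at least some control over memory behaviour. A sketch of the same pipeline with DirectRunner options (flag values are illustrative; worth verifying against your Beam version):
pipeline = tfx.dsl.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    metadata_connection_config=tfx.orchestration.metadata
        .sqlite_metadata_connection_config(metadata_path),
    components=components,
    enable_cache=False,
    beam_pipeline_args=[
        '--direct_running_mode=multi_processing',  # process-based bundles instead of threads
        '--direct_num_workers=0',                  # 0 lets Beam pick one worker per core
    ])
tfx.orchestration.LocalDagRunner().run(pipeline)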
My preprocessing_fn is:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_transform as tft

NBUCKETS = 10

def preprocessing_fn(inputs):
    """
    Preprocess input columns into transformed features. This is what goes
    into TensorFlow Transform / Apache Beam.
    """
    # Since we are modifying some features and leaving others unchanged, we
    # start by setting `transformed` to a copy of `inputs`.
    transformed = inputs.copy()
    del transformed["key"]
    transformed['passenger_count'] = tft.scale_to_0_1(
        inputs['passenger_count'])
    # Cannot use the below in TFT, as those learned values would need to be
    # managed carefully:
    # normalizer = tf.keras.layers.Normalization(axis=None,
    #     name="passenger_count_normalizer")
    # normalizer.adapt(inputs['passenger_count'])
    # transformed['other_passenger_count'] = normalizer(
    #     inputs['passenger_count'])
    for col in ['dropoff_longitude', 'dropoff_latitude']:
        transformed[col] = tft.sparse_tensor_to_dense_with_shape(
            inputs[col], default_value=tft.mean(inputs[col]),
            shape=[None, 1])  # You can make this more robust by using the shape from the feature spec
    for lon_col in ['pickup_longitude', 'dropoff_longitude']:
        # transformed[lon_col] = scale_longitude(inputs[lon_col])
        transformed[lon_col] = (transformed[lon_col] + 78) / 8.
    for lat_col in ['pickup_latitude', 'dropoff_latitude']:
        transformed[lat_col] = (transformed[lat_col] - 37) / 8.
    position_difference = tf.square(
        transformed["dropoff_longitude"] -
        transformed["pickup_longitude"])
    position_difference += tf.square(
        transformed["dropoff_latitude"] -
        transformed["pickup_latitude"])
    transformed['euclidean'] = tf.sqrt(position_difference)
    lat_lon_buckets = [
        bin_edge / NBUCKETS
        for bin_edge in range(0, NBUCKETS)]
    transformed['bucketed_pickup_longitude'] = tft.apply_buckets(
        transformed["pickup_longitude"],
        bucket_boundaries=tf.constant([lat_lon_buckets]))
    transformed["bucketed_pickup_latitude"] = tft.apply_buckets(
        transformed['pickup_latitude'],
        bucket_boundaries=tf.constant([lat_lon_buckets]))
    transformed['bucketed_dropoff_longitude'] = tft.apply_buckets(
        transformed["dropoff_longitude"],
        bucket_boundaries=tf.constant([lat_lon_buckets]))
    transformed['bucketed_dropoff_latitude'] = tft.apply_buckets(
        transformed["dropoff_latitude"],
        bucket_boundaries=tf.constant([lat_lon_buckets]))
    # transformed["pickup_cross"] = tf.sparse.cross(
    #     inputs=[transformed['pickup_latitude_apply_buckets'],
    #             transformed['pickup_longitude_apply_buckets']])
    hash_pickup_crossing_layer = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='one_hot', num_bins=NBUCKETS**2, name='hash_pickup_crossing_layer')
    transformed['pickup_location'] = hash_pickup_crossing_layer(
        (transformed['bucketed_pickup_latitude'],
         transformed['bucketed_pickup_longitude']))
    hash_dropoff_crossing_layer = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='one_hot', num_bins=NBUCKETS**2,
        name='hash_dropoff_crossing_layer')
    transformed['dropoff_location'] = hash_dropoff_crossing_layer(
        (transformed['bucketed_dropoff_latitude'],
         transformed['bucketed_dropoff_longitude']))
    hash_pickup_crossing_layer_intermediary = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='int', num_bins=NBUCKETS**2)
    hashed_pickup_intermediary = hash_pickup_crossing_layer_intermediary(
        (transformed['bucketed_pickup_longitude'],
         transformed['bucketed_pickup_latitude']))
    hash_dropoff_crossing_layer_intermediary = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='int', num_bins=NBUCKETS**2)
    hashed_dropoff_intermediary = hash_dropoff_crossing_layer_intermediary(
        (transformed['bucketed_dropoff_longitude'],
         transformed['bucketed_dropoff_latitude']))
    hash_trip_crossing_layer = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='one_hot', num_bins=NBUCKETS ** 3,
        name="hash_trip_crossing_layer")
    transformed['hashed_trip'] = hash_trip_crossing_layer(
        (hashed_pickup_intermediary,
         hashed_dropoff_intermediary))
    seconds_since_1970 = tf.cast(
        tfa.text.parse_time(
            inputs["pickup_datetime"],
            "%Y-%m-%d %H:%M:%S %Z",
            output_unit="SECOND"),
        tf.float32)
    # seconds_since_1970 = fn_seconds_since_1970(inputs['pickup_datetime'])
    seconds_since_1970 = tf.cast(seconds_since_1970, tf.float32)
    hours_since_1970 = seconds_since_1970 / 3600.
    hours_since_1970 = tf.floor(hours_since_1970)
    hour_of_day_intermediary = hours_since_1970 % 24
    transformed['hour_of_day'] = hour_of_day_intermediary
    hour_of_day_intermediary = tf.cast(hour_of_day_intermediary, tf.int32)
    days_since_1970 = seconds_since_1970 / (3600 * 24)
    days_since_1970 = tf.floor(days_since_1970)
    # January 1st 1970 was a Thursday
    day_of_week_intermediary = (days_since_1970 + 4) % 7
    transformed['day_of_week'] = day_of_week_intermediary
    day_of_week_intermediary = tf.cast(day_of_week_intermediary, tf.int32)
    hashed_crossing_layer = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        num_bins=24 * 7, output_mode="one_hot")
    hashed_crossing_layer_intermediary = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        num_bins=24 * 7, output_mode="int", name='hashed_hour_of_day_of_week_layer')
    transformed['hour_of_day_of_week'] = hashed_crossing_layer(
        (hour_of_day_intermediary, day_of_week_intermediary))
    hour_of_day_of_week_intermediary = hashed_crossing_layer_intermediary(
        (hour_of_day_intermediary, day_of_week_intermediary))
    hash_trip_crossing_layer_intermediary = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='int', num_bins=NBUCKETS ** 3)
    hashed_trip_intermediary = hash_trip_crossing_layer_intermediary(
        (hashed_pickup_intermediary, hashed_dropoff_intermediary))
    hash_trip_and_time_layer = tf.keras.layers.experimental.preprocessing.HashedCrossing(
        output_mode='one_hot', num_bins=(NBUCKETS ** 3) * 4,
        name='hash_trip_and_time_layer')
    transformed['hashed_trip_and_time'] = hash_trip_and_time_layer(
        (hashed_trip_intermediary, hour_of_day_of_week_intermediary))
    return transformed
And the data that I'm using is below; this is before writing it into TFRecords with a preprocessing_fn that is essentially f(x) = x:
key,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count
2010-10-19 00:01:17.0000003,11.3,2010-10-19 00:01:17 UTC,-73.948724,40.777489,-73.949569,40.814049,1
2009-08-19 08:58:00.00000031,8.5,2009-08-19 08:58:00 UTC,-74.007743,40.724717,-74.006797,40.751253,1
2011-01-24 06:48:12.0000001,10.9,2011-01-24 06:48:12 UTC,-73.986678,40.742597,-73.955101,40.766174,1
2014-09-13 09:08:00.000000126,15.5,2014-09-13 09:08:00 UTC,-74.00325,40.7083,-73.975935,40.749007,1
2013-04-12 19:12:22.0000002,10,2013-04-12 19:12:22 UTC,-74.005318,40.728261,-73.981724,40.7293,1
2015-03-11 11:09:40.0000003,13,2015-03-11 11:09:40 UTC,-73.998809814453125,40.734573364257812,-73.989830017089844,40.756542205810547,1
2014-02-26 01:06:40.0000001,8,2014-02-26 01:06:40 UTC,-73.985821,40.763299,-74.003947,40.751722,1
2011-12-03 02:34:21.0000002,15.7,2011-12-03 02:34:21 UTC,-73.940638,40.840057,-73.98792,40.768815,2
2012-12-10 15:50:03.0000002,34,2012-12-10 15:50:03 UTC,-73.872871,40.774284,-73.995264,40.739349,1
2013-09-22 21:15:18.0000001,7.5,2013-09-22 21:15:18 UTC,-73.996565,40.718924,-74.007011,40.707672,2
2011-06-13 20:19:00.00000010,8.1,2011-06-13 20:19:00 UTC,-73.981587,40.747238,-73.956932,40.771512,5
2013-04-13 02:28:55.0000006,4,2013-04-13 02:28:55 UTC,0,0,0,0,1
2013-09-08 15:49:49.0000001,5.5,2013-09-08 15:49:49 UTC,-73.96077,40.775805,-73.970084,40.76252,1
2011-10-05 21:59:00.00000098,6.5,2011-10-05 21:59:00 UTC,-74.005052,40.70663,-74.012255,40.718838,1
2014-10-03 04:04:00.00000020,5.5,2014-10-03 04:04:00 UTC,-74.000032,40.732362,-73.99655,40.736532,5
2010-06-18 13:41:28.0000001,12.1,2010-06-18 13:41:28 UTC,-73.998732,40.717089,-73.975146,40.758038,3
2014-08-07 12:06:50.0000001,13,2014-08-07 12:06:50 UTC,-74.008268,40.722489,-73.982861,40.744874,1
2009-08-30 11:56:58.0000005,5.7,2009-08-30 11:56:58 UTC,-74.007583,40.748017,-73.994615,40.751834,1
2012-01-28 01:58:00.000000132,9.7,2012-01-28 01:58:00 UTC,-74.422832,40.84662,-74.49563,40.968122,1
2012-06-01 13:33:00.00000070,6.9,2012-06-01 13:33:00 UTC,-73.987377,40.743832,-73.998615,40.739855,1
2014-07-31 20:00:00.000000140,14.5,2014-07-31 20:00:00 UTC,-73.983415,40.75677,-73.95324,40.77148,1
2009-02-06 09:25:00.000000128,6.9,2009-02-06 09:25:00 UTC,-73.994222,40.727137,-73.987398,40.744425,1
2012-01-07 20:28:30.0000003,6.9,2012-01-07 20:28:30 UTC,-73.9889,40.7591,-74.0053,40.7401,0
2012-06-13 15:57:00.000000105,8.5,2012-06-13 15:57:00 UTC,-73.982535,40.761602,-73.980302,40.748475,1
2014-05-02 10:33:00.00000042,5,2014-05-02 10:33:00 UTC,-73.985352,40.74213,-73.991562,40.742512,1
2013-03-30 00:33:00.000000133,15,2013-03-30 00:33:00 UTC,-73.98616,40.757022,-74.009022,40.707857,1
2011-05-20 23:01:00.00000071,9.3,2011-05-20 23:01:00 UTC,-73.951177,40.77465,-73.972487,40.743393,1
2011-01-27 21:51:00.000000171,8.9,2011-01-27 21:51:00 UTC,-73.989867,40.756748,-73.972143,40.786588,3
2009-03-20 12:46:25.0000001,6.9,2009-03-20 12:46:25 UTC,-73.951526,40.770003,-73.970998,40.754989,1
2013-05-01 09:32:00.000000143,7,2013-05-01 09:32:00 UTC,-73.990302,40.756552,-73.982462,40.760242,5
2010-05-25 10:10:00.00000027,4.9,2010-05-25 10:10:00 UTC,-73.980722,40.779832,-73.971522,40.787518,1
2012-12-18 07:24:00.000000148,6,2012-12-18 07:24:00 UTC,-73.965952,40.776297,-73.950533,40.774467,4
2012-04-18 08:36:00.000000182,7.7,2012-04-18 08:36:00 UTC,-73.98358,40.766182,-73.97922,40.752992,1
2009-05-23 17:11:00.00000092,42.9,2009-05-23 17:11:00 UTC,-73.781909,40.64477,-73.991234,40.687251,2
2013-04-16 08:44:39.0000006,12,2013-04-16 08:44:39 UTC,-73.961365,40.760555,-73.9805,40.753709,1
2014-04-15 18:39:47.0000002,3.5,2014-04-15 18:39:47 UTC,-74.008172,40.737866,-74.007745,40.732653,1
2013-12-22 17:42:24.0000001,8,2013-12-22 17:42:24 UTC,-73.977505,40.742731,-73.980127,40.722385,2
2011-09-07 20:12:37.0000004,14.5,2011-09-07 20:12:37 UTC,-74.002225,40.715,-73.951187,40.728798,1
2014-12-10 21:57:46.0000005,6.5,2014-12-10 21:57:46 UTC,-73.97175,40.760287,0,0,1
2012-11-12 22:11:00.000000101,6.5,2012-11-12 22:11:00 UTC,-73.983615,40.760325,-73.998885,40.760012,1
2015-02-21 19:26:56.0000005,15,2015-02-21 19:26:56 UTC,-73.9959716796875,40.686809539794922,-74.009872436523438,40.718009948730469,1
2010-10-06 03:28:42.0000001,8.9,2010-10-06 03:28:42 UTC,-73.988341,40.72886,-73.984581,40.751519,1
2010-07-09 22:01:22.0000007,4.1,2010-07-09 22:01:22 UTC,-73.959272,40.771833,-73.962304,40.773124,1
2009-05-28 10:45:00.00000032,8.9,2009-05-28 10:45:00 UTC,-73.988872,40.753367,-73.972733,40.753327,1
2013-09-24 18:25:00.000000123,25,2013-09-24 18:25:00 UTC,-74.005197,40.751602,-73.980317,40.676607,1
2009-02-05 08:23:01.0000004,7.3,2009-02-05 08:23:01 UTC,-73.975468,40.759635,-73.991854,40.749352,1
2011-07-03 22:25:04.0000003,28.5,2011-07-03 22:25:04 UTC,-73.776755,40.64523,-73.951802,40.657781,1
2010-06-07 15:20:00.000000164,17.7,2010-06-07 15:20:00 UTC,-73.969625,40.758133,-74.012548,40.713983,1
2012-09-14 01:02:00.000000106,7,2012-09-14 01:02:00 UTC,-73.982777,40.744722,-73.984505,40.732127,1
2013-03-08 23:16:00.00000051,25.5,2013-03-08 23:16:00 UTC,-73.990822,40.734702,-73.945737,40.651117,1
2009-10-30 16:06:00.00000074,10.1,2009-10-30 16:06:00 UTC,-73.981008,40.768257,-73.968412,40.80144,2
2012-12-13 19:08:47.0000004,4.5,2012-12-13 19:08:47 UTC,-73.949347,40.78134,-73.956487,40.777995,1
2009-09-03 18:08:00.000000222,5.3,2009-09-03 18:08:00 UTC,-73.985702,40.753625,-73.989385,40.741143,5
2014-05-09 14:18:00.00000085,22.5,2014-05-09 14:18:00 UTC,-73.994697,40.765992,-74.012857,40.706807,1
2010-06-13 18:07:00.00000026,5.7,2010-06-13 18:07:00 UTC,-73.98811,40.774712,-73.98207,40.763285,1
2009-05-30 22:44:00.0000003,15.7,2009-05-30 22:44:00 UTC,-73.968862,40.791558,-73.971705,40.793732,2
2009-05-03 09:58:47.0000002,3.7,2009-05-03 09:58:47 UTC,-73.966445,40.804635,-73.969422,40.798094,1
2011-02-21 22:48:57.0000004,5.7,2011-02-21 22:48:57 UTC,-73.977624,40.752278,-73.994179,40.751649,1
2009-01-09 22:16:00.00000018,4.5,2009-01-09 22:16:00 UTC,-73.961832,40.763897,-73.96975,40.759523,1
2011-06-17 10:31:00.000000154,7.3,2011-06-17 10:31:00 UTC,-73.963242,40.771425,-73.968752,40.7537,1
2014-11-24 12:38:00.000000143,5,2014-11-24 12:38:00 UTC,-73.969662,40.753422,-73.975425,40.74545,1
2015-05-03 09:46:20.0000007,14.5,2015-05-03 09:46:20 UTC,-73.9213638305664,40.745254516601562,-73.85888671875,40.751045227050781,2
2010-04-05 18:49:28.0000001,8.9,2010-04-05 18:49:28 UTC,-73.960298,40.766187,-73.981875,40.727677,1
2013-12-17 20:12:00.00000030,6.5,2013-12-17 20:12:00 UTC,-73.956007,40.779077,-73.965855,40.765742,1
2010-03-12 21:29:00.000000235,9.3,2010-03-12 21:29:00 UTC,0,0,0,0,3
2011-11-30 17:09:00.000000158,14.1,2011-11-30 17:09:00 UTC,-73.985565,40.731717,-73.981347,40.77369,1
2011-12-18 22:21:00.000000162,12.1,2011-12-18 22:21:00 UTC,-73.995165,40.753117,-73.98827,40.725642,1
2010-09-02 14:39:00.000000201,4.1,2010-09-02 14:39:00 UTC,-73.978898,40.785037,-73.970818,40.789267,1
2014-08-22 16:00:00.00000098,6,2014-08-22 16:00:00 UTC,-73.97484,40.756925,-73.972712,40.761775,3
#tensorflow-support let's solve this issue and make Transform and LocalDagRunner usable so people can solve cool problems with it!

Related

RuntimeError: class '__torch__.kornia.geometry.boxes.Boxes3D' already defined

I am using premade code that works on colab.research.google.com, but when I downloaded it locally and ran it in Jupyter I got this error:
RuntimeError: class '__torch__.kornia.geometry.boxes.Boxes3D' already defined.
Here is the premade code:
#@title 3) Download Libraries for Neural Network
import argparse
import math
from pathlib import Path
import sys
sys.path.append('./taming-transformers')
from IPython import display
from base64 import b64encode
from omegaconf import OmegaConf
from PIL import Image
from taming.models import cond_transformer, vqgan
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from tqdm.notebook import tqdm
from CLIP import clip
import kornia.augmentation as K
import numpy as np
import imageio
from PIL import ImageFile, Image
from imgtag import ImgTag # metadatos
from libxmp import * # metadatos
import libxmp # metadatos
from stegano import lsb
import json
ImageFile.LOAD_TRUNCATED_IMAGES = True
def sinc(x):
    return torch.where(x != 0, torch.sin(math.pi * x) / (math.pi * x), x.new_ones([]))

def lanczos(x, a):
    cond = torch.logical_and(-a < x, x < a)
    out = torch.where(cond, sinc(x) * sinc(x / a), x.new_zeros([]))
    return out / out.sum()

def ramp(ratio, width):
    n = math.ceil(width / ratio + 1)
    out = torch.empty([n])
    cur = 0
    for i in range(out.shape[0]):
        out[i] = cur
        cur += ratio
    return torch.cat([-out[1:].flip([0]), out])[1:-1]

def resample(input, size, align_corners=True):
    n, c, h, w = input.shape
    dh, dw = size
    input = input.view([n * c, 1, h, w])
    if dh < h:
        kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype)
        pad_h = (kernel_h.shape[0] - 1) // 2
        input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect')
        input = F.conv2d(input, kernel_h[None, None, :, None])
    if dw < w:
        kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype)
        pad_w = (kernel_w.shape[0] - 1) // 2
        input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect')
        input = F.conv2d(input, kernel_w[None, None, None, :])
    input = input.view([n, c, h, w])
    return F.interpolate(input, size, mode='bicubic', align_corners=align_corners)

class ReplaceGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x_forward, x_backward):
        ctx.shape = x_backward.shape
        return x_forward

    @staticmethod
    def backward(ctx, grad_in):
        return None, grad_in.sum_to_size(ctx.shape)

replace_grad = ReplaceGrad.apply

class ClampWithGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, min, max):
        ctx.min = min
        ctx.max = max
        ctx.save_for_backward(input)
        return input.clamp(min, max)

    @staticmethod
    def backward(ctx, grad_in):
        input, = ctx.saved_tensors
        return grad_in * (grad_in * (input - input.clamp(ctx.min, ctx.max)) >= 0), None, None

clamp_with_grad = ClampWithGrad.apply

def vector_quantize(x, codebook):
    d = x.pow(2).sum(dim=-1, keepdim=True) + codebook.pow(2).sum(dim=1) - 2 * x @ codebook.T
    indices = d.argmin(-1)
    x_q = F.one_hot(indices, codebook.shape[0]).to(d.dtype) @ codebook
    return replace_grad(x_q, x)

class Prompt(nn.Module):
    def __init__(self, embed, weight=1., stop=float('-inf')):
        super().__init__()
        self.register_buffer('embed', embed)
        self.register_buffer('weight', torch.as_tensor(weight))
        self.register_buffer('stop', torch.as_tensor(stop))

    def forward(self, input):
        input_normed = F.normalize(input.unsqueeze(1), dim=2)
        embed_normed = F.normalize(self.embed.unsqueeze(0), dim=2)
        dists = input_normed.sub(embed_normed).norm(dim=2).div(2).arcsin().pow(2).mul(2)
        dists = dists * self.weight.sign()
        return self.weight.abs() * replace_grad(dists, torch.maximum(dists, self.stop)).mean()

def parse_prompt(prompt):
    vals = prompt.rsplit(':', 2)
    vals = vals + ['', '1', '-inf'][len(vals):]
    return vals[0], float(vals[1]), float(vals[2])

class MakeCutouts(nn.Module):
    def __init__(self, cut_size, cutn, cut_pow=1.):
        super().__init__()
        self.cut_size = cut_size
        self.cutn = cutn
        self.cut_pow = cut_pow
        self.augs = nn.Sequential(
            K.RandomHorizontalFlip(p=0.5),
            # K.RandomSolarize(0.01, 0.01, p=0.7),
            K.RandomSharpness(0.3, p=0.4),
            K.RandomAffine(degrees=30, translate=0.1, p=0.8, padding_mode='border'),
            K.RandomPerspective(0.2, p=0.4),
            K.ColorJitter(hue=0.01, saturation=0.01, p=0.7))
        self.noise_fac = 0.1

    def forward(self, input):
        sideY, sideX = input.shape[2:4]
        max_size = min(sideX, sideY)
        min_size = min(sideX, sideY, self.cut_size)
        cutouts = []
        for _ in range(self.cutn):
            size = int(torch.rand([])**self.cut_pow * (max_size - min_size) + min_size)
            offsetx = torch.randint(0, sideX - size + 1, ())
            offsety = torch.randint(0, sideY - size + 1, ())
            cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size]
            cutouts.append(resample(cutout, (self.cut_size, self.cut_size)))
        batch = self.augs(torch.cat(cutouts, dim=0))
        if self.noise_fac:
            facs = batch.new_empty([self.cutn, 1, 1, 1]).uniform_(0, self.noise_fac)
            batch = batch + facs * torch.randn_like(batch)
        return batch

def load_vqgan_model(config_path, checkpoint_path):
    config = OmegaConf.load(config_path)
    if config.model.target == 'taming.models.vqgan.VQModel':
        model = vqgan.VQModel(**config.model.params)
        model.eval().requires_grad_(False)
        model.init_from_ckpt(checkpoint_path)
    elif config.model.target == 'taming.models.cond_transformer.Net2NetTransformer':
        parent_model = cond_transformer.Net2NetTransformer(**config.model.params)
        parent_model.eval().requires_grad_(False)
        parent_model.init_from_ckpt(checkpoint_path)
        model = parent_model.first_stage_model
    elif config.model.target == 'taming.models.vqgan.GumbelVQ':
        model = vqgan.GumbelVQ(**config.model.params)
        print(config.model.params)
        model.eval().requires_grad_(False)
        model.init_from_ckpt(checkpoint_path)
    else:
        raise ValueError(f'unknown model type: {config.model.target}')
    del model.loss
    return model

def resize_image(image, out_size):
    ratio = image.size[0] / image.size[1]
    area = min(image.size[0] * image.size[1], out_size[0] * out_size[1])
    size = round((area * ratio)**0.5), round((area / ratio)**0.5)
    return image.resize(size, Image.LANCZOS)

def download_img(img_url):
    try:
        return wget.download(img_url, out="input.jpg")
    except:
        return
Again, there isn't any problem with the code itself; it works, just not in this local conda environment.
Here is the error output:
RuntimeError Traceback (most recent call last)
Input In [22], in <cell line: 22>()
19 from tqdm.notebook import tqdm
21 from CLIP import clip
---> 22 import kornia.augmentation as K
23 import numpy as np
24 import imageio
File ~\anaconda3\envs\taming\lib\site-packages\kornia\__init__.py:10, in <module>
7 from . import geometry
9 # import the other modules for convenience
---> 10 from . import (
11 augmentation,
12 color,
13 contrib,
14 enhance,
15 feature,
16 losses,
17 metrics,
18 morphology,
19 tracking,
20 utils,
21 x,
22 )
23 # NOTE: we are going to expose to top level very few things
24 from kornia.constants import pi
File ~\anaconda3\envs\taming\lib\site-packages\kornia\augmentation\__init__.py:54, in <module>
41 from kornia.augmentation._3d import (
42 CenterCrop3D,
43 RandomAffine3D,
(...)
51 RandomVerticalFlip3D,
52 )
53 from kornia.augmentation._3d.base import AugmentationBase3D
---> 54 from kornia.augmentation.container import AugmentationSequential, ImageSequential, PatchSequential, VideoSequential
56 __all__ = [
57 "AugmentationBase2D",
58 "GeometricAugmentationBase2D",
(...)
109 "VideoSequential",
110 ]
File ~\anaconda3\envs\taming\lib\site-packages\kornia\augmentation\container\__init__.py:1, in <module>
----> 1 from kornia.augmentation.container.augment import AugmentationSequential
2 from kornia.augmentation.container.image import ImageSequential
3 from kornia.augmentation.container.patch import PatchSequential
File ~\anaconda3\envs\taming\lib\site-packages\kornia\augmentation\container\augment.py:20, in <module>
18 from kornia.augmentation.container.video import VideoSequential
19 from kornia.constants import DataKey
---> 20 from kornia.geometry.boxes import Boxes
22 __all__ = ["AugmentationSequential"]
25 class AugmentationSequential(ImageSequential):
File ~\anaconda3\envs\taming\lib\site-packages\kornia\geometry\boxes.py:465, in <module>
460 self._data = self._data.to(device=device, dtype=dtype)
461 return self
464 @torch.jit.script
--> 465 class Boxes3D:
466 r"""3D boxes containing N or BxN boxes.
467
468 Args:
(...)
478 `hexahedrons <https://en.wikipedia.org/wiki/Hexahedron>`_ are cubes and rhombohedrons.
479 """
480 def __init__(
481 self, boxes: torch.Tensor, raise_if_not_floating_point: bool = True,
482 mode: str = "xyzxyz_plus"
483 ) -> None:
File ~\anaconda3\envs\taming\lib\site-packages\torch\jit\_script.py:924, in script(obj, optimize, _frames_up, _rcb)
921 def fail(self, *args, **kwargs):
922 raise RuntimeError(name + " is not supported on ScriptModules")
--> 924 return fail
File ~\anaconda3\envs\taming\lib\site-packages\torch\jit\_script.py:64, in _compile_and_register_class(obj, rcb, qualified_name)
61 def _reduce(cls):
62 raise pickle.PickleError("ScriptFunction cannot be pickled")
---> 64 ScriptFunction.__reduce__ = _reduce # type: ignore[assignment]
67 if _enabled:
68 Attribute = collections.namedtuple("Attribute", ["value", "type"])
RuntimeError: class '__torch__.kornia.geometry.boxes.Boxes3D' already defined.
https://colab.research.google.com/drive/1lx9AGsrh7MlyJhK9UrNTK8pYpARnx457?usp=sharing
This is the link to the project I downloaded to run locally instead of using it online, so I am trying to get it up and running.
Thanks!!!

OOM on GPU with tensorflow while making Mandelbrot

Trying to get a beautiful Mandelbrot picture; the code works great at 16K resolution, but I can't get it to render a 32K image.
I tried lowering cycles to 50, but still no difference.
Specs: i9 10900K and RTX 3090 24 GB.
I get an OOM message saying:
W tensorflow/core/common_runtime/bfc_allocator.cc:456] Allocator (GPU_0_bfc) ran out of memory
trying to allocate 7.91GiB (rounded to 8493465600)requested by op Mul
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation.
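The message itself suggests one thing to try: if fragmentation rather than true exhaustion is the cause, the allocator can be switched via the environment variable it names. A sketch (it must run before TensorFlow initializes the GPU):
import os
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'  # knob suggested by the OOM message
import tensorflow as tf  # import TF only after setting the variable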
Here is my code:
import tensorflow as tf
import numpy as np
import PIL.Image
from io import BytesIO
from IPython.display import Image, display

def render(a):
    a_cyclic = (a * 0.3).reshape(list(a.shape) + [1])
    img = np.concatenate([10 + 20 * np.cos(a_cyclic),
                          30 + 50 * np.sin(a_cyclic),
                          155 - 80 * np.cos(a_cyclic)], 2)
    img[a == a.max()] = 0
    a = img
    a = np.uint8(np.clip(a, 0, 255))
    f = BytesIO()
    return PIL.Image.fromarray(a)

#@tf.function
def mandelbrot_helper(grid_c, current_values, counts, cycles):
    for i in range(cycles):
        temp = current_values * current_values + grid_c
        not_diverged = tf.abs(temp) < 4
        current_values.assign(temp)
        counts.assign_add(tf.cast(not_diverged, tf.float64))

def mandelbrot(render_size, center, zoom, cycles):
    f = zoom / render_size[0]
    real_start = center[0] - (render_size[0] / 2) * f
    real_end = real_start + render_size[0] * f
    imag_start = center[1] - (render_size[1] / 2) * f
    imag_end = imag_start + render_size[1] * f
    real_range = tf.range(real_start, real_end, f, dtype=tf.float64)
    imag_range = tf.range(imag_start, imag_end, f, dtype=tf.float64)
    real, imag = tf.meshgrid(real_range, imag_range)
    grid_c = tf.constant(tf.complex(real, imag))
    current_values = tf.Variable(grid_c)
    counts = tf.Variable(tf.zeros_like(grid_c, tf.float64))
    mandelbrot_helper(grid_c, current_values, counts, cycles)
    return counts.numpy()

counts = mandelbrot(
    render_size=(30720, 17280),  # 32K
    #render_size=(15360, 8640),  # 16K
    #render_size=(7680, 4320),   # 8K
    #render_size=(3840, 2160),   # 4K
    #render_size=(1920, 1080),   # HD
    center=(-0.5, 0),
    zoom=4,
    cycles=200
)
img = render(counts)
print(img.size)
img
#img.save("E:/Python/Python3/TestingSO/Images/test.png")
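Note that a 30720x17280 complex128 grid is 30720 * 17280 * 16 bytes = 8,493,465,600 bytes, exactly the allocation named in the error, and the code holds several tensors of that size at once (grid_c, current_values, counts, temp), so 24 GB cannot fit a monolithic 32K render regardless of allocator. A hypothetical workaround is to render in horizontal strips and stitch the counts on the host; the strip-center arithmetic below assumes the zoom/center conventions of the mandelbrot function above, and TensorFlow's caching of earlier allocations may still interfere, so this is a sketch rather than a guaranteed fix:
import numpy as np

def mandelbrot_tiled(render_size, center, zoom, cycles, n_strips=16):
    # Render the image as n_strips horizontal bands so each GPU allocation
    # is ~1/n_strips of the full grid, then stitch the counts on the CPU.
    width, height = render_size
    strip_h = height // n_strips  # assumes height divides evenly
    f = zoom / width              # same pixel pitch as mandelbrot()
    strips = []
    for s in range(n_strips):
        # center of strip s along the imaginary axis
        y0 = center[1] + (s * strip_h - height / 2 + strip_h / 2) * f
        strips.append(mandelbrot((width, strip_h), (center[0], y0), zoom, cycles))
    return np.concatenate(strips, axis=0)

counts = mandelbrot_tiled(render_size=(30720, 17280), center=(-0.5, 0),
                          zoom=4, cycles=200)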

In TensorFlow, for custom layers that need arguments at instantiation, does the get_config method need overriding?

Ubuntu - 20.04,
Tensorflow - 2.2.0,
Tensorboard - 2.2.1
I have read that one needs to reimplement the get_config method in order for a custom layer to be serializable.
I have a custom layer that accepts arguments in its __init__. It uses another custom layer, and that one consumes arguments in its __init__ as well.
Without TensorBoard callbacks, I can:
1. Use them in a model, both in eager mode and in graph form
2. Run tf.saved_model.save, and it executes without a glitch
3. Load the thus-saved model using tf.saved_model.load, and it loads the model saved in 2. above
4. Call model(input) on the loaded model. I can also call call_and_return_all_conditional_losses(input), and that runs right as well
With TensorBoard callbacks:
All of the above (I can .fit, save, load, predict from the loaded model, etc.), except that while running fit I get:
WARNING:tensorflow:Model failed to serialize as JSON. Ignoring... Layer PREPROCESS_MONSOON has arguments in `__init__` and therefore must override `get_config`.
Pasting the entire code here so it can be run end to end. You just need TensorFlow 2 installed. Please delete/add the TensorBoard callback (the only callback there) in .fit to see the two behaviors mentioned above.
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers as l
from tensorflow import keras as k
import numpy as np

##making empty directories
import os
os.makedirs('r_data', exist_ok=True)
os.makedirs('r_savedir', exist_ok=True)

#Preparing the dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train_ = pd.DataFrame(x_train.reshape(60000, -1), columns=['col_' + str(i) for i in range(28 * 28)])
x_test_ = pd.DataFrame(x_test.reshape(10000, -1), columns=['col_' + str(i) for i in range(28 * 28)])
x_train_['col_cat1'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']) for i in range(x_train_.shape[0])]
x_test_['col_cat1'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']) for i in range(x_test_.shape[0])]
x_train_['col_cat2'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']) for i in range(x_train_.shape[0])]
x_test_['col_cat2'] = [np.random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']) for i in range(x_test_.shape[0])]
x_train_[np.random.choice([True, False], size=x_train_.shape, p=[0.05, 0.95]).reshape(x_train_.shape)] = np.nan
x_test_[np.random.choice([True, False], size=x_test_.shape, p=[0.05, 0.95]).reshape(x_test_.shape)] = np.nan
x_train_.to_csv('r_data/x_train.csv', index=False)
x_test_.to_csv('r_data/x_test.csv', index=False)
pd.DataFrame(y_train).to_csv('r_data/y_train.csv', index=False)
pd.DataFrame(y_test).to_csv('r_data/y_test.csv', index=False)

#**THE MAIN LAYER THAT WE ARE TALKING ABOUT**
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import feature_column
import os

class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]
    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'keys': self.keys,
    #         'keys_all': self.keys_all,
    #     })
    #     return config

    def build(self, input_shape):
        def create_moving_mean_vars():
            return tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)
        self.moving_means_total = {t: create_moving_mean_vars() for t in self.keys}
        self.layer_global_counter = tf.Variable(initial_value=0., shape=(), dtype=tf.float32, trainable=False)

    def call(self, inputs, training=True):
        null_cols = {k: tf.math.is_finite(inputs[k]) for k in self.keys}
        current_means = {}

        def compute_update_current_means(t):
            current_mean = tf.math.divide_no_nan(
                tf.reduce_sum(tf.where(null_cols[t], inputs[t], 0.), axis=0),
                tf.reduce_sum(tf.cast(tf.math.is_finite(inputs[t]), tf.float32), axis=0))
            self.moving_means_total[t].assign_add(current_mean)
            return current_mean

        if training:
            current_means = {t: compute_update_current_means(t) for t in self.keys}
            outputs = {t: tf.where(null_cols[t], inputs[t], current_means[t]) for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
            self.layer_global_counter.assign_add(1.)
        else:
            outputs = {t: tf.where(null_cols[t], inputs[t], (self.moving_means_total[t] / self.layer_global_counter))
                       for t in self.keys}
            outputs.update({str(k) + '__nullcol': tf.cast(null_cols[k], tf.float32) for k in self.keys})
        return outputs

class PREPROCESS_MONSOON(layers.Layer):
    def __init__(self, cat_cols_with_unique_values, num_cols):
        '''cat_cols_with_unique_values: (dict) {'col_cat': [unique_values_list]}
        num_cols: (list) [num_cols_name_list]'''
        super().__init__()
        self.cat_cols = cat_cols_with_unique_values
        self.num_cols = num_cols
    # def get_config(self):
    #     config = super().get_config().copy()
    #     config.update({
    #         'cat_cols': self.cat_cols,
    #         'num_cols': self.num_cols,
    #     })
    #     return config

    def build(self, input_shape):
        self.ntd = NUM_TO_DENSE(self.num_cols)
        self.num_colnames = self.ntd.keys_all
        self.ctd = {k: layers.DenseFeatures(
            feature_column.embedding_column(
                feature_column.categorical_column_with_vocabulary_list(k, v),
                tf.cast(tf.math.ceil(tf.math.log(tf.cast(len(self.cat_cols[k]), tf.float32))), tf.int32).numpy()))
            for k, v in self.cat_cols.items()}
        self.cat_colnames = [i for i in self.cat_cols]
        self.dense_colnames = self.num_colnames + self.cat_colnames

    def call(self, inputs, training=True):
        dense_num_d = self.ntd(inputs, training=training)
        dense_cat_d = {k: self.ctd[k](inputs) for k in self.cat_colnames}
        dense_num = tf.stack([dense_num_d[k] for k in self.num_colnames], axis=1)
        dense_cat = tf.concat([dense_cat_d[k] for k in self.cat_colnames], axis=1)
        dense_all = tf.concat([dense_num, dense_cat], axis=1)
        return dense_all

##Inputs
label_path = 'r_data/y_train.csv'
data_path = 'r_data/x_train.csv'
max_epochs = 100
batch_size = 32
shuffle_seed = 42

##Creating layer inputs
dfs = pd.read_csv(data_path, nrows=1)
cdtypes_x = dfs.dtypes
nc = list(dfs.select_dtypes(include=[int, float]).columns)
oc = list(dfs.select_dtypes(exclude=[int, float]).columns)
cdtypes_y = pd.read_csv(label_path, nrows=1).dtypes
dfc = pd.read_csv(data_path, usecols=oc)
ccwuv = {i: list(pd.Series(dfc[i].unique()).dropna()) for i in dfc.columns}
preds_name = pd.read_csv(label_path, nrows=1).columns

##creating datasets
dataset = tf.data.experimental.make_csv_dataset(
    'r_data/x_train.csv', batch_size, column_names=cdtypes_x.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
labels = tf.data.experimental.make_csv_dataset(
    'r_data/y_train.csv', batch_size, column_names=cdtypes_y.index, prefetch_buffer_size=1,
    shuffle=True, shuffle_buffer_size=10000, shuffle_seed=shuffle_seed)
dataset = tf.data.Dataset.zip((dataset, labels))

##CREATING NETWORK
p = PREPROCESS_MONSOON(cat_cols_with_unique_values=ccwuv, num_cols=nc)
indict = {}
for i in nc:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.float32)
for i in ccwuv:
    indict[i] = k.Input(shape=(), name=i, dtype=tf.string)
x = p(indict)
x = l.BatchNormalization()(x)
x = l.Dense(10, activation='relu', name='dense_1')(x)
predictions = l.Dense(10, activation=None, name=preds_name[0])(x)
model = k.Model(inputs=indict, outputs=predictions)

##Compiling model
model.compile(optimizer=k.optimizers.Adam(),
              loss=k.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['sparse_categorical_accuracy'])

##callbacks
log_dir = './tensorboard_dir/no_config'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

## Fit model on training data
history = model.fit(dataset,
                    batch_size=64,
                    epochs=30,
                    steps_per_epoch=5,
                    validation_split=0.,
                    callbacks=[tensorboard_callback])

#saving the model
tf.saved_model.save(model, 'r_savedir')

#loading the model
model = tf.saved_model.load('r_savedir')

##Predicting on loaded model
for i in dataset:
    print(model(i[0], training=False))
    break
I have commented out the part of the code where I override the config in my custom layers; you can comment it back in, and the warning about the layers not being serializable goes away.
Question:
Do I, or do I not, need to override the get_config method in order to make a custom layer that accepts arguments in __init__ serializable?
Thank you in advance for the help.
You must add get_config to your code:
def get_config(self):
    config = super().get_config()
    return config
The NUM_TO_DENSE class must be like this:
class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols):
        super().__init__()
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        return config
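Note that a get_config returning only super().get_config() silences the JSON warning, but it does not round-trip the constructor arguments, so the layer could not be rebuilt via from_config. A sketch that also serializes the __init__ argument (and forwards **kwargs, as Keras expects):
class NUM_TO_DENSE(layers.Layer):
    def __init__(self, num_cols, **kwargs):
        super().__init__(**kwargs)
        self.keys = num_cols
        self.keys_all = self.keys + [str(i) + '__nullcol' for i in self.keys]

    def get_config(self):
        config = super().get_config()
        config.update({'num_cols': self.keys})  # round-trips the __init__ argument
        return config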

Tensorflow strings.to_number

My data consists of many CSV files, each containing one row with many float numbers. I am trying to convert the strings to floats in the following:
import tensorflow as tf

PATH = "C:\\DeepFakes\\Training\\im2data\\train\\"
#tf.config.experimental_run_functions_eagerly(False)

def process_path(file_path):
    label = tf.strings.split(file_path, "\\")[-2]
    data = tf.strings.split(tf.io.read_file(file_path), ',')
    try:
        data = tf.strings.to_number(
            data, out_type=tf.dtypes.float32, name=None)
    except:
        print('dddddddddddddddd')
    return data, label

file_path = PATH + "\\original\\ImA00001.csv"
data, label = process_path(file_path)
print('data ', data, ' label ', label)
A sample of the data is as follows
0.00044 0.00233 0.00572 0.00190 0.13761 0.42304 0.00027 0.00286
The output is
dddddddddddddddd
data tf.Tensor([b'0.0004401633\r\n0.0023351652\r\n0.0057266317\r\n0.0019061912\r\n0.13761024\r\n0.42304015\r\n0.0002711446\r\n0.0028613438\r\n'], shape=(1,), dtype=string) label tf.Tensor(b'original', shape=(), dtype=string)
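The output shows the file contents are newline-separated ("\r\n"), not comma-separated, so the split on ',' returns the whole blob as one string. A sketch of a fix, assuming Windows-style line endings:
def process_path(file_path):
    label = tf.strings.split(file_path, "\\")[-2]
    raw = tf.io.read_file(file_path)
    parts = tf.strings.split(raw, "\r\n")  # values are separated by line endings, not commas
    parts = tf.boolean_mask(parts, tf.strings.length(parts) > 0)  # drop the trailing empty piece
    data = tf.strings.to_number(parts, out_type=tf.float32)
    return data, label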
Please refer to the working code below:
import numpy as np
import tensorflow as tf

print("Tensorflow Version:", tf.__version__)
You can do it this way, using tf.compat.v1.string_split and tf.strings.to_number:
line = tf.constant("0.00044 0.00233 0.00572 0.00190 0.13761 0.42304 0.00027 0.00286", shape=(1,))
b = tf.compat.v1.string_split(line, delimiter=" ").values
c = tf.strings.to_number(b, tf.float32)
a = np.asarray(b)
print("Given Input:", line)
print("Desired Output:", a)
Output:
Tensorflow Version: 2.1.0
Given Input: tf.Tensor([b'0.00044 0.00233 0.00572 0.00190 0.13761 0.42304 0.00027 0.00286'], shape=(1,), dtype=string)
Desired Output: [b'0.00044' b'0.00233' b'0.00572' b'0.00190' b'0.13761' b'0.42304'
b'0.00027' b'0.00286']

Apache Beam job (Python) using Tensorflow Transform is killed by Cloud Dataflow

I'm trying to run an Apache Beam job based on TensorFlow Transform on Dataflow, but it gets killed. Has anyone experienced this behaviour? This is a simple example with DirectRunner that runs OK locally but fails on Dataflow (I change the runner appropriately):
import os
import csv
import datetime
import numpy as np
import tensorflow as tf
import tensorflow_transform as tft
from apache_beam.io import textio
from apache_beam.io import tfrecordio
from tensorflow_transform.beam import impl as beam_impl
from tensorflow_transform.beam import tft_beam_io
from tensorflow_transform.tf_metadata import dataset_metadata
from tensorflow_transform.tf_metadata import dataset_schema
import apache_beam as beam

NUMERIC_FEATURE_KEYS = ['feature_' + str(i) for i in range(2000)]

def _create_raw_metadata():
    column_schemas = {}
    for key in NUMERIC_FEATURE_KEYS:
        column_schemas[key] = dataset_schema.ColumnSchema(
            tf.float32, [], dataset_schema.FixedColumnRepresentation())
    raw_data_metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema(column_schemas))
    return raw_data_metadata

def preprocessing_fn(inputs):
    outputs = {}
    for key in NUMERIC_FEATURE_KEYS:
        outputs[key] = tft.scale_to_0_1(inputs[key])
    return outputs

def main():
    output_dir = '/tmp/tmp-folder-{}'.format(datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    RUNNER = 'DirectRunner'
    with beam.Pipeline(RUNNER) as p:
        with beam_impl.Context(temp_dir=output_dir):
            raw_data_metadata = _create_raw_metadata()
            _ = (raw_data_metadata
                 | 'WriteInputMetadata' >> tft_beam_io.WriteMetadata(
                     os.path.join(output_dir, 'rawdata_metadata'), pipeline=p))
            m = np.random.rand(100, 2000) * 100
            raw_data = (p
                        | 'CreateTestDataset' >> beam.Create(
                            [dict(zip(NUMERIC_FEATURE_KEYS, m[i, :])) for i in range(m.shape[0])]))
            raw_dataset = (raw_data, raw_data_metadata)
            transform_fn = (raw_dataset | 'Analyze' >> beam_impl.AnalyzeDataset(preprocessing_fn))
            _ = (transform_fn | 'WriteTransformFn' >> tft_beam_io.WriteTransformFn(output_dir))
            (transformed_data, transformed_metadata) = (
                (raw_dataset, transform_fn) | 'Transform' >> beam_impl.TransformDataset())
            transformed_data_coder = tft.coders.ExampleProtoCoder(transformed_metadata.schema)
            _ = (transformed_data
                 | 'WriteTrainData' >> tfrecordio.WriteToTFRecord(
                     os.path.join(output_dir, 'train'), file_name_suffix='.gz',
                     coder=transformed_data_coder))

if __name__ == '__main__':
    main()
Also, my production code (not shown) fails with the message: The job graph is too large. Please try again with a smaller job graph, or split your job into two or more smaller jobs.
Any hint?
The restriction on the pipeline description size is documented here:
https://cloud.google.com/dataflow/quotas#limits
There is a way around that: instead of creating a stage for each tensor that goes into tft.scale_to_0_1, we can fuse them by first stacking the tensors together and then passing them into tft.scale_to_0_1 with elementwise=True.
The result will be the same, because the min and max are computed per 'column' instead of across the whole tensor.
This would look something like this:
stacked = tf.stack([inputs[key] for key in NUMERIC_FEATURE_KEYS], axis=1)
scaled_stacked = tft.scale_to_0_1(stacked, elementwise=True)
for key, tensor in zip(NUMERIC_FEATURE_KEYS, tf.unstack(scaled_stacked, axis=1)):
    outputs[key] = tensor
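Folded into the example's preprocessing_fn, the fused version would read something like this (a sketch with the same inputs/outputs contract as above):
def preprocessing_fn(inputs):
    # Fuse 2000 scale_to_0_1 stages into one: stack, scale element-wise, unstack.
    outputs = {}
    stacked = tf.stack([inputs[key] for key in NUMERIC_FEATURE_KEYS], axis=1)
    scaled_stacked = tft.scale_to_0_1(stacked, elementwise=True)
    for key, tensor in zip(NUMERIC_FEATURE_KEYS, tf.unstack(scaled_stacked, axis=1)):
        outputs[key] = tensor
    return outputs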