For debugging, I would like to print a TensorFlow FlatMapDataset.
When trying to use the print method of a tf.data.Dataset, I get the error: AttributeError: 'FlatMapDataset' object has no attribute 'print'.
What I expected was some kind of print-out to assess whether the content of the dataset is what I expected.
Apparently a FlatMapDataset does not have that method; here is what dir() shows:
['_GeneratorState', '__abstractmethods__', '__bool__', '__class__', '__class_getitem__', '__debug_string__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__tf_tracing_type__', '__weakref__', '_abc_impl', '_add_trackable_child', '_add_variable_with_custom_getter', '_apply_debug_options', '_as_serialized_graph', '_checkpoint_dependencies', '_common_args', '_consumers', '_convert_variables_to_tensors', '_deferred_dependencies', '_deserialization_dependencies', '_deserialize_from_proto', '_export_to_saved_model_graph', '_flat_shapes', '_flat_structure', '_flat_types', '_functions', '_gather_saveables_for_checkpoint', '_graph', '_graph_attr', '_handle_deferred_dependencies', '_input_dataset', '_inputs', '_lookup_dependency', '_map_func', '_map_resources', '_maybe_initialize_trackable', '_maybe_track_assets', '_metadata', '_name', '_name_based_attribute_restore', '_name_based_restores', '_no_dependency', '_object_identifier', '_options', '_options_attr', '_options_tensor_to_options', '_preload_simple_restoration', '_restore_from_tensors', '_serialize_to_proto', '_serialize_to_tensors', '_setattr_tracking', '_shape_invariant_to_type_spec', '_structure', '_tf_api_names', '_tf_api_names_v1', '_trace_variant_creation', '_track_trackable', '_trackable_children', '_transformation_name', '_type_spec', '_unconditional_checkpoint_dependencies', '_unconditional_dependency_names', '_update_uid', '_variant_tensor', '_variant_tensor_attr', 'apply', 'as_numpy_iterator', 'batch', 'bucket_by_sequence_length', 'cache', 'cardinality', 'choose_from_datasets', 'concatenate', 'element_spec', 'enumerate', 'filter', 'flat_map', 'from_generator', 'from_tensor_slices', 'from_tensors', 'get_single_element', 'group_by_window', 'interleave', 'list_files', 'load', 'map', 'options', 'padded_batch', 'prefetch', 'random', 'range', 'reduce', 'rejection_resample', 'repeat', 'sample_from_datasets', 'save', 'scan', 'shard', 'shuffle', 'skip', 'snapshot', 'take', 'take_while', 'unbatch', 'unique', 'window', 'with_options', 'zip']
How can I print a FlatMapDataset in some convenient way to review its content?
You can print the FlatMapDataset by iterating over the dataset.
For example:
import tensorflow as tf
dataset = tf.data.Dataset.from_tensor_slices([[[1, 2, 3], [3, 4, 5]]])
dataset = dataset.flat_map(lambda x: tf.data.Dataset.from_tensor_slices(x**2))
for i in dataset:
print(i)
Output of the above code:
tf.Tensor([1 4 9], shape=(3,), dtype=int32)
tf.Tensor([ 9 16 25], shape=(3,), dtype=int32)
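If you prefer plain NumPy values, the dataset also exposes as_numpy_iterator (it appears in the dir() listing above); a small sketch:
print(list(dataset.as_numpy_iterator()))
# [array([1, 4, 9], dtype=int32), array([ 9, 16, 25], dtype=int32)]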
I would like to use get_single_element to get the one-batch dataset as a dict whose keys are the feature names and whose values are the feature values.
My code runs on TensorFlow 2.8, on an EC2 instance, from Jupyter.
import pandas as pd
import tensorflow as tf
df = pd.DataFrame({
'id': [1, 2, 6, 3, 5, 0],
'value_1': [10.892561, 7.210528, 1.2278101, -9.251782, 0.2118367, 6.9128551],
'value_2': ['large', 'small', 'mid','small','large', 'mid'],
'name_1': ['tyne', 'wnhp', 'ebhg','lpzhn','tyne', 'ebhg'],
'label': [0, 1, 0, 1, 1, 1]
})
dataset = tf.data.Dataset.from_tensor_slices(dict(df))
dataset = dataset.batch(2)
print(type(dataset))
print(dataset)
print(len(dataset))
class Features(object):
feature_data = {
"id": tf.io.FixedLenFeature((1,), dtype=tf.int8),
"value_1": tf.io.VarLenFeature(dtype=tf.float32),
"value_2": tf.io.FixedLenFeature((1,), dtype=tf.string),
"name_1": tf.io.FixedLenFeature((1,), dtype=tf.string)
}
label_data = {"label": tf.io.FixedLenFeature((1,), dtype=tf.int8)}
def process_sample(ds):
print(f"ds type is {type(ds)}")
features = tf.io.parse_single_example(ds, Features.feature_data) # error !
labels = tf.io.parse_single_example(ds, Features.label_data)['label']
return (features, labels)
dataset = dataset.map(lambda x: process_sample(x), num_parallel_calls=tf.data.AUTOTUNE)
dataset = tf.data.Dataset.get_single_element(dataset.batch(len(dataset)))
print(f"dataset type is {type(dataset)} dataset is {dataset}")
def get_dict(input_ds):
feature_dict_tensors = dict(input_ds)
print(f"\feature_dict_tensors type is {type(feature_dict_tensors)}, feature_dict_tensors is {feature_dict_tensors}")
return feature_dict_tensors
ds = dataset.map(get_dict)
print(f"ds type is {type(ds)}")
print(ds)
I got error:
File "<ipython-input-4-e9407f42e0a7>", line 37, in None *
lambda x: process_sample(x), num_parallel_calls=tf.data.AUTOTUNE)
File "<ipython-input-4-e9407f42e0a7>", line 33, in process_sample *
features = tf.io.parse_single_example(ds, Features.feature_data)
TypeError: Expected any non-tensor type, but got a tensor instead.
Based on https://www.tensorflow.org/api_docs/python/tf/io/parse_single_example, the first argument should be
A scalar string Tensor, a single serialized Example.
Why did I get this error?
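For context: tf.io.parse_single_example expects a serialized tf.train.Example proto (the scalar string tensor the docs describe), whereas the elements produced by from_tensor_slices(dict(df)) are already dicts of plain tensors, so there is nothing to parse. A minimal sketch of the alternative, assuming the goal is just to split off the label and materialize the whole six-row frame as one batch (the helper is illustrative, not from the original post):
def process_sample(example):
    # Each element is already a dict of tensors (one per DataFrame
    # column), not a serialized tf.train.Example, so no parsing is
    # needed; the label can be split off with ordinary dict ops.
    label = example.pop('label')
    return example, label

dataset = tf.data.Dataset.from_tensor_slices(dict(df))
dataset = dataset.map(process_sample, num_parallel_calls=tf.data.AUTOTUNE)
features, labels = dataset.batch(6).get_single_element()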
The following is a test for conv2d_transpose.
import tensorflow as tf
import numpy as np
x = tf.constant(np.array([[
[[-67], [-77]],
[[-117], [-127]]
]]), tf.float32)
# shape = (3, 3, 1, 1) -> (height, width, input_channels, output_channels) - 3x3x1 filter
f = tf.constant(np.array([
[[[-1]], [[2]], [[-3]]],
[[[4]], [[-5]], [[6]]],
[[[-7]], [[8]], [[-9]]]
]), tf.float32)
conv = tf.nn.conv2d_transpose(x, f, output_shape=(1, 5, 5, 1), strides=[1, 2, 2, 1], padding='VALID')
The result:
tf.Tensor(
[[[[ 67.]
[ -134.]
[ 278.]
[ -154.]
[ 231.]]
[[ -268.]
[ 335.]
[ -710.]
[ 385.]
[ -462.]]
[[ 586.]
[ -770.]
[ 1620.]
[ -870.]
[ 1074.]]
[[ -468.]
[ 585.]
[-1210.]
[ 635.]
[ -762.]]
[[ 819.]
[ -936.]
[ 1942.]
[-1016.]
[ 1143.]]]], shape=(1, 5, 5, 1), dtype=float32)
To my understanding, it should work as described in Figure 4.5 in the doc
Therefore, the first element (conv[0,0,0,0]) should be -67 * -9 = 603. Why does it turn out to be 67?
The result can be explained by the following image. But why is the convolution kernel inverted?
To explain this, I have made a draw.io figure illustrating the results you obtained.
I hope the illustration above helps explain why the first element of the transposed-convolution feature map is 67.
A key thing to note:
Unlike traditional convolution, in transpose convolution each element of the filter is multiplied by an element of the input feature map and the results of those individual multiplications & intermediate feature maps are overlaid on one another to create the final feature map. The stride determines how far apart the overlays are. In our case, stride = 2, hence the filter moves by 2 in both x & y dimension after each convolution with the original downsampled feature map.
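As a sanity check, here is a small NumPy sketch (my own addition, not from the original answer) that implements exactly this overlay view and reproduces the value 67 at position (0, 0):
import numpy as np

x = np.array([[-67, -77],
              [-117, -127]], dtype=np.float32)   # input feature map
f = np.array([[-1, 2, -3],
              [4, -5, 6],
              [-7, 8, -9]], dtype=np.float32)    # 3x3 kernel
stride = 2
out = np.zeros((5, 5), dtype=np.float32)

# Each input element is multiplied by the whole kernel, and the 3x3
# result is overlaid onto the output at stride-spaced offsets.
for i in range(2):
    for j in range(2):
        out[i*stride:i*stride+3, j*stride:j*stride+3] += x[i, j] * f

print(out[0, 0])  # 67.0: only x[0,0] * f[0,0] = (-67) * (-1) lands here
The full out array matches the tf.nn.conv2d_transpose output above.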
We are using TensorFlow.js to create and train the model. We use the tf.fromPixels() function to convert an image into a tensor.
We want to create a custom model with the below properties:
AddImage( HTML_Image_Element, 'Label'): Add an imageElement with a custom label
Train() / fit() : Train this custom model with associated labels
Predict(): Predict the images with their associated label, and it will return the predicted response with the attached label of every image.
For better understanding let's take an example:
Let's say we have three images for prediction, i.e. img1, img2, and img3, with three labels 'A', 'B', and 'C' respectively.
So we want to create and train our model with these images and their respective labels, like below:
When the user wants to predict 'img1', it should show the prediction 'A'; similarly, 'img2' should predict 'B' and 'img3' should predict 'C'.
Please suggest how we can create and train this model.
This is the webpage we used to create a model with images and their associated labels:
<apex:page id="PageId" showheader="false">
<head>
<title>Image Classifier with TensorFlowJS</title>
<script src="https://cdn.jsdelivr.net/npm/#tensorflow/tfjs#0.11.2"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
</head>
<div id="output_field"></div>
<img id="imgshow" src="{!$Resource.cat}" crossorigin="anonymous" width="400" height="300" />
<script>
async function learnlinear(){
//img data set
const imageHTML = document.getElementById('imgshow');
console.log('imageHTML::'+imageHTML.src);
//convert to tensor
const tensorImg = tf.fromPixels(imageHTML);
tensorImg.data().then(async function (stuffTensImg){
console.log('stuffTensImg::'+stuffTensImg.toString());
});
const model = tf.sequential();
model.add(tf.layers.conv2d({
kernelSize: 5,
filters: 20,
strides: 1,
activation: 'relu',
inputShape: [imageHTML.height, imageHTML.width, 3],
}));
model.add(tf.layers.maxPooling2d({
poolSize: [2, 2],
strides: [2, 2],
}));
model.add(tf.layers.flatten());
model.add(tf.layers.dropout(0.2));
// Two output values x and y
model.add(tf.layers.dense({
units: 2,
activation: 'tanh',
}));
// Use ADAM optimizer with learning rate of 0.0005 and MSE loss
model.compile({
optimizer: tf.train.adam(0.0005),
loss: 'meanSquaredError',
});
await model.fit(tensorImg, {epochs: 500});
model.predict(tensorImg).print();
}
learnlinear();
</script>
</apex:page>
We got the following error while running the code snippet:
tfjs@0.11.2:1 Uncaught (in promise) Error: Error when checking input: expected conv2d_Conv2D1_input to have 4 dimension(s). but got an array with shape 300,400,3
at new t (tfjs@0.11.2:1)
at standardizeInputData (tfjs@0.11.2:1)
at t.standardizeUserData (tfjs@0.11.2:1)
at t. (tfjs@0.11.2:1)
at n (tfjs@0.11.2:1)
at Object.next (tfjs@0.11.2:1)
at tfjs@0.11.2:1
at new Promise ()
at __awaiter$15 (tfjs@0.11.2:1)
at t.fit (tfjs@0.11.2:1)
This error occurs while passing the sample tensor to the model.
You simply need to reshape your tensor data.
The data you pass to your model should be one dimension bigger than the inputShape. predict actually takes an array of elements of shape inputShape, and the number of elements is the batch size. Therefore your image data should have the shape [batchsize, ...inputShape] (using the ellipsis rest notation to indicate that the later part of the shape is equal to inputShape).
Since you're training with only one element (which does not really happen in a real case), you simply need to use a batch size of 1. The same applies to the tensor passed to fit: it needs a leading batch dimension as well.
model.predict(tensorImg.expandDims(0)).print()
TLDR: You simply have to reshape your data using np.expand_dims() or np.reshape().
First, let's generate some random tensor that mimics your current tensor input -
# Some random numpy array
In [20]: x = np.random.random((2,2,4))
In [21]: x
Out[21]:
array([[[0.8454901 , 0.75157647, 0.1511371 , 0.53809724],
[0.50779498, 0.41321185, 0.45686143, 0.80532259]],
[[0.93412402, 0.02820063, 0.5452628 , 0.8462806 ],
[0.4315332 , 0.9528761 , 0.69604215, 0.538589 ]]])
# Currently your tensor is a similar 3D shape like x
In [22]: x.shape
Out[22]: (2, 2, 4)
Now you can convert it to a 4D tensor like so -
In [23]: y = np.expand_dims(x, axis = 3)
In [24]: y
Out[24]:
array([[[[0.8454901 ],
[0.75157647],
[0.1511371 ],
[0.53809724]],
[[0.50779498],
[0.41321185],
[0.45686143],
[0.80532259]]],
[[[0.93412402],
[0.02820063],
[0.5452628 ],
[0.8462806 ]],
[[0.4315332 ],
[0.9528761 ],
[0.69604215],
[0.538589 ]]]])
In [25]: y.shape
Out[25]: (2, 2, 4, 1)
You can find the np.expand_dims documentation here.
EDIT: Here's a one-liner
np.reshape(np.ravel(x), (x.shape[0], x.shape[1], x.shape[2], 1)).shape
You can see the np.reshape documentation here.
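An equivalent and more concise idiom (my addition, not from the original answer) is indexing with np.newaxis:
y = x[..., np.newaxis]  # same result as np.expand_dims(x, axis=3) for this 3D input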
I'd like to evaluate my object detection model with mAP (mean average precision). In https://github.com/tensorflow/models/tree/master/research/object_detection/utils/ there is object_detection_evaluation.py, which I want to use.
I use the following for the ground-truth boxes:
pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
categories, matching_iou_threshold=0.1)
groundtruth_boxes = np.array([[10, 10, 11, 11]], dtype=float)
groundtruth_class_labels = np.array([1], dtype=int)
groundtruth_is_difficult_list = np.array([False], dtype=bool)
pascal_evaluator.add_single_ground_truth_image_info(
'img2',
{
standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes,
standard_fields.InputDataFields.groundtruth_classes: groundtruth_class_labels,
standard_fields.InputDataFields.groundtruth_difficult: groundtruth_is_difficult_list
}
)
and this for the predicted boxes:
# Add detections
image_key = 'img2'
detected_boxes = np.array(
[ [100, 100, 220, 220], [10, 10, 11, 11]],
dtype=float)
detected_class_labels = np.array([1,1], dtype=int)
detected_scores = np.array([0.8, 0.9], dtype=float)
pascal_evaluator.add_single_detected_image_info(image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
I print the results with
metrics = pascal_evaluator.evaluate()
print(metrics)
And my question:
If I use the prediction boxes [100, 100, 220, 220], [10, 10, 11, 11], the result is:
{'PASCAL/Precision/mAP@0.1IOU': 1.0,
'PASCAL/PerformanceByCategory/AP@0.1IOU/face': 1.0}
If I use [10, 10, 11, 11], [100, 100, 220, 220] (the other box order),
I get the following result:
{'PASCAL/Precision/mAP@0.1IOU': 0.5,
'PASCAL/PerformanceByCategory/AP@0.1IOU/face': 0.5}
Why is that so? Or is it a bug?
Although you are not entirely clear about it, I think I found the error in your code. You mention that you get different results for different orders of the bounding boxes. That would be peculiar, and if true it would surely be a bug.
But, having tested the code myself: you probably did not reorder the corresponding scores (detected_scores = np.array([0.8, 0.9], dtype=float)) along with the bounding boxes. That changes the problem itself, not just the order of the boxes. If you keep the scores aligned with the correct boxes, the mAP remains the same in both cases:
{'PascalBoxes_Precision/mAP@0.5IOU': 1.0,
'PascalBoxes_PerformanceByCategory/AP@0.5IOU/person': 1.0}
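To make the point concrete, here is a sketch of the reordered detections with the scores kept aligned (my illustration of the fix described above):
detected_boxes = np.array(
    [[10, 10, 11, 11], [100, 100, 220, 220]],
    dtype=float)
detected_scores = np.array([0.9, 0.8], dtype=float)  # reordered together with the boxes
detected_class_labels = np.array([1, 1], dtype=int)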
I am using the commits from breznak for the encoders (I wasn't able to figure out "git checkout ..." with GitHub, so I just carefully copied over the three files: base.py, multi.py, and multi_test.py).
I ran multi_test.py without any problems.
Then I adjusted my model parameters (MODEL_PARAMS), so that the encoders portion of 'sensorParams' looks like this:
'encoders': {
'frequency': {
'fieldname': u'frequency',
'type': 'SimpleVector',
'length': 5,
'minVal': 0,
'maxVal': 210
}
},
I also adjusted the modelInput portion of my code, so it looked like this:
model = ModelFactory.create(model_params.MODEL_PARAMS)
model.enableInference({'predictedField': 'frequency'})
y = [1,2,3,4,5]
modelInput = {"frequency": y}
result = model.run(modelInput)
But I get the following error, regardless of whether I instantiate 'y' as a list or a numpy.ndarray:
File "nta/eng/lib/python2.7/site-packages/nupic/encoders/base.py", line 183, in _getInputValue
return getattr(obj, fieldname)
AttributeError: 'list' object has no attribute 'idx0'
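For context, the traceback boils down to calling getattr with a field name on a plain list; a minimal reproduction outside NuPIC (my own illustration):
y = [1, 2, 3, 4, 5]
getattr(y, 'idx0')  # AttributeError: 'list' object has no attribute 'idx0'
This matches the description in the answer below of how SimpleVector reconstructs its input as per-index fields (idx0, idx1, ...).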
I also tried initializing a SimpleVector encoder inline with my modelInput, directly encoding my array, and then passing it through modelInput. That violated the input parameters of my SimpleVector, because I was now double-encoding. So I removed the encoders portion of my model parameters dictionary, which caused its own failure, because some part of my model was looking for that portion of the dictionary.
Any suggestions on what I should do next?
Edit: Here are the files I'm using with the OPF.
sendAnArray.py
import numpy
from nupic.frameworks.opf.modelfactory import ModelFactory
import model_params
class sendAnArray():
def __init__(self):
self.model = ModelFactory.create(model_params.MODEL_PARAMS)
self.model.enableInference({'predictedField': 'frequency'})
for i in range(100):
self.run()
def run(self):
y = [1,2,3,4,5]
modelInput = {"frequency": y}
result = self.model.run(modelInput)
anomalyScore = result.inferences['anomalyScore']
print y, anomalyScore
sAA = sendAnArray()
model_params.py
MODEL_PARAMS = {
'model': "CLA",
'version': 1,
'predictAheadTime': None,
'modelParams': {
'inferenceType': 'TemporalAnomaly',
'sensorParams': {
'verbosity' : 0,
'encoders': {
'frequency': {
'fieldname': u'frequency',
'type': 'SimpleVector',
'length': 5,
'minVal': 0,
'maxVal': 210
}
},
'sensorAutoReset' : None,
},
'spEnable': True,
'spParams': {
'spVerbosity' : 0,
'globalInhibition': 1,
'columnCount': 2048,
'inputWidth': 5,
'numActivePerInhArea': 60,
'seed': 1956,
'coincInputPoolPct': 0.5,
'synPermConnected': 0.1,
'synPermActiveInc': 0.1,
'synPermInactiveDec': 0.01,
},
'tpEnable' : True,
'tpParams': {
'verbosity': 0,
'columnCount': 2048,
'cellsPerColumn': 32,
'inputWidth': 2048,
'seed': 1960,
'temporalImp': 'cpp',
'newSynapseCount': 20,
'maxSynapsesPerSegment': 32,
'maxSegmentsPerCell': 128,
'initialPerm': 0.21,
'permanenceInc': 0.1,
'permanenceDec' : 0.1,
'globalDecay': 0.0,
'maxAge': 0,
'minThreshold': 12,
'activationThreshold': 16,
'outputType': 'normal',
'pamLength': 1,
},
'clParams': {
'regionName' : 'CLAClassifierRegion',
'clVerbosity' : 0,
'alpha': 0.0001,
'steps': '5',
},
'anomalyParams': {
u'anomalyCacheRecords': None,
u'autoDetectThreshold': None,
u'autoDetectWaitRecords': 2184
},
'trainSPNetOnlyIfRequested': False,
},
}
The problem seems to be that the SimpleVector class is accepting an array instead of a dict as its input, and then reconstructs it internally as {'list': {'idx0': 1, 'idx1': 2, ...}} (i.e. as if this dict had been the input). This is fine if it is done consistently, but your error shows that it has broken down somewhere. Have a word with @breznak about this.
Working through the OPF was difficult. I wanted to input an array of indices into the temporal pooler, so I opted to interface directly with the algorithms (I relied heavily on hello_tp.py). I ignored SimpleVector altogether and instead worked through the BitmapArray encoder.
Subutai has a useful email on the nupic-discuss listserv, where he breaks down the three main areas of the NuPIC API: algorithms, networks/regions, and the OPF. That helped me understand my options better.