Background
I have been playing around with Deep Dream and Inceptionism, using the Caffe framework to visualize layers of GoogLeNet, an architecture built for the Imagenet project, a large visual database designed for use in visual object recognition.
You can find Imagenet here: Imagenet 1000 Classes.
To probe into the architecture and generate 'dreams', I am using three notebooks:
https://github.com/google/deepdream/blob/master/dream.ipynb
https://github.com/kylemcdonald/deepdream/blob/master/dream.ipynb
https://github.com/auduno/deepdraw/blob/master/deepdraw.ipynb
The basic idea here is to extract some features from each channel in a specified layer from the model or a 'guide' image.
Then we input an image we wish to modify into the model and extract the features in the same layer specified (for each octave),
enhancing the best matching features, i.e., the largest dot product of the two feature vectors.
So far I've managed to modify input images and control dreams using the following approaches:
(a) applying layers as 'end' objectives for the input image optimization. (see Feature Visualization)
(b) using a second image to guide de optimization objective on the input image.
(c) visualize Googlenet model classes generated from noise.
However, the effect I want to achieve sits in-between these techniques, of which I haven't found any documentation, paper, or code.
Desired result (not part of the question to be answered)
To have one single class or unit belonging to a given 'end' layer (a) guide the optimization objective (b) and have this class visualized (c) on the input image:
An example where class = 'face' and input_image = 'clouds.jpg':
please note: the image above was generated using a model for face recognition, which was not trained on the Imagenet dataset. For demonstration purposes only.
Working code
Approach (a)
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import matplotlib as plt
import caffe
model_name = 'GoogLeNet'
model_path = 'models/dream/bvlc_googlenet/' # substitute your path here
net_fn = model_path + 'deploy.prototxt'
param_fn = model_path + 'bvlc_googlenet.caffemodel'
model = caffe.io.caffe_pb2.NetParameter()
text_format.Merge(open(net_fn).read(), model)
model.force_backward = True
open('models/dream/bvlc_googlenet/tmp.prototxt', 'w').write(str(model))
net = caffe.Classifier('models/dream/bvlc_googlenet/tmp.prototxt', param_fn,
mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent
channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB
def showarray(a, fmt='jpeg'):
a = np.uint8(np.clip(a, 0, 255))
f = StringIO()
PIL.Image.fromarray(a).save(f, fmt)
display(Image(data=f.getvalue()))
# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']
def deprocess(net, img):
return np.dstack((img + net.transformer.mean['data'])[::-1])
def objective_L2(dst):
dst.diff[:] = dst.data
def make_step(net, step_size=1.5, end='inception_4c/output',
jitter=32, clip=True, objective=objective_L2):
'''Basic gradient ascent step.'''
src = net.blobs['data'] # input image is stored in Net's 'data' blob
dst = net.blobs[end]
ox, oy = np.random.randint(-jitter, jitter+1, 2)
src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift
net.forward(end=end)
objective(dst) # specify the optimization objective
net.backward(start=end)
g = src.diff[0]
# apply normalized ascent step to the input image
src.data[:] += step_size/np.abs(g).mean() * g
src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image
if clip:
bias = net.transformer.mean['data']
src.data[:] = np.clip(src.data, -bias, 255-bias)
def deepdream(net, base_img, iter_n=20, octave_n=4, octave_scale=1.4,
end='inception_4c/output', clip=True, **step_params):
# prepare base images for all octaves
octaves = [preprocess(net, base_img)]
for i in xrange(octave_n-1):
octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1))
src = net.blobs['data']
detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details
for octave, octave_base in enumerate(octaves[::-1]):
h, w = octave_base.shape[-2:]
if octave > 0:
# upscale details from the previous octave
h1, w1 = detail.shape[-2:]
detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1)
src.reshape(1,3,h,w) # resize the network's input image size
src.data[0] = octave_base+detail
for i in xrange(iter_n):
make_step(net, end=end, clip=clip, **step_params)
# visualization
vis = deprocess(net, src.data[0])
if not clip: # adjust image contrast if clipping is disabled
vis = vis*(255.0/np.percentile(vis, 99.98))
showarray(vis)
print octave, i, end, vis.shape
clear_output(wait=True)
# extract details produced on the current octave
detail = src.data[0]-octave_base
# returning the resulting image
return deprocess(net, src.data[0])
I run the code above with:
end = 'inception_4c/output'
img = np.float32(PIL.Image.open('clouds.jpg'))
_=deepdream(net, img)
Approach (b)
"""
Use one single image to guide
the optimization process.
This affects the style of generated images
without using a different training set.
"""
def dream_control_by_image(optimization_objective, end):
# this image will shape input img
guide = np.float32(PIL.Image.open(optimization_objective))
showarray(guide)
h, w = guide.shape[:2]
src, dst = net.blobs['data'], net.blobs[end]
src.reshape(1,3,h,w)
src.data[0] = preprocess(net, guide)
net.forward(end=end)
guide_features = dst.data[0].copy()
def objective_guide(dst):
x = dst.data[0].copy()
y = guide_features
ch = x.shape[0]
x = x.reshape(ch,-1)
y = y.reshape(ch,-1)
A = x.T.dot(y) # compute the matrix of dot-products with guide features
dst.diff[0].reshape(ch,-1)[:] = y[:,A.argmax(1)] # select ones that match best
_=deepdream(net, img, end=end, objective=objective_guide)
and I run the code above with:
end = 'inception_4c/output'
# image to be modified
img = np.float32(PIL.Image.open('img/clouds.jpg'))
guide_image = 'img/guide.jpg'
dream_control_by_image(guide_image, end)
Question
Now the failed approach how I tried to access individual classes, hot encoding the matrix of classes and focusing on one (so far to no avail):
def objective_class(dst, class=50):
# according to imagenet classes
#50: 'American alligator, Alligator mississipiensis',
one_hot = np.zeros_like(dst.data)
one_hot.flat[class] = 1.
dst.diff[:] = one_hot.flat[class]
To make this clear: the question is not about the dream code, which is the interesting background and which is already working code, but it is about this last paragraph's question only: Could someone please guide me on how to get images of a chosen class (take class #50: 'American alligator, Alligator mississipiensis') from ImageNet (so that I can use them as input - together with the cloud image - to create a dream image)?
The question is how to get images of the chosen class #50: 'American alligator, Alligator mississipiensis' from ImageNet.
Go to image-net.org.
Go to "Download".
Follow the instructions for "Download Image URLs":
How to download the URLs of a synset from your Brower?
1. Type a query in the Search box and click "Search" button
The alligator is not shown. ImageNet is under maintenance. Only ILSVRC synsets are included in the search results. No problem, we are fine with the similar animal "alligator lizard", since this search is about getting to the right branch of the WordNet treemap. I do not know whether you will get the direct ImageNet images here even if there were no maintenance.
2. Open a synset papge
Scrolling down:
Scrolling down:
Searching for the American alligator, which happens to be a saurian diapsid reptile as well, as a near neighbour:
3. You will find the "Download URLs" button under the left-bottom corner of the image browsing window.
You will get all of the URLs with the chosen class. A text file pops up in the browser:
http://image-net.org/api/text/imagenet.synset.geturls?wnid=n01698640
We see here that it is just about knowing the right WordNet id that needs to be put at the end of the URL.
Manual image download
The text file looks as follows:
http://farm1.static.flickr.com/136/326907154_d975d0c944.jpg
http://weeksbay.org/photo_gallery/reptiles/American20Alligator.jpg
...
till image number 1261.
As an example, the first URL links to:
And the second is a dead link:
The third link is dead, but the fourth is working.
The images of these URLs are publicly available, but many links are dead, and the pictures are of lower resolution.
Automated image download
From the ImageNet guide again:
How to download by HTTP protocol? To download a synset by HTTP
request, you need to obtain the "WordNet ID" (wnid) of a synset first.
When you use the explorer to browse a synset, you can find the WordNet
ID below the image window.(Click Here and search "Synset WordNet ID"
to find out the wnid of "Dog, domestic dog, Canis familiaris" synset).
To learn more about the "WordNet ID", please refer to
Mapping between ImageNet and WordNet
Given the wnid of a synset, the URLs of its images can be obtained at
http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=[wnid]
You can also get the hyponym synsets given wnid, please refer to API
documentation to learn more.
So what is in that API documentation?
There is everything needed to get all of the WordNet IDs (so called "synset IDs") and their words for all synsets, that is, it has any class name and its WordNet ID at hand, for free.
Obtain the words of a synset
Given the wnid of a synset, the words of
the synset can be obtained at
http://www.image-net.org/api/text/wordnet.synset.getwords?wnid=[wnid]
You can also Click Here to
download the mapping between WordNet ID and words for all synsets,
Click Here to download the
mapping between WordNet ID and glosses for all synsets.
If you know the WordNet ids of choice and their class names, you can use the nltk.corpus.wordnet of "nltk" (natural language toolkit), see the WordNet interface.
In our case, we just need the images of class #50: 'American alligator, Alligator mississipiensis', we already know what we need, thus we can leave the nltk.corpus.wordnet aside (see tutorials or Stack Exchange questions for more). We can automate the download of all alligator images by looping through the URLs that are still alive. We could also widen this to the full WordNet with a loop over all WordNet IDs, of course, though this would take far too much time for the whole treemap - and is also not recommended since the images will stop being there if 1000s of people download them daily.
I am afraid I will not take the time to write this Python code that accepts the ImageNet class number "#50" as the argument, though that should be possible as well, using mapping tables from WordNet to ImageNet. Class name and WordNet ID should be enough.
For a single WordNet ID, the code could be as follows:
import urllib.request
import csv
wnid = "n01698640"
url = "http://image-net.org/api/text/imagenet.synset.geturls?wnid=" + str(wnid)
# From https://stackoverflow.com/a/45358832/6064933
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
with open(wnid + ".csv", "wb") as f:
with urllib.request.urlopen(req) as r:
f.write(r.read())
with open(wnid + ".csv", "r") as f:
counter = 1
for line in f.readlines():
print(line.strip("\n"))
failed = []
try:
with urllib.request.urlopen(line) as r2:
with open(f'''{wnid}_{counter:05}.jpg''', "wb") as f2:
f2.write(r2.read())
except:
failed.append(f'''{counter:05}, {line}'''.strip("\n"))
counter += 1
if counter == 10:
break
with open(wnid + "_failed.csv", "w", newline="") as f3:
writer = csv.writer(f3)
writer.writerow(failed)
Result:
If you need the images even behind the dead links and in original quality, and if your project is non-commercial, you can sign in, see "How do I get a copy of the images?" at the Download FAQ.
In the URL above, you see the wnid=n01698640 at the end of the URL which is the WordNet id that is mapped to ImageNet.
Or in the "Images of the Synset" tab, just click on "Wordnet IDs".
To get to:
or right-click -- save as:
You can use the WordNet id to get the original images.
If you are commercial, I would say contact the ImageNet team.
Add-on
Taking up the idea of a comment: If you do not want many images, but just the "one single class image" that represents the class as much as possible, have a look at Visualizing GoogLeNet Classes and try to use this method with the images of ImageNet instead. Which is using the deepdream code as well.
Visualizing GoogLeNet Classes
July 2015
Ever wondered what a deep neural network thinks a Dalmatian should
look like? Well, wonder no more.
Recently Google published a post describing how they managed to use
deep neural networks to generate class visualizations and modify
images through the so called “inceptionism” method. They later
published the code to modify images via the inceptionism method
yourself, however, they didn’t publish code to generate the class
visualizations they show in the same post.
While I never figured out exactly how Google generated their class
visualizations, after butchering the deepdream code and this ipython
notebook from Kyle McDonald, I managed to coach GoogLeNet into drawing
these:
... [with many other example images to follow]
Related
I'm trying to use Tensorflow to Machine Learning to analyze an image and return the probability if is positive or negative based on a model created (extension .h5). I couldn't found a documentation exactly for that, or repository, so even a link to read will be awesome.
Link for the application: https://share.streamlit.io/felipelx/hackathon/IDC_Detector.py
Libraries that I'm trying to use.
import numpy as np
import streamlit as st
import tensorflow as tf
from keras.models import load_model
The function to load the model.
#st.cache(allow_output_mutation=True)
def loadIDCModel():
model_idc = load_model('models/IDC_model.h5', compile=False)
model_idc.summary()
return model_idc
The function to work the image, and what I'm trying to see: model.predict - I can see but is not updating the %, independent of the image the value is always the same.
if uploaded_file is not None:
# transform image to numpy array
file_bytes = tf.keras.preprocessing.image.load_img(uploaded_file, target_size=(96,96), grayscale = False, interpolation = 'nearest', color_mode = 'rgb', keep_aspect_ratio = False)
c.image(file_bytes, channels="RGB")
Genrate_pred = st.button("Generate Prediction")
if Genrate_pred:
model = loadMetModel()
input_arr = tf.keras.preprocessing.image.img_to_array(file_bytes)
input_arr = np.array([input_arr])
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
prediction = probability_model.predict(input_arr)
dict_pred = {0: 'Benigno/Normal', 1: 'Maligno'}
result = dict_pred[np.argmax(prediction)]
value = 0
if result == 'Benigno/Normal':
value = str(((prediction[0][0])*100).round(2)) + '%'
else:
value = str(((prediction[0][1])*100).round(2)) + '%'
c.metric('Predição', result, delta=value, delta_color='normal')
Thank you in advance to any help.
The first thing I'm noticing is that your function for loading the model is named loadIDCModel, but then the function you call for loading the model is loadMetModel. When I check your source code, though, it looks like you've already addressed this issue. I'd recommend updating your question to reflect this.
Playing around with your application, I think the issue is your model itself. I tried various images — images containing carcinomas, and even a picture of a cat — and each gave me a probability around 73%. The lowest score I got was 72.74%, and the highest was 73.11% (this one was the cat). It seems that the output percentage is varying slightly, hinting that rather than something being wrong in the code, your model itself is likely at fault. You might need to retrain your model, as it seems to have learned to always return a value of approximately 0.73.
I would like to use Huggingface Transformers to implement a chatbot. Currently, I have the code shown below. The transformer model already takes into account the history of past user input.
Is there something else (additional code) I have to take into account for building the chatbot?
Second, how can I modify my code to run with TensorFlow instead of PyTorch?
Later on, I also plan to fine-tune the model on other data. I also plan to test different models such as BlenderBot and GPT2. I think to test this different models it should be as easy as replacing the corresponding model in AutoTokenizer.from_pretrained("microsoft/DialoGPT-small") and AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
for step in range(5):
# encode the new user input, add the eos_token and return a tensor in Pytorch
new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')
# append the new user input tokens to the chat history
bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
# generated a response while limiting the total chat history to 1000 tokens,
chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
# pretty print last ouput tokens from bot
print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))
Here is an example of using the DialoGPT model with Tensorflow:
from transformers import TFAutoModelForCausalLM, AutoTokenizer, BlenderbotTokenizer, TFBlenderbotForConditionalGeneration
import tensorflow as tf
chat_bots = {
'BlenderBot': [BlenderbotTokenizer.from_pretrained('facebook/blenderbot-400M-distill'), TFT5ForConditionalGeneration.from_pretrained('facebook/blenderbot-400M-distill')],
'DialoGPT': [AutoTokenizer.from_pretrained("microsoft/DialoGPT-small"), TFAutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")],
}
key = 'DialoGPT'
tokenizer, model = chat_bots[key]
for step in range(5):
new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='tf')
if step > 0:
bot_input_ids = tf.concat([chat_history_ids, new_user_input_ids], axis=-1)
else:
bot_input_ids = new_user_input_ids
chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
print(key + ": {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))
>> User:How are you?
DialoGPT: I'm here
>> User:Why are you here
DialoGPT: I'm here
>> User:But why
DialoGPT: I'm here
>> User:Where is here
DialoGPT: Where is where?
>> User:Here
DialoGPT: Where is here?
If you want to compare different chatbots, you might want to adapt their decoder parameters, because they are not always identical. For example, using BlenderBot and a max_length of 50 you get this kind of response with the current code:
>> User:How are you?
BlenderBot: ! I am am great! how how how are are are???
In general, you should ask yourself which special characters are important for a chatbot (depending on your domain) and which characters should / can be omitted?
You should also experiment with different decoding methods such as greedy search, beam search, random sampling, top-k sampling, and nucleus sampling and find out what works best for your use case. For more information on this topic check out this post
I can't seem to find any documentation on how to use this model.
I am trying to use it to print out the objects that appear in a video
any help would be greatly appreciated
I am just starting out so go easy on me
I am trying to use it to print out the objects that appear in a video
I interpret that your problem is to print out the name of the found objects.
I don't know how you implemented where you got Fast RCNN trained on OpenImages v4. Therefore, I will give you the way with the model from Tensorflow Hub. Google Colab. AI Hub
After some digging around and a LOT of trial and error I came up with this
#!/home/ahmed/anaconda3/envs/TensorFlow/bin/python3.8
import tensorflow as tf
import tensorflow_hub as hub
import time,imageio,sys,pickle
# sys.argv[1] is used for taking the video path from the terminal
video = sys.argv[1]
#passing the video file to ImageIO to be read later in form of frames
video = imageio.get_reader(video)
dictionary = {}
#download and extract the model( faster_rcnn/openimages_v4/inception_resnet_v2 or
# openimages_v4/ssd/mobilenet_v2) in the same folder
module_handle = "*Path to the model folder*"
detector = hub.load(module_handle).signatures['default']
#looping over every frame in the video
for index, frames in enumerate(video):
# converting the images ( video frames ) to tf.float32 which is the only acceptable input format
image = tf.image.convert_image_dtype(frames, tf.float32)[tf.newaxis]
# passing the converted image to the model
detector_output = detector(image)
class_names = detector_output["detection_class_entities"]
scores = detector_output["detection_scores"]
# in case there are multiple objects in the frame
for i in range(len(scores)):
if scores[i] > 0.3:
#converting form bytes to string
object = class_names[i].numpy().decode("ascii")
#adding the objects that appear in the frames in a dictionary and their frame numbers
if object not in dictionary:
dictionary[object] = [index]
else:
dictionary[object].append(index)
print(dictionary)
I have retrained a MobileNet model with tweaks in the model and custom outputs in TensorFlow. I have to run the model on Android using Google ML KIT but the problem is it requires metadata. But whenever I go through he process it is giving me an error:
ValueError: File, '/content/drive/My Drive/labels.txt', is recorded in the metadata, but has not been loaded into the populator.
Here is my code for adding metadata:
model_metadata=_metadata_fb.ModelMetadataT()
model_metadata.name="MobileNet_with_Metadata"
model_metadata.description="This model is trained on plant village leaf disease dataset so that it can be used for detectiong crop diseases"
model_metadata.version="v1.0.0.0"
model_metadata.author="open-source"
model_metadata.license=("Apache License. Version 2.0 "
"http://www.apache.org/licenses/LICENSE-2.0.")
input_metadata=_metadata_fb.TensorMetadataT()
output_metadata=_metadata_fb.TensorMetadataT()
input_metadata.name="image"
input_metadata.description="input_meta.description = (
"Input image to be classified. The expected image is {0} x {1}, with "
"three channels (red, blue, and green) per pixel. Each value in the "
"tensor is a single byte between 0 and 1.".format(256, 256))"
input_normalization = _metadata_fb.ProcessUnitT()
input_normalization.optionsType = (
_metadata_fb.ProcessUnitOptions.NormalizationOptions)
input_normalization.options = _metadata_fb.NormalizationOptionsT()
input_normalization.options.mean = [127.5]
input_normalization.options.std = [127.5]
input_metadata.processUnits = [input_normalization]
input_stats = _metadata_fb.StatsT()
input_stats.max = [255]
input_stats.min = [0]
input_metadata.stats = input_stats
output_metadata.name="Probability"
output_metadata.description="Probabbility of 50 classes"
output_stats = _metadata_fb.StatsT()
output_stats.max = [1.0]
output_stats.min = [0.0]
output_metadata.stats = output_stats
label_file = _metadata_fb.AssociatedFileT()
label_file.name = "/content/drive/My Drive/labels.txt"
label_file.description = "Labels for objects that the model can recognize."
label_file.type = _metadata_fb.AssociatedFileType.TENSOR_AXIS_LABELS
output_metadata.associatedFiles = [label_file]
subgraph = _metadata_fb.SubGraphMetadataT()
subgraph.inputTensorMetadata = [input_metadata]
subgraph.outputTensorMetadata = [output_metadata]
model_metadata.subgraphMetadata = [subgraph]
b = flatbuffers.Builder(0)
b.Finish(
model_metadata.Pack(b),
_metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
metadata_buf = b.Output()
populator = _metadata.MetadataPopulator.with_model_file("/content/drive/My Drive/MobileNet_Model_latest.tflite")
populator.load_metadata_buffer(metadata_buf)
populator.load_associated_files(["/content/drive/My Drive/labels.txt"])
populator.populate()
I am doing this for first time and I couldn't get any proper help or documentation. Also, can anyone tell me properly on how to add meta data to the tflite model?
I have referred this link: Add Metadata to TensorFlow
I have faced this issue before while trying to deploy TF2 object detection models to mlkit vision sample. If I remember correctly it was trying to read labels from tflite metadata instead of the labelmap in assets folder.
There are existing issues on github for this metadata problem. Seems like it is not yet fixed as others pointed out. There are a few ways to solve it. Have a look at these issues,
https://github.com/tensorflow/models/issues/9341
https://github.com/tensorflow/tensorflow/issues/43583
Edit: The way shown below is not correct for adding metadata outputs. Correct way should in links above.
For adding metadata to object detection model have a look at these,
https://nbviewer.jupyter.org/github/quickgrid/CodeLab/blob/master/tensorflow/TFlite_Object_Detection_Custom_Model_Export_With_Metadata_TF1.ipynb
https://nbviewer.jupyter.org/github/quickgrid/CodeLab/blob/master/tensorflow/TFlite_Object_Detection_Custom_Model_Export_With_Metadata_TF2.ipynb
ok, I think you were guided by the mentioned code sections, but at the top the paragraph gives you a link to a github with the complete metadata code: https://github.com/tensorflow/examples/blob/master/lite/examples/image_classification/metadata/metadata_writer_for_image_classifier.py#L63-L74
Hi I've trying to get a TFX Pipeline going just as an exercise really. I'm using ImportExampleGen to load TFRecords from disk. Each Example in the TFRecord contains a jpg in the form of a byte string, height, width, depth, steering and throttle labels.
I'm trying to use StatisticsGen but I'm receiving this warning;
WARNING:root:Feature "image_raw" has bytes value "None" which cannot be decoded as a UTF-8 string. and crashing my Colab Notebook. As far as I can tell all the byte-string images in the TFRecord are not corrupt.
I cannot find concrete examples on StatisticsGen and handling image data. According to the docs Tensorflow Data Validation can deal with image data.
In addition to computing a default set of data statistics, TFDV can also compute statistics for semantic domains (e.g., images, text). To enable computation of semantic domain statistics, pass a tfdv.StatsOptions object with enable_semantic_domain_stats set to True to tfdv.generate_statistics_from_tfrecord.
But I'm not sure how this fits in with StatisticsGen.
Here is the code that instantiates an ImportExampleGen then the StatisticsGen
from tfx.utils.dsl_utils import tfrecord_input
from tfx.components.example_gen.import_example_gen.component import ImportExampleGen
from tfx.proto import example_gen_pb2
examples = tfrecord_input(_tf_record_dir)
# https://www.tensorflow.org/tfx/guide/examplegen#custom_inputoutput_split
# has a good explanation of splitting the data the 'output_config' param
# Input train split is _tf_record_dir/*'
# Output 2 splits: train:eval=8:2.
train_ratio = 8
eval_ratio = 10-train_ratio
output = example_gen_pb2.Output(
split_config=example_gen_pb2.SplitConfig(splits=[
example_gen_pb2.SplitConfig.Split(name='train',
hash_buckets=train_ratio),
example_gen_pb2.SplitConfig.Split(name='eval',
hash_buckets=eval_ratio)
]))
example_gen = ImportExampleGen(input=examples,
output_config=output)
context.run(example_gen)
statistics_gen = StatisticsGen(
examples=example_gen.outputs['examples'])
context.run(statistics_gen)
Thanks in advance.
From git issue response
Thanks Evan Rosen
Hi Folks,
The warnings you are seeing indicate that StatisticsGen is trying to treat your raw image features like a categorical string feature. The image bytes are being decoded just fine. The issue is that when the stats (including top K examples) are being written, the output proto is expecting a UTF-8 valid string, but instead gets the raw image bytes. Nothing is wrong with your setups from what I can tell, but this is just an unintended side-effect of a well-intentioned warning in the event that you have a categorical string feature which can't be serialized. We'll look into finding a better default that handles image data more elegantly.
In the meantime, to tell StatisticsGen that this feature is really an opaque blob, you can pass in a user-modified schema as described in the StatsGen docs. To generate this schema, you can run StatisticsGen and SchemaGen once (on a sample of data) and then modify the inferred schema to annotate that image features. Here is a modified version of the colab from #tall-josh:
Open In Colab
The additional steps are a bit verbose, but having a curated schema is often a good practice for other reasons. Here is the cell that I added to the notebook:
from google.protobuf import text_format
from tensorflow.python.lib.io import file_io
from tensorflow_metadata.proto.v0 import schema_pb2
# Load autogenerated schema (using stats from small batch)
schema = tfx.utils.io_utils.SchemaReader().read(
tfx.utils.io_utils.get_only_uri_in_dir(
tfx.types.artifact_utils.get_single_uri(schema_gen.outputs['schema'].get())))
# Modify schema to indicate which string features are images.
# Ideally you would persist a golden version of this schema somewhere rather
# than regenerating it on every run.
for feature in schema.feature:
if feature.name == 'image/raw':
feature.image_domain.SetInParent()
# Write modified schema to local file
user_schema_dir ='/tmp/user-schema/'
tfx.utils.io_utils.write_pbtxt_file(
os.path.join(user_schema_dir, 'schema.pbtxt'), schema)
# Create ImportNode to make modified schema available to other components
user_schema_importer = tfx.components.ImporterNode(
instance_name='import_user_schema',
source_uri=user_schema_dir,
artifact_type=tfx.types.standard_artifacts.Schema)
# Run the user schema ImportNode
context.run(user_schema_importer)
Hopefully you find this workaround is useful. In the meantime, we'll take a look at a better default experience for image-valued features.
Groked this and found the solution to be dramatically simpler than i thought...
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
import logging
...
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
...
context = InteractiveContext(pipeline_name='my_pipe')
...
c = StatisticsGen(...)
...
context.run(c)