When I execute nlp.begin_training() I get the following error - spaCy

"I'm trying to train the ner model using spacy. It works fine for CPU. But when I try executing it using GPU I'm getting the following error. Spacy version 2.1.4, CUDA version 10.1"
"I tried re-installing thinc but still I'm getting the error"
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding
import json

spacy.require_gpu()
nlp = spacy.blank("en")
ner = nlp.create_pipe("ner")
nlp.add_pipe(ner, last=True)  # add the NER pipe to the pipeline

# TRAIN_DATA is a list of (text, annotations) tuples loaded elsewhere
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

optimizer = nlp.begin_training()
"I'm getting the following error"
"CUDARuntimeError
Traceback (most recent call last)
in
----> 1 optimizer = nlp.begin_training()
G:\Anaconda3\lib\site-packages\spacy\language.py in begin_training(self, get_gold_tuples, sgd, component_cfg, **cfg)
547 if self.vocab.vectors.data.shape[1] >= 1:
548 self.vocab.vectors.data = Model.ops.asarray(self.vocab.vectors.data)
--> 549 link_vectors_to_models(self.vocab)
550 if self.vocab.vectors.data.shape[1]:
551 cfg["pretrained_vectors"] = self.vocab.vectors.name
G:\Anaconda3\lib\site-packages\spacy_ml.py in link_vectors_to_models(vocab)
297 else:
298 word.rank = 0
--> 299 data = ops.asarray(vectors.data)
300 # Set an entry here, so that vectors are accessed by StaticVectors
301 # (unideal, I know)
ops.pyx in thinc.neural.ops.CupyOps.asarray()
G:\Anaconda3\lib\site-packages\cupy\creation\from_data.py in array(obj, dtype, copy, order, subok, ndmin)
39
40 """
---> 41 return core.array(obj, dtype, copy, order, subok, ndmin)
42
43
cupy\core\core.pyx in cupy.core.core.array()
cupy\core\core.pyx in cupy.core.core.array()
cupy\core\core.pyx in cupy.core.core.ndarray.__init__()
cupy\cuda\memory.pyx in cupy.cuda.memory.alloc()
cupy\cuda\memory.pyx in cupy.cuda.memory.MemoryPool.malloc()
cupy\cuda\memory.pyx in cupy.cuda.memory.MemoryPool.malloc()
cupy\cuda\device.pyx in cupy.cuda.device.get_device_id()
cupy\cuda\runtime.pyx in cupy.cuda.runtime.getDevice()
cupy\cuda\runtime.pyx in cupy.cuda.runtime.check_status()
CUDARuntimeError: cudaErrorUnknown: unknown error"
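
Since the traceback bottoms out in cupy.cuda.runtime, it is worth confirming that CuPy itself can reach the GPU before debugging spaCy. A minimal sanity check, assuming cupy is installed to match the CUDA toolkit (e.g. cupy-cuda101 for CUDA 10.1):

import cupy as cp

# If the CUDA runtime is misconfigured, either call below raises the
# same cudaErrorUnknown seen in the spaCy traceback.
print(cp.cuda.runtime.getDeviceCount())  # number of visible GPUs
x = cp.asarray([1.0, 2.0, 3.0])          # forces a GPU allocation
print(x.sum())                           # should print 6.0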


tflite_model_maker if obj['difficult'] == 'Unspecified': KeyError: 'difficult'

I am trying to train a tflite model on just the 'person' class from the COCO dataset.
I am using tflite-model-maker to train and FiftyOne to process the dataset.
When running the training .py file I get the error below.
root@85ac26b47f92:/external# python demofie.py
2022-11-01 21:02:01.059188: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: UNKNOWN ERROR (34)
2022-11-01 21:02:01.059234: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 85ac26b47f92
2022-11-01 21:02:01.059242: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 85ac26b47f92
2022-11-01 21:02:01.059324: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
2022-11-01 21:02:01.059381: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.141.3
2022-11-01 21:02:01.059821: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
  File "demofie.py", line 20, in <module>
    train_data = object_detector.DataLoader.from_pascal_voc(images_dir='/external/train/data', annotations_dir='/external/train/labels', label_map=['person'], ignore_difficult_instances=False, num_shards=100)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow_examples/lite/model_maker/core/data_util/object_detector_dataloader.py", line 217, in from_pascal_voc
    cache_writer.write_files(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow_examples/lite/model_maker/core/data_util/object_detector_dataloader_util.py", line 252, in write_files
    tf_example = create_pascal_tfrecord.dict_to_tf_example(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow_examples/lite/model_maker/third_party/efficientdet/dataset/create_pascal_tfrecord.py", line 162, in dict_to_tf_example
    if obj['difficult'] == 'Unspecified':
KeyError: 'difficult'
The code that causes the error is below. Can anyone with better coding knowledge than me shed some light on any mistakes I may have made?
I have added the FiftyOne code below it (it runs without error).
import numpy as np
import os

from tflite_model_maker.config import QuantizationConfig
from tflite_model_maker.config import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import object_detector
import tensorflow as tf

assert tf.__version__.startswith('2')
tf.get_logger().setLevel('ERROR')
from absl import logging
logging.set_verbosity(logging.ERROR)

spec = model_spec.get('efficientdet_lite1')

train_data = object_detector.DataLoader.from_pascal_voc(
    images_dir='/external/train/data', annotations_dir='/external/train/labels',
    label_map=['person'], ignore_difficult_instances=False, num_shards=100)
validation_data = object_detector.DataLoader.from_pascal_voc(
    images_dir='/external/val/data', annotations_dir='/external/val/labels',
    label_map=['person'], ignore_difficult_instances=False, num_shards=100)
test_data = object_detector.DataLoader.from_pascal_voc(
    images_dir='/external/test/data', annotations_dir='/external/test/labels',
    label_map=['person'], ignore_difficult_instances=False, num_shards=100)

model = object_detector.create(train_data, model_spec=spec, batch_size=8,
                               epochs=2000, train_whole_model=True,
                               validation_data=validation_data)
model.evaluate(test_data)
model.export(export_dir='/external/')
Dataset generation code:
import fiftyone.zoo as foz
import fiftyone as fo
from fiftyone import ViewField as F

cocodataset_test = foz.load_zoo_dataset(
    "coco-2017",
    splits="test",
    label_types=["detections"],
    classes=["person"],
    only_matching=True,
    # max_samples=50,
)

cocodataset_validation = foz.load_zoo_dataset(
    "coco-2017",
    splits="validation",
    label_types=["detections"],
    classes=["person"],
    only_matching=True,
    # max_samples=50,
)

cocodataset_train = foz.load_zoo_dataset(
    "coco-2017",
    splits="train",
    label_types=["detections"],
    classes=["person"],
    only_matching=True,
    # max_samples=50,
)

cocodataset_validation.export(
    '/external/val',
    fo.types.VOCDetectionDataset,
)
cocodataset_train.export(
    '/external/train/',
    fo.types.VOCDetectionDataset,
)
cocodataset_test.export(
    '/external/test/',
    fo.types.VOCDetectionDataset,
)
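
For reference, the loader that raises the KeyError expects every <object> entry in the VOC XML to carry a <difficult> tag, which the FiftyOne VOC export may omit (an assumption based on the KeyError, not on FiftyOne's documentation). A hedged sketch of a post-processing step that adds a default tag to the exported labels (the /external/.../labels paths are taken from the loader calls above; adjust if your export lays files out differently):

import os
import xml.etree.ElementTree as ET

def add_missing_difficult(labels_dir):
    """Add <difficult>0</difficult> to any <object> that lacks the tag."""
    for name in os.listdir(labels_dir):
        if not name.endswith('.xml'):
            continue
        path = os.path.join(labels_dir, name)
        tree = ET.parse(path)
        for obj in tree.getroot().iter('object'):
            if obj.find('difficult') is None:
                ET.SubElement(obj, 'difficult').text = '0'
        tree.write(path)

for split in ('train', 'val', 'test'):
    add_missing_difficult('/external/%s/labels' % split)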

Not able to execute sample code provided in a Hugging Face model card

When I try the sample code from Hugging Face I get the error below.
The code can be found at https://huggingface.co/facebook/tts_transformer-en-ljspeech
Code:
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import IPython.display as ipd
models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
"facebook/fastspeech2-en-ljspeech",
arg_overrides={"vocoder": "hifigan", "fp16": False}
)
model = models[0]
TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
generator = task.build_generator(model, cfg)
text = "Hello, this is a test run."
sample = TTSHubInterface.get_model_input(task, text)
wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
ipd.Audio(wav, rate=rate)
Error:
TypeError                                 Traceback (most recent call last)
Input In [1], in <module>
     10 model = models[0]
     11 TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
---> 12 generator = task.build_generator(model, cfg)
     14 text = "Hello, this is a test run."
     16 sample = TTSHubInterface.get_model_input(task, text)

File ~/office/virtual_environments/eye_for_bliend/Images/fairseq/fairseq/tasks/text_to_speech.py:151, in TextToSpeechTask.build_generator(self, models, cfg, vocoder, **unused)
    149 if vocoder is None:
    150     vocoder = self.build_default_vocoder()
--> 151 model = models[0]
    152 if getattr(model, "NON_AUTOREGRESSIVE", False):
    153     return NonAutoregressiveSpeechGenerator(model, vocoder, self.data_cfg)

TypeError: 'TTSTransformerModel' object is not subscriptable
What worked for me was to wrap the model in a list when building the generator on line 12; the build_generator signature shown in the traceback takes models (plural) and immediately indexes models[0], so it expects a list even for a single model.
generator = task.build_generator([model], cfg)

How to make a custom metric available to TFMA/Beam?

I have created a custom Keras metric, similar to the demo implementation below:
import tensorflow as tf


class MyMetric(tf.keras.metrics.Mean):

    def __init__(self, name='my_metric', dtype=None):
        super(MyMetric, self).__init__(name=name, dtype=dtype)

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super(MyMetric, self).update_state(
            y_pred, sample_weight=sample_weight)
I have turned the implementation into a Python module with the init/main files and added the path to the system's PYTHONPATH.
I can use the metric when I train the Keras model.
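For context, training-time usage is just the standard Keras flow; a minimal sketch (the Sequential model here is illustrative, only the metric import mirrors the snippet below):

import tensorflow as tf
from mymetric.metric import MyMetric

# Illustrative model; any compiled Keras model exercises the metric the same way.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[MyMetric()])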
Unfortunately, I haven't found a way to make the custom metric available to TensorFlow Model Analysis (TFMA).
In my interactive context notebook, I can load the metric when I create the eval_config.
import tensorflow as tf
import tensorflow_model_analysis as tfma
from mymetric.metric import MyMetric

metrics = [MyMetric()]
metrics_specs = tfma.metrics.specs_from_metrics(metrics)

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label_xf')],
    metrics_specs=metrics_specs,
    slicing_specs=[tfma.SlicingSpec()]
)

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config)
When I try to execute the evaluator, the metric is listed in the metric specifications
metrics_specs {
  metrics {
    class_name: "MyMetric"
    config: "{\"dtype\": \"float32\", \"name\": \"my_metric\"}"
    threshold {
    }
  }
}
but the execution fails with the error
ValueError: Unknown metric function: MyMetric
Since the metric calculation is executed via Apache Beam's executor.Do function, I assume that Beam can't find the module (even though it is on the PYTHONPATH). If that is the case, how can I make the module available to Apache Beam beyond the PYTHONPATH configuration?
Traceback:
/usr/local/lib/python3.6/dist-packages/tensorflow_model_analysis/metrics/metric_specs.py in _deserialize_tf_metric(metric_config, custom_objects)
    741   cls_name, cfg = _tf_class_and_config(metric_config)
    742   with tf.keras.utils.custom_object_scope(custom_objects):
--> 743     return tf.keras.metrics.deserialize({'class_name': cls_name, 'config': cfg})
    744
    745

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/metrics.py in deserialize(config, custom_objects)
   3441       module_objects=globals(),
   3442       custom_objects=custom_objects,
-> 3443       printable_module_name='metric function')
   3444
   3445

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in deserialize_keras_object(identifier, module_objects, custom_objects, printable_module_name)
    345     config = identifier
    346   (cls, cls_config) = class_and_config_for_serialized_keras_object(
--> 347       config, module_objects, custom_objects, printable_module_name)
    348
    349   if hasattr(cls, 'from_config'):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in class_and_config_for_serialized_keras_object(config, module_objects, custom_objects, printable_module_name)
    294   cls = get_registered_object(class_name, custom_objects, module_objects)
    295   if cls is None:
--> 296     raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
    297
    298   cls_config = config['config']

ValueError: Unknown metric function: MyMetric
You need to specify the module so that TFX knows where to find your MyMetric class. One way of doing this is to specify it as part of the metric specs:

from tensorflow_model_analysis import config

metric_config = [config.MetricConfig(class_name='MyMetric',
                                     module='mymodule.mymetric')]
metrics_specs = [config.MetricsSpec(metrics=metric_config)]

You will also need to create a module called mymodule and put your MyMetric class in mymetric.py for this to work. Also make sure that the module is accessible from where you are executing the code (which should be the case if you have added it to your PYTHONPATH).
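
A minimal layout matching those (hypothetical) names would be:

mymodule/
    __init__.py
    mymetric.py   # defines MyMetric(tf.keras.metrics.Mean) as shown above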

TensorFlow: how to change hub.Module() to a local folder

How can I change:

BERT_MODEL = "https://tfhub.dev/google/bert_multi_cased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])
    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()
So that I can load a local BERT model without the hub.Module() call, as it doesn't work with a local path.
I downloaded a different TF1 pre-trained model from a different website, unzipped it and stored it in /test/module/.
If I change the above to BERT_MODEL = "/test/module", how would I need to change the rest? I now get string errors, as tokenization_info = bert_module(signature="tokenization_info", as_dict=True) doesn't work.
Please help, I am new to TF. Note: I need to use TF1, not TF2.
Note: with the suggestion below I get:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-a98e44536f87> in <module>()
      9   return vocab_file, do_lower_case
     10
---> 11 print(get_bert_tokenizer_info("/tmp/local_copy"))
     12 # Will print: (b'/tmp/local_copy/assets/vocab.txt', False)

4 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_hub/registry.py in __call__(self, *args, **kwargs)
     43     raise RuntimeError(
     44         "Missing implementation that supports: %s(*%r, **%r)" % (
---> 45             self._name, args, kwargs))
     46
     47

RuntimeError: Missing implementation that supports: loader(*('/tmp/local_copy',), **{})
hub.Module works with local uncompressed paths, so you can change BERT_MODEL to another path and reuse the same code.
Example:
Create local copy of the module:
mkdir /tmp/local_copy
wget "https://tfhub.dev/google/bert_multi_cased_L-12_H-768_A-12/1?tf-hub-format=compressed" -O "module.tar.gz"
tar -C /tmp/local_copy -xzvf module.tar.gz
Use the local copy of the module:
import tensorflow as tf
import tensorflow_hub as hub

def get_bert_tokenizer_info(bert_module):
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(bert_module)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])
    return vocab_file, do_lower_case

print(get_bert_tokenizer_info("/tmp/local_copy"))
# Will print: (b'/tmp/local_copy/assets/vocab.txt', False)
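
If the RuntimeError from the note above still appears with a local path, one plausible cause (an assumption, since the loader error is generic) is that the directory is not an uncompressed TF1 Hub module, i.e. it is missing tfhub_module.pb at its top level. A quick check:

import os

module_dir = "/tmp/local_copy"
# An uncompressed TF1 Hub module has these entries at the top level.
print(sorted(os.listdir(module_dir)))
# Expect something like: ['assets', 'saved_model.pb', 'tfhub_module.pb', 'variables']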

ValueError: np.nan is an invalid document, expected byte or unicode string

I am trying to perform sentiment analysis on Uber reviews. I have used Naive Bayes from sklearn, with training data on reviews from Kaggle.
But the test data is in an xlsx sheet, so I used pandas to create a data frame:
import pandas as pd

test = pd.read_excel("uber.xlsx", sep="\t", encoding="ISO-8859-1")
test.head(3)
As it returned a dtype: object column, I transformed it to a list using this:
test_text = []
for comments in comments_t:
    test_text.append(comments)
My code for classifying text based on the training data:

# Training phase
from sklearn.naive_bayes import BernoulliNB

classifier = BernoulliNB().fit(train_documents, labels)

def sentiment(word):
    return classifier.predict(count_vectorizer.transform([word]))
But while predicting, it returns this ValueError:
/anaconda3/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in transform(self, raw_documents)
   1084
   1085         # use the same matrix-building strategy as fit_transform
-> 1086         _, X = self._count_vocab(raw_documents, fixed_vocab=True)
   1087         if self.binary:
   1088             X.data.fill(1)

/anaconda3/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in _count_vocab(self, raw_documents, fixed_vocab)
    940         for doc in raw_documents:
    941             feature_counter = {}
--> 942             for feature in analyze(doc):
    943                 try:
    944                     feature_idx = vocabulary[feature]

/anaconda3/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in <lambda>(doc)
    326                                                tokenize)
    327             return lambda doc: self._word_ngrams(
--> 328                 tokenize(preprocess(self.decode(doc))), stop_words)
    329
    330         else:

/anaconda3/lib/python3.7/site-packages/sklearn/feature_extraction/text.py in decode(self, doc)
    141
    142         if doc is np.nan:
--> 143             raise ValueError("np.nan is an invalid document, expected byte or "
    144                              "unicode string.")
    145

ValueError: np.nan is an invalid document, expected byte or unicode string.
I tried to solve it according to this:
https://stackoverflow.com/questions/39303912/tfidfvectorizer-in-scikit-learn-valueerror-np-nan-is-an-invalid-document
The data that I found on Kaggle for Uber is https://www.kaggle.com/purvank/uber-rider-reviews-dataset/downloads/Uber_Ride_Reviews.csv/2
Now, coming to your problem:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB

df = pd.read_csv('Uber_Ride_Reviews.csv')

df.head()
Out[7]:
                                         ride_review  ...  sentiment
0  I completed running New York Marathon requeste...  ...          0
1  My appointment time auto repairs required earl...  ...          0
2  Whether I using Uber ride service Uber Eats or...  ...          0
3  Why hard understand I trying retrieve Uber cab...  ...          0
4  I South Beach FL I staying major hotel ordered...  ...          0

df.columns
Out[8]: Index(['ride_review', 'ride_rating', 'sentiment'], dtype='object')

vect = CountVectorizer()
vect1 = vect.fit_transform(df['ride_review'])
classifier = BernoulliNB()
classifier.fit(vect1, df['sentiment'])

# Predicting a new comment gives output:
new_test_ = vect.transform(['uber ride is very good'])
classifier.predict(new_test_)
Out[5]: array([0], dtype=int64)

# But when applying your function sentiment, you are only passing word; you
# need to pass the classifier as well as the CountVectorizer instance.
def sentiment(word, classifier, vect):
    return classifier.predict(vect.transform([word]))

# Calling the above function on a new comment:
sentiment('uber ride is very good', classifier, vect)
Out[10]: array([0], dtype=int64)
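
Separately, the ValueError in the question is raised because some cells in the review column of the xlsx are empty and load as np.nan, which CountVectorizer refuses to process. A hedged sketch of the usual fix (the column name ride_review is borrowed from the Kaggle frame above; substitute whatever column your uber.xlsx actually uses):

import pandas as pd

test = pd.read_excel("uber.xlsx")
# Drop empty cells and force the rest to strings before vectorizing;
# np.nan entries are exactly what trigger the "invalid document" error.
comments_t = test['ride_review'].dropna().astype(str).tolist()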