Issue accessing S3 from TensorFlow

With the following config:
import os
from tensorflow.python.lib.io import file_io
os.environ['AWS_ACCESS_KEY_ID'] = 'xxxxxx'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'xxxxxxxx'
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 's3-us-west-2.amazonaws.com'
os.environ['S3_USE_HTTPS'] = '1'
os.environ['S3_VERIFY_SSL'] = '1'
print(file_io.stat('s3://abcd/def.txt'))
I get the error
/usr/local/lib/python3.6/dist-packages/tensorflow/python/lib/io/file_io.py in stat(filename)
556 with errors.raise_exception_on_not_ok_status() as status:
557 pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status)
--> 558 return file_statistics
559
560
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
NotFoundError: Object s3://abcd/def.txt does not exist
Note this file does exist.
I also get the following error on a write and close.
UnknownError: PermanentRedirect: Unable to parse ExceptionName: PermanentRedirect Message: The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.
What more is needed to fix this?

This is what my config looks like:
import os
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'
os.environ['S3_VERIFY_SSL'] = '0'
I think you have to change from
os.environ['S3_ENDPOINT'] = 's3-us-west-2.amazonaws.com'
to
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'
Here is a link for your reference.
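Putting that together with the snippet from the question, the full configuration would look like this (a sketch; the credentials are placeholders and the poster's HTTPS settings are kept as-is):
import os
from tensorflow.python.lib.io import file_io
os.environ['AWS_ACCESS_KEY_ID'] = 'xxxxxx'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'xxxxxxxx'
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'  # scheme included, as suggested above
os.environ['S3_USE_HTTPS'] = '1'
os.environ['S3_VERIFY_SSL'] = '1'
print(file_io.stat('s3://abcd/def.txt'))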

You can use an endpoint like
mybucket.s3-us-west-2.amazonaws.com
and then use s3://pathtofile to access the file.


I created a simple class that works without @jitclass. When I try to improve it with @jitclass it stops working. What is happening here?

Following example 12.4 from this website https://python-programming.quantecon.org/numba.html#id4, I constructed a simple class to model an AR(1) process.
Although the code works fine without the use of @jitclass, it stops working after I remove the "#" (i.e. uncomment the decorator).
import numpy as np
import numba
import matplotlib.pyplot as plt
from numba import float64
from numba import int32
from numba.experimental import jitclass

#ar_1_data = [('ρ', float64), ('z_0', float64), ('μ', float64), ('σ', float64)]
#@jitclass(ar_1_data)
class ar_1:
    def __init__(self, ρ = 0.5, z_0 = 1, μ = 0, σ = 1):
        self.ρ = ρ
        self.z = z_0
        self.lnz = np.log(z_0)
        self.μ = μ
        self.σ = σ

    def update(self):
        self.z = self.z**(self.ρ) * np.e**(np.random.normal(self.μ, self.σ))

    def sequence(self, n):
        path = []
        path_log = []
        for i in range(n):
            path.append(self.z)
            path_log.append(np.log(self.z))
            self.update()
        self.sequence = path
        self.sequence_log = path_log

a = ar_1()
a.sequence(100)
Here is the error I'm getting after removing the "#":
---------------------------------------------------------------------------
TypingError Traceback (most recent call last)
Input In [83], in <cell line: 1>()
----> 1 a = ar_1()
2 a.sequence(100)
File ~\anaconda3\lib\site-packages\numba\experimental\jitclass\base.py:124, in JitClassType.__call__(cls, *args, **kwargs)
122 bind = cls._ctor_sig.bind(None, *args, **kwargs)
123 bind.apply_defaults()
--> 124 return cls._ctor(*bind.args[1:], **bind.kwargs)
File ~\anaconda3\lib\site-packages\numba\core\dispatcher.py:468, in _DispatcherBase._compile_for_args(self, *args, **kws)
464 msg = (f"{str(e).rstrip()} \n\nThis error may have been caused "
465 f"by the following argument(s):\n{args_str}\n")
466 e.patch_message(msg)
--> 468 error_rewrite(e, 'typing')
469 except errors.UnsupportedError as e:
470 # Something unsupported is present in the user code, add help info
471 error_rewrite(e, 'unsupported_error')
File ~\anaconda3\lib\site-packages\numba\core\dispatcher.py:409, in _DispatcherBase._compile_for_args.<locals>.error_rewrite(e, issue_type)
407 raise e
408 else:
--> 409 raise e.with_traceback(None)
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Failed in nopython mode pipeline (step: nopython frontend)
Cannot resolve setattr: (instance.jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>).z = int64
File "..\..\..\..\..\AppData\Local\Temp\ipykernel_17336\4275445632.py", line 9:
<source missing, REPL/exec in use?>
During: typing of set attribute 'z' at C:\Users\Hogar\AppData\Local\Temp\ipykernel_17336\4275445632.py (9)
File "..\..\..\..\..\AppData\Local\Temp\ipykernel_17336\4275445632.py", line 9:
<source missing, REPL/exec in use?>
During: resolving callee type: jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>
During: typing of call at <string> (3)
During: resolving callee type: jitclass.ar_1#2658a3e4d30<ρ:float64,z_0:float64,μ:float64,σ:float64>
During: typing of call at <string> (3)
File "<string>", line 3:
<source missing, REPL/exec in use?>
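For reference, a jitclass spec has to declare every attribute the class assigns; here __init__ sets self.z and self.lnz, which the spec ar_1_data does not list, and that is what the "Cannot resolve setattr: ... .z" message points at. A sketch of a spec covering those attributes (offered only as an illustration, not taken from the original post) would be:
ar_1_data = [('ρ', float64), ('z', float64), ('lnz', float64), ('μ', float64), ('σ', float64)]
The sequence method would still need changes as well, since self.sequence and self.sequence_log are not declared in the spec either.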

KeyBERT package is not working on Google Colab

I'm using KeyBERT on Google Colab to extract keywords from the text.
from keybert import KeyBERT
model = KeyBERT('distilbert-base-nli-mean-tokens')
text_keywords = model.extract_keywords(my_long_text)
But I get the following error:
OSError: Model name 'distilbert-base-nli-mean-token' was not found in model name list (distilbert-base-uncased, distilbert-base-uncased-distilled-squad). We assumed 'distilbert-base-nli-mean-token' was a path or url to a configuration file named config.json or a directory containing such a file but couldn't find any such file at this path or url.
Any idea how to fix this?
Thanks
Exception when trying to download http://sbert.net/models/distilbert-base-nli-mean-token.zip. Response 404
SentenceTransformer-Model http://sbert.net/models/distilbert-base-nli-mean-token.zip not found. Try to create it from scratch
Try to create Transformer Model distilbert-base-nli-mean-token with mean pooling
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sentence_transformers/SentenceTransformer.py in __init__(self, model_name_or_path, modules, device)
78 zip_save_path = os.path.join(model_path_tmp, 'model.zip')
---> 79 http_get(model_url, zip_save_path)
80 with ZipFile(zip_save_path, 'r') as zip:
11 frames
/usr/local/lib/python3.7/dist-packages/sentence_transformers/util.py in http_get(url, path)
241 print("Exception when trying to download {}. Response {}".format(url, req.status_code), file=sys.stderr)
--> 242 req.raise_for_status()
243 return
/usr/local/lib/python3.7/dist-packages/requests/models.py in raise_for_status(self)
940 if http_error_msg:
--> 941 raise HTTPError(http_error_msg, response=self)
942
HTTPError: 404 Client Error: Not Found for url: https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/v0.2/distilbert-base-nli-mean-token.zip
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_utils.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
133 that will be used by default in the :obj:`generate` method of the model. In order to get the tokens of the
--> 134 words that should not appear in the generated text, use :obj:`tokenizer.encode(bad_word,
135 add_prefix_space=True)`.
/usr/local/lib/python3.7/dist-packages/transformers/file_utils.py in cached_path(url_or_filename, cache_dir, force_download, proxies)
181 except importlib_metadata.PackageNotFoundError:
--> 182 _timm_available = False
183
OSError: file distilbert-base-nli-mean-token not found
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-59-d0fa7b6b7cd1> in <module>()
1 doc = full_text
----> 2 model = KeyBERT('distilbert-base-nli-mean-token')
/usr/local/lib/python3.7/dist-packages/keybert/model.py in __init__(self, model)
46 * https://www.sbert.net/docs/pretrained_models.html
47 """
---> 48 self.model = select_backend(model)
49
50 def extract_keywords(self,
/usr/local/lib/python3.7/dist-packages/keybert/backend/_utils.py in select_backend(embedding_model)
40 # Create a Sentence Transformer model based on a string
41 if isinstance(embedding_model, str):
---> 42 return SentenceTransformerBackend(embedding_model)
43
44 return SentenceTransformerBackend("xlm-r-bert-base-nli-stsb-mean-tokens")
/usr/local/lib/python3.7/dist-packages/keybert/backend/_sentencetransformers.py in __init__(self, embedding_model)
33 self.embedding_model = embedding_model
34 elif isinstance(embedding_model, str):
---> 35 self.embedding_model = SentenceTransformer(embedding_model)
36 else:
37 raise ValueError("Please select a correct SentenceTransformers model: \n"
/usr/local/lib/python3.7/dist-packages/sentence_transformers/SentenceTransformer.py in __init__(self, model_name_or_path, modules, device)
93 save_model_to = model_path
94 model_path = None
---> 95 transformer_model = Transformer(model_name_or_path)
96 pooling_model = Pooling(transformer_model.get_word_embedding_dimension())
97 modules = [transformer_model, pooling_model]
/usr/local/lib/python3.7/dist-packages/sentence_transformers/models/Transformer.py in __init__(self, model_name_or_path, max_seq_length, model_args, cache_dir, tokenizer_args, do_lower_case)
25 self.do_lower_case = do_lower_case
26
---> 27 config = AutoConfig.from_pretrained(model_name_or_path, **model_args, cache_dir=cache_dir)
28 self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=config, cache_dir=cache_dir)
29 self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, cache_dir=cache_dir, **tokenizer_args)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_auto.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_utils.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
144 after the :obj:`decoder_start_token_id`. Useful for multilingual models like :doc:`mBART
145 <../model_doc/mbart>` where the first generated token needs to be the target language token.
--> 146 - **forced_eos_token_id** (:obj:`int`, `optional`) -- The id of the token to force as the last generated token
147 when :obj:`max_length` is reached.
148 - **remove_invalid_values** (:obj:`bool`, `optional`) -- Whether to remove possible `nan` and `inf` outputs of
OSError: Model name 'distilbert-base-nli-mean-token' was not found in model name list (distilbert-base-uncased, distilbert-base-uncased-distilled-squad). We assumed 'distilbert-base-nli-mean-token' was a path or url to a configuration file named config.json or a directory containing such a file but couldn't find any such file at this path or url.
I couldn't reproduce this issue with the code you've provided, but from the error message I believe you're just missing an 's' in the model name, so make sure that the model name is as follows:
distilbert-base-nli-mean-tokens
and not
distilbert-base-nli-mean-token
Also refer to this link for all models available for use.
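For example, with the corrected name the snippet from the question becomes (my_long_text stands in for your own text, exactly as in the original code):
from keybert import KeyBERT
model = KeyBERT('distilbert-base-nli-mean-tokens')
text_keywords = model.extract_keywords(my_long_text)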

How to make a custom metric available to TFMA/Beam?

I have created a custom Keras metric, similar to the demo implementation below:
import tensorflow as tf

class MyMetric(tf.keras.metrics.Mean):

    def __init__(self, name='my_metric', dtype=None):
        super(MyMetric, self).__init__(name=name, dtype=dtype)

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super(MyMetric, self).update_state(
            y_pred, sample_weight=sample_weight)
I have turned the implementation into a Python module with the init/main files and added the path to the system's PYTHONPATH.
I can use the metric when I train the Keras model.
Unfortunately, I haven't found a way to make the custom metric available to TensorFlow Model Analysis (TFMA).
In my interactive context notebook, I can load the metric when I create the eval_config.
import tensorflow as tf
import tensorflow_model_analysis as tfma
from tfx.components import Evaluator
from mymetric.metric import MyMetric

metrics = [MyMetric()]
metrics_specs = tfma.metrics.specs_from_metrics(metrics)

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key='label_xf')],
    metrics_specs=metrics_specs,
    slicing_specs=[tfma.SlicingSpec()]
)

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config)
When I try to execute the evaluator, the metric is listed in the metric specifications
metrics_specs {
  metrics {
    class_name: "MyMetric"
    config: "{\"dtype\": \"float32\", \"name\": \"my_metric\"}"
    threshold {
    }
  }
}
but the execution fails with the error
ValueError: Unknown metric function: MyMetric
Since the metric calculation is executed via Apache Beam's executor.Do function, I assume that Beam can't find the module (even though it is on the PYTHONPATH). If that is the case, how can I make the module available to Apache Beam beyond the PYTHONPATH configuration?
Traceback:
/usr/local/lib/python3.6/dist-packages/tensorflow_model_analysis/metrics/metric_specs.py in _deserialize_tf_metric(metric_config, custom_objects)
741 cls_name, cfg = _tf_class_and_config(metric_config)
742 with tf.keras.utils.custom_object_scope(custom_objects):
--> 743 return tf.keras.metrics.deserialize({'class_name': cls_name, 'config': cfg})
744
745
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/metrics.py in deserialize(config, custom_objects)
3441 module_objects=globals(),
3442 custom_objects=custom_objects,
-> 3443 printable_module_name='metric function')
3444
3445
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in deserialize_keras_object(identifier, module_objects, custom_objects, printable_module_name)
345 config = identifier
346 (cls, cls_config) = class_and_config_for_serialized_keras_object(
--> 347 config, module_objects, custom_objects, printable_module_name)
348
349 if hasattr(cls, 'from_config'):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in class_and_config_for_serialized_keras_object(config, module_objects, custom_objects, printable_module_name)
294 cls = get_registered_object(class_name, custom_objects, module_objects)
295 if cls is None:
--> 296 raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
297
298 cls_config = config['config']
ValueError: Unknown metric function: MyMetric
You need to specify the module so that TFX knows where to find your MyMetric class. One way of doing this is to specify it as part of the metric specs:
from tensorflow_model_analysis import config
metric_config = [config.MetricConfig(class_name='MyMetric', module='mymodule.mymetric')]
metrics_specs = [config.MetricsSpec(metrics=metric_config)]
You will also need to create a module called mymodule and put your MyMetric class in mymetric.py for this to work. Also make sure that the module is accessible from where you are executing the code (which should be the case if you have added it to your PYTHONPATH).
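To illustrate, a minimal sketch of how the pieces could fit together, using the module layout assumed in this answer (file and module names follow the example above; this is a sketch, not a tested setup):
# mymodule/mymetric.py
import tensorflow as tf

class MyMetric(tf.keras.metrics.Mean):
    def __init__(self, name='my_metric', dtype=None):
        super(MyMetric, self).__init__(name=name, dtype=dtype)

    def update_state(self, y_true, y_pred, sample_weight=None):
        return super(MyMetric, self).update_state(y_pred, sample_weight=sample_weight)

# eval config referencing the metric by class name and module
from tensorflow_model_analysis import config

metrics_specs = [config.MetricsSpec(
    metrics=[config.MetricConfig(class_name='MyMetric', module='mymodule.mymetric')])]
eval_config = config.EvalConfig(
    model_specs=[config.ModelSpec(label_key='label_xf')],
    metrics_specs=metrics_specs,
    slicing_specs=[config.SlicingSpec()])
The idea is that TFMA can then import mymodule.mymetric on the Beam workers instead of relying on the notebook's globals, so the module has to be importable (installed or on the PYTHONPATH) on those workers as well.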

Using Sagemaker predictor in a Spark UDF function

I am trying to run inference on a Tensorflow model deployed on SageMaker from a Python Spark job.
I am running a (Databricks) notebook which has the following cell:
def call_predict():
    batch_size = 1
    data = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2]]
    tensor_proto = tf.make_tensor_proto(values=np.asarray(data), shape=[batch_size, len(data[0])], dtype=tf.float32)
    prediction = predictor.predict(tensor_proto)
    print("Process time: {}".format((time.clock() - start)))
    return prediction
If I just call call_predict() it works fine:
call_predict()
and I get the output:
Process time: 65.261396
Out[61]: {'model_spec': {'name': u'generic_model',
'signature_name': u'serving_default',
'version': {'value': 1578909324L}},
'outputs': {u'ages': {'dtype': 1,
'float_val': [5.680944442749023],
'tensor_shape': {'dim': [{'size': 1L}]}}}}
but when I try to call from a Spark context (in a UDF) I get a serialization error.
The code I'm trying to run is:
dataRange = range(1, 10001)
rangeRDD = sc.parallelize(dataRange, 8)
new_data = rangeRDD.map(lambda x : call_predict())
new_data.count()
and the error I get is:
---------------------------------------------------------------------------
PicklingError Traceback (most recent call last)
<command-2282434> in <module>()
2 rangeRDD = sc.parallelize(dataRange, 8)
3 new_data = rangeRDD.map(lambda x : call_predict())
----> 4 new_data.count()
5
/databricks/spark/python/pyspark/rdd.pyc in count(self)
1094 3
1095 """
-> 1096 return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
1097
1098 def stats(self):
/databricks/spark/python/pyspark/rdd.pyc in sum(self)
1085 6.0
1086 """
-> 1087 return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
1088
1089 def count(self):
/databricks/spark/python/pyspark/rdd.pyc in fold(self, zeroValue, op)
956 # zeroValue provided to each partition is unique from the one provided
957 # to the final reduce call
--> 958 vals = self.mapPartitions(func).collect()
959 return reduce(op, vals, zeroValue)
960
/databricks/spark/python/pyspark/rdd.pyc in collect(self)
829 # Default path used in OSS Spark / for non-credential passthrough clusters:
830 with SCCallSiteSync(self.context) as css:
--> 831 sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
832 return list(_load_from_socket(sock_info, self._jrdd_deserializer))
833
/databricks/spark/python/pyspark/rdd.pyc in _jrdd(self)
2573
2574 wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
-> 2575 self._jrdd_deserializer, profiler)
2576 python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
2577 self.preservesPartitioning, self.is_barrier)
/databricks/spark/python/pyspark/rdd.pyc in _wrap_function(sc, func, deserializer, serializer, profiler)
2475 assert serializer, "serializer should not be empty"
2476 command = (func, profiler, deserializer, serializer)
-> 2477 pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
2478 return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
2479 sc.pythonVer, broadcast_vars, sc._javaAccumulator)
/databricks/spark/python/pyspark/rdd.pyc in _prepare_for_python_RDD(sc, command)
2461 # the serialized command will be compressed by broadcast
2462 ser = CloudPickleSerializer()
-> 2463 pickled_command = ser.dumps(command)
2464 if len(pickled_command) > sc._jvm.PythonUtils.getBroadcastThreshold(sc._jsc): # Default 1M
2465 # The broadcast will have same life cycle as created PythonRDD
/databricks/spark/python/pyspark/serializers.pyc in dumps(self, obj)
709 msg = "Could not serialize object: %s: %s" % (e.__class__.__name__, emsg)
710 cloudpickle.print_exec(sys.stderr)
--> 711 raise pickle.PicklingError(msg)
712
713
PicklingError: Could not serialize object: TypeError: can't pickle _ssl._SSLSocket objects
I'm not sure what this serialization error means - is it complaining about failing to pickle the Predictor?
My notebook has a cell which was called prior to the above cells with the following imports:
import sagemaker
import boto3
from sagemaker.tensorflow.model import TensorFlowPredictor
import tensorflow as tf
import numpy as np
import time
The Predictor was created with the following code:
sagemaker_client = boto3.client('sagemaker', aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY, region_name='us-east-1')
sagemaker_runtime_client = boto3.client('sagemaker-runtime', aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY, region_name='us-east-1')
boto_session = boto3.Session(region_name='us-east-1')
sagemaker_session = sagemaker.Session(boto_session, sagemaker_client=sagemaker_client, sagemaker_runtime_client=sagemaker_runtime_client)
predictor = TensorFlowPredictor('endpoint-poc', sagemaker_session)
The UDF will be executed by multiple Spark tasks in parallel. Those tasks run in completely isolated Python processes and may be scheduled on physically different machines. Hence any data the function references must be available on the node where it runs; this is automatically the case for everything created inside the UDF.
Whenever the function references an object defined outside of the UDF, that object has to be serialised (pickled) and shipped to each executor. Some object state, such as open socket connections, cannot be pickled.
You need to make sure that connections are opened lazily on each executor, and only on the first function call on that executor. The connection-pooling topic is covered in the docs, although only in the Spark Streaming guide (it applies to normal batch jobs as well).
Normally one would use the singleton pattern for this, but in Python the Borg pattern is commonly used instead.
class Env:
    _shared_state = {
        "sagemaker_client": None,
        "sagemaker_runtime_client": None,
        "boto_session": None,
        "sagemaker_session": None,
        "predictor": None,
    }

    def __init__(self):
        self.__dict__ = self._shared_state
        if not self.predictor:
            self.sagemaker_client = boto3.client('sagemaker', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, region_name='us-east-1')
            self.sagemaker_runtime_client = boto3.client('sagemaker-runtime', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, region_name='us-east-1')
            self.boto_session = boto3.Session(region_name='us-east-1')
            self.sagemaker_session = sagemaker.Session(self.boto_session, sagemaker_client=self.sagemaker_client, sagemaker_runtime_client=self.sagemaker_runtime_client)
            self.predictor = TensorFlowPredictor('endpoint-poc', self.sagemaker_session)

# ....

def call_predict():
    env = Env()
    batch_size = 1
    data = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2]]
    tensor_proto = tf.make_tensor_proto(values=np.asarray(data), shape=[batch_size, len(data[0])], dtype=tf.float32)
    prediction = env.predictor.predict(tensor_proto)
    print("Process time: {}".format((time.clock() - start)))
    return prediction

new_data = rangeRDD.map(lambda x: call_predict())
The Env class is defined on the master node; its _shared_state starts out with empty entries. When an Env object is instantiated for the first time on an executor, it shares its state with every further Env instance created by subsequent calls to the UDF in that process. In each separate worker process this initialisation happens exactly once. This way the sessions are shared and never need to be pickled.
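A related sketch (an alternative, not what the answer above uses): the same lazy-initialisation idea can be made explicit with mapPartitions, so Env() is called once per partition rather than once per record:
def predict_partition(rows):
    env = Env()  # created on the executor; the Borg state is reused within that process
    for _ in rows:
        data = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2]]
        tensor_proto = tf.make_tensor_proto(values=np.asarray(data),
                                            shape=[1, len(data[0])], dtype=tf.float32)
        yield env.predictor.predict(tensor_proto)

new_data = rangeRDD.mapPartitions(predict_partition)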

Authentication - connect to Datalab from Compute Engine

I would like to connect to BigQuery from Datalab and execute update commands.
I run the following code for the API & authentication:
from google.cloud import bigquery
# Get everything we possibly can from the service account JSON file
#set GOOGLE_APPLICATION_CREDENTIALS
cred = bigquery.Client.from_service_account_json('OrielResearch-da46e752c7ff.json')
# Instantiates a client
client = bigquery.Client(project='speedy-emissary-167213',credentials=cred)
# The name of the dataset
dataset_name = 'pgp_orielresearch'
# The name of the table
table_name = 'update_queries'
# Perform a synchronous query.
QUERY = (
'SELECT * FROM [speedy-emissary-167213:pgp_orielresearch.update_queries]')
query = client.run_sync_query(QUERY)
dataset = client.dataset(dataset_name)
tables, token = dataset.list_tables()
and get the following error:
AttributeError: 'Client' object has no attribute 'authorize'
Any idea?
The full stack is:
AttributeErrorTraceback (most recent call last)
<ipython-input-2-616f54fa35ba> in <module>()
19 query = client.run_sync_query(QUERY)
20 dataset = client.dataset(dataset_name)
---> 21 t = dataset.list_tables()
22 #query.timeout_ms = TIMEOUT_MS
23 #query.run()
/usr/local/lib/python2.7/dist-packages/google/cloud/bigquery/dataset.py in list_tables(self, max_results, page_token)
568 connection = self._client.connection
569 resp = connection.api_request(method='GET', path=path,
--> 570 query_params=params)
571 tables = [Table.from_api_repr(resource, self)
572 for resource in resp.get('tables', ())]
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in api_request(self, method, path, query_params, data, content_type, api_base_url, api_version, expect_json, _target_object)
344 response, content = self._make_request(
345 method=method, url=url, data=data, content_type=content_type,
--> 346 target_object=_target_object)
347
348 if not 200 <= response.status < 300:
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in _make_request(self, method, url, data, content_type, headers, target_object)
242 headers['User-Agent'] = self.USER_AGENT
243
--> 244 return self._do_request(method, url, headers, data, target_object)
245
246 def _do_request(self, method, url, headers, data,
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in _do_request(self, method, url, headers, data, target_object)
270 :returns: The HTTP response object and the content of the response.
271 """
--> 272 return self.http.request(uri=url, method=method, headers=headers,
273 body=data)
274
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in http(self)
101 self._http = httplib2.Http()
102 if self._credentials:
--> 103 self._http = self._credentials.authorize(self._http)
104 return self._http
105
AttributeError: 'Client' object has no attribute 'authorize'
Try setting the credentials like so:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'OrielResearch-da46e752c7ff.json'
from google.cloud.bigquery.client import Client
client = Client()
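For completeness, a sketch of how the table listing and query could then be run; this assumes a recent google-cloud-bigquery, where run_sync_query has been replaced by client.query, so adjust to the version you have installed:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'OrielResearch-da46e752c7ff.json'

from google.cloud import bigquery

client = bigquery.Client(project='speedy-emissary-167213')

# List the tables in the dataset
for table in client.list_tables('pgp_orielresearch'):
    print(table.table_id)

# Run the query (standard SQL uses backticks instead of [ ])
rows = client.query(
    'SELECT * FROM `speedy-emissary-167213.pgp_orielresearch.update_queries`').result()
for row in rows:
    print(row)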