Authentication - connect to Datalab from Compute Engine - google-bigquery

I would like to connect to BigQuery from Datalab and execute UPDATE commands.
I run the following code for the API and authentication:
from google.cloud import bigquery

# Get everything we possibly can from the service account JSON file
# (set GOOGLE_APPLICATION_CREDENTIALS)
cred = bigquery.Client.from_service_account_json('OrielResearch-da46e752c7ff.json')
# Instantiate a client
client = bigquery.Client(project='speedy-emissary-167213', credentials=cred)
# The name of the dataset
dataset_name = 'pgp_orielresearch'
# The name of the table
table_name = 'update_queries'
# Perform a synchronous query.
QUERY = (
    'SELECT * FROM [speedy-emissary-167213:pgp_orielresearch.update_queries]')
query = client.run_sync_query(QUERY)
dataset = client.dataset(dataset_name)
tables, token = dataset.list_tables()
and get the following error:
AttributeError: 'Client' object has no attribute 'authorize'
Any idea?
The full stack trace is:
AttributeErrorTraceback (most recent call last)
<ipython-input-2-616f54fa35ba> in <module>()
19 query = client.run_sync_query(QUERY)
20 dataset = client.dataset(dataset_name)
---> 21 t = dataset.list_tables()
22 #query.timeout_ms = TIMEOUT_MS
23 #query.run()
/usr/local/lib/python2.7/dist-packages/google/cloud/bigquery/dataset.py in list_tables(self, max_results, page_token)
568 connection = self._client.connection
569 resp = connection.api_request(method='GET', path=path,
--> 570 query_params=params)
571 tables = [Table.from_api_repr(resource, self)
572 for resource in resp.get('tables', ())]
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in api_request(self, method, path, query_params, data, content_type, api_base_url, api_version, expect_json, _target_object)
344 response, content = self._make_request(
345 method=method, url=url, data=data, content_type=content_type,
--> 346 target_object=_target_object)
347
348 if not 200 <= response.status < 300:
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in _make_request(self, method, url, data, content_type, headers, target_object)
242 headers['User-Agent'] = self.USER_AGENT
243
--> 244 return self._do_request(method, url, headers, data, target_object)
245
246 def _do_request(self, method, url, headers, data,
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in _do_request(self, method, url, headers, data, target_object)
270 :returns: The HTTP response object and the content of the response.
271 """
--> 272 return self.http.request(uri=url, method=method, headers=headers,
273 body=data)
274
/usr/local/lib/python2.7/dist-packages/google/cloud/connection.pyc in http(self)
101 self._http = httplib2.Http()
102 if self._credentials:
--> 103 self._http = self._credentials.authorize(self._http)
104 return self._http
105
AttributeError: 'Client' object has no attribute 'authorize'

Try setting the credentials like so:
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'OrielResearch-da46e752c7ff.json'
from google.cloud.bigquery.client import Client
client = Client()
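The likely root cause (my reading of the stack trace, so treat it as an assumption): bigquery.Client.from_service_account_json() returns a Client, not a credentials object, so passing its result as credentials= hands the old httplib2 auth layer an object with no authorize method. A minimal sketch using the classmethod on its own:

from google.cloud import bigquery

# from_service_account_json() already returns an authenticated Client,
# so no separate credentials object is needed.
client = bigquery.Client.from_service_account_json(
    'OrielResearch-da46e752c7ff.json',
    project='speedy-emissary-167213')

dataset = client.dataset('pgp_orielresearch')
tables, token = dataset.list_tables()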

Related

403 Forbidden block - how to fix it?

I have been trying to figure out a way to web scrape 500+ analysts without getting blocked. Is there a way to fix the 403 Forbidden error?
Will I need to go incognito, or have multiple accounts?
I've added the error it gives me below. I notice it stops at analyst #18; I checked, and nothing is wrong with that analyst, so it must be the website blocking me for web scraping.
ID1 = Predictions['analyst'].drop_duplicates()
len(ID1)
len(ID)
IDD = ID1.loc[0:500]
Analyst = []
Analyst
path = r'/Users/ashleyrabanales/chromedriver'  # chromedriver path
driver_service = Service(executable_path=path)
driver = webdriver.Chrome(service=driver_service)
url = 'https://estimize.com/'
for ID in IDD:
    time.sleep(random.uniform(2, 5))
    driver.get(f"{url}/users/{ID}")
    time.sleep(random.uniform(2, 3))
    # row to append to final list/dataframe
    try:
        login = driver.find_element('xpath', '//*[@id="top-navigation"]/div[1]/ul/li[8]/a')
        login.click()
        time.sleep(random.uniform(3, 8))
        # list username and password as variables
        username = 'cbrown180@student.gsu.edu'
        password = '123456'
        # find email field in HTML and send username to field -- sleep 2 seconds
        email_field = driver.find_element('name', 'user[login]')
        email_field.send_keys(username)
        time.sleep(random.uniform(2, 9))
        # find the password field, input the password and submit
        password_field = driver.find_element('name', 'user[password]')
        password_field.send_keys(password)
        password_field.submit()
        time.sleep(random.uniform(3, 3))
    except NoSuchElementException:
        pass
    row = {}
    # create variable to assign ticker to the row
    row['AnalystID'] = ID
    # grab the analyst id -- not just the name; grab the data user to use in the url for the final scraper
    name = driver.find_element('xpath', '//*[@id="users_show"]/div[4]/div[1]/div[1]/div[1]/div/h1/a').text
    row['Name'] = name
    role = driver.find_element('xpath', '//*[@id="users_show"]/div[4]/div[1]/div[1]/div[1]/div/ul').text
    row['Role'] = role
    join_date = driver.find_element('xpath', '//*[@id="users_show"]/div[4]/div[1]/div[1]/div[1]/div/div[2]/div[2]').text
    row['Join Date'] = join_date
    cs = driver.find_element('xpath', '//*[@id="confidence-wrap"]/div/div[2]').text
    row['Analyst Confidence Score'] = cs
    Err = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[1]/div[1]/div[3]').text
    row['Error rate'] = Err
    Accu = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[1]/div[2]/div[2]').text
    row['Accuracy Percentile'] = Accu
    Points = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[2]/div[1]/div[3]').text
    row['Points'] = Points
    PointsE = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[2]/div[2]/div[2]').text
    row['Points/Estimate'] = PointsE
    Stocks = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[3]/div[1]/div[3]').text
    row['Stocks'] = Stocks
    Pending = driver.find_element('xpath', '//*[@id="profile-tab-wrap"]/div[3]/div[2]/div[3]').text
    row['Pending'] = Pending
    if row not in Analyst:
        Analyst.append(row)
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py in find_element(self, by, value)
859 value = '[name="%s"]' % value
860
--> 861 return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})["value"]
862
863 def find_elements(self, by=By.ID, value: Optional[str] = None) -> List[WebElement]:
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
442 response = self.command_executor.execute(driver_command, params)
443 if response:
--> 444 self.error_handler.check_response(response)
445 response["value"] = self._unwrap_value(response.get("value", None))
446 return response
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
247 alert_text = value["alert"].get("text")
...
18 chromedriver 0x000000010849d81e chromedriver + 4782110
19 libsystem_pthread.dylib 0x00007fff765332eb _pthread_body + 126
20 libsystem_pthread.dylib 0x00007fff76536249 _pthread_start + 66
21 libsystem_pthread.dylib 0x00007fff7653240d thread_start + 13
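One hedged suggestion, assuming the block comes from rate limiting rather than a hard login wall: when the expected element is missing, back off and retry with a growing delay instead of crashing on analyst #18. The helper below is a hypothetical sketch, not part of the original code:

import random
import time

from selenium.common.exceptions import NoSuchElementException

def get_with_backoff(driver, url, locator, max_tries=4):
    # locator is a (by, value) tuple, e.g. ('xpath', '//*[@id="users_show"]/...').
    # A missing element is treated as a rough sign of being blocked.
    for attempt in range(max_tries):
        driver.get(url)
        time.sleep(random.uniform(2, 5) * (attempt + 1))  # grow the delay each retry
        try:
            return driver.find_element(*locator)
        except NoSuchElementException:
            continue  # likely blocked or still loading; wait longer and retry
    return None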

KeyBERT package is not working on Google Colab

I'm using KeyBERT on Google Colab to extract keywords from the text.
from keybert import KeyBERT
model = KeyBERT('distilbert-base-nli-mean-tokens')
text_keywords = model.extract_keywords(my_long_text)
But I get the following error:
OSError: Model name 'distilbert-base-nli-mean-token' was not found in model name list (distilbert-base-uncased, distilbert-base-uncased-distilled-squad). We assumed 'distilbert-base-nli-mean-token' was a path or url to a configuration file named config.json or a directory containing such a file but couldn't find any such file at this path or url.
Any idea how to fix this?
Thanks
Exception when trying to download http://sbert.net/models/distilbert-base-nli-mean-token.zip. Response 404
SentenceTransformer-Model http://sbert.net/models/distilbert-base-nli-mean-token.zip not found. Try to create it from scratch
Try to create Transformer Model distilbert-base-nli-mean-token with mean pooling
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/sentence_transformers/SentenceTransformer.py in __init__(self, model_name_or_path, modules, device)
78 zip_save_path = os.path.join(model_path_tmp, 'model.zip')
---> 79 http_get(model_url, zip_save_path)
80 with ZipFile(zip_save_path, 'r') as zip:
11 frames
/usr/local/lib/python3.7/dist-packages/sentence_transformers/util.py in http_get(url, path)
241 print("Exception when trying to download {}. Response {}".format(url, req.status_code), file=sys.stderr)
--> 242 req.raise_for_status()
243 return
/usr/local/lib/python3.7/dist-packages/requests/models.py in raise_for_status(self)
940 if http_error_msg:
--> 941 raise HTTPError(http_error_msg, response=self)
942
HTTPError: 404 Client Error: Not Found for url: https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/v0.2/distilbert-base-nli-mean-token.zip
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_utils.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
133 that will be used by default in the :obj:`generate` method of the model. In order to get the tokens of the
--> 134 words that should not appear in the generated text, use :obj:`tokenizer.encode(bad_word,
135 add_prefix_space=True)`.
/usr/local/lib/python3.7/dist-packages/transformers/file_utils.py in cached_path(url_or_filename, cache_dir, force_download, proxies)
181 except importlib_metadata.PackageNotFoundError:
--> 182 _timm_available = False
183
OSError: file distilbert-base-nli-mean-token not found
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-59-d0fa7b6b7cd1> in <module>()
1 doc = full_text
----> 2 model = KeyBERT('distilbert-base-nli-mean-token')
/usr/local/lib/python3.7/dist-packages/keybert/model.py in __init__(self, model)
46 * https://www.sbert.net/docs/pretrained_models.html
47 """
---> 48 self.model = select_backend(model)
49
50 def extract_keywords(self,
/usr/local/lib/python3.7/dist-packages/keybert/backend/_utils.py in select_backend(embedding_model)
40 # Create a Sentence Transformer model based on a string
41 if isinstance(embedding_model, str):
---> 42 return SentenceTransformerBackend(embedding_model)
43
44 return SentenceTransformerBackend("xlm-r-bert-base-nli-stsb-mean-tokens")
/usr/local/lib/python3.7/dist-packages/keybert/backend/_sentencetransformers.py in __init__(self, embedding_model)
33 self.embedding_model = embedding_model
34 elif isinstance(embedding_model, str):
---> 35 self.embedding_model = SentenceTransformer(embedding_model)
36 else:
37 raise ValueError("Please select a correct SentenceTransformers model: \n"
/usr/local/lib/python3.7/dist-packages/sentence_transformers/SentenceTransformer.py in __init__(self, model_name_or_path, modules, device)
93 save_model_to = model_path
94 model_path = None
---> 95 transformer_model = Transformer(model_name_or_path)
96 pooling_model = Pooling(transformer_model.get_word_embedding_dimension())
97 modules = [transformer_model, pooling_model]
/usr/local/lib/python3.7/dist-packages/sentence_transformers/models/Transformer.py in __init__(self, model_name_or_path, max_seq_length, model_args, cache_dir, tokenizer_args, do_lower_case)
25 self.do_lower_case = do_lower_case
26
---> 27 config = AutoConfig.from_pretrained(model_name_or_path, **model_args, cache_dir=cache_dir)
28 self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=config, cache_dir=cache_dir)
29 self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, cache_dir=cache_dir, **tokenizer_args)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_auto.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
/usr/local/lib/python3.7/dist-packages/transformers/configuration_utils.py in from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
144 after the :obj:`decoder_start_token_id`. Useful for multilingual models like :doc:`mBART
145 <../model_doc/mbart>` where the first generated token needs to be the target language token.
--> 146 - **forced_eos_token_id** (:obj:`int`, `optional`) -- The id of the token to force as the last generated token
147 when :obj:`max_length` is reached.
148 - **remove_invalid_values** (:obj:`bool`, `optional`) -- Whether to remove possible `nan` and `inf` outputs of
OSError: Model name 'distilbert-base-nli-mean-token' was not found in model name list (distilbert-base-uncased, distilbert-base-uncased-distilled-squad). We assumed 'distilbert-base-nli-mean-token' was a path or url to a configuration file named config.json or a directory containing such a file but couldn't find any such file at this path or url.
I couldn't reproduce this issue with the code you've provided, but from the error message I believe you're just missing an 's' in the model name. Make sure the model name is:
distilbert-base-nli-mean-tokens
and not
distilbert-base-nli-mean-token
Also refer to this link for all models available for use.
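For completeness, a minimal corrected call (my_long_text is the question's own variable):

from keybert import KeyBERT

# Note the trailing 's' in the model name.
model = KeyBERT('distilbert-base-nli-mean-tokens')
text_keywords = model.extract_keywords(my_long_text)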

TypeError: Wrong number or type of arguments for overloaded function 'new_Date'

I am new to Python and get an error when running the code below. The issue seems to be with the date; can someone help me correct it, please? I have tried changing the date format in the Excel file, but that does not solve the issue. The file contains a list of several bonds, and I want to generate the coupon dates of each bond.
BondData = pd.read_excel(r'C:\Users\Avishen\Desktop\Python\BONDDATA.xlsx')
Data = pd.DataFrame(BondData)

def scheduledates():
    tenor = ql.Period(ql.Semiannual)
    day_count = ql.Thirty360
    calendar = ql.UnitedStates()
    businessConvention = ql.Unadjusted
    dateGeneration = ql.DateGeneration.Backward
    monthEnd = False
    # Dates in bond period
    return ql.Schedule(issueDate, maturityDate, tenor, calendar, businessConvention,
                       businessConvention, dateGeneration, monthEnd)

new_df["Dates"] = Data.apply(lambda x: scheduledates(), axis=1)
new_df["ISIN"] = Data.ISIN
new_df
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-4-877415e9cf83> in <module>
21 businessConvention , dateGeneration, monthEnd)
22
---> 23 new_df["Dates"]= Data.apply(lambda x: scheduledates(),axis = 1)
24 new_df["ISIN"] = Data.ISIN
25 new_df
~\anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7546 kwds=kwds,
7547 )
-> 7548 return op.get_result()
7549
7550 def applymap(self, func) -> "DataFrame":
~\anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
269
270 def apply_standard(self):
--> 271 results, res_index = self.apply_series_generator()
272
273 # wrap results
~\anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
298 for i, v in enumerate(series_gen):
299 # ignore SettingWithCopy here in case the user mutates
--> 300 results[i] = self.f(v)
301 if isinstance(results[i], ABCSeries):
302 # If we have a view on v, we need to make a copy because
<ipython-input-4-877415e9cf83> in <lambda>(x)
21 businessConvention , dateGeneration, monthEnd)
22
---> 23 new_df["Dates"]= Data.apply(lambda x: scheduledates(),axis = 1)
24 new_df["ISIN"] = Data.ISIN
25 new_df
<ipython-input-4-877415e9cf83> in scheduledates()
8
9 def scheduledates():
---> 10 issueDate = ql.Date(Data.issuedate)
11 maturityDate = ql.Date(Data.maturitydate)
12 tenor = ql.Period(ql.Semiannual)
~\anaconda3\lib\site-packages\QuantLib\QuantLib.py in __init__(self, *args)
425
426 def __init__(self, *args):
--> 427 _QuantLib.Date_swiginit(self, _QuantLib.new_Date(*args))
428
429 def weekdayNumber(self):
TypeError: Wrong number or type of arguments for overloaded function 'new_Date'.
Possible C/C++ prototypes are:
Date::Date()
Date::Date(Day,Month,Year)
Date::Date(Day,Month,Year,Hour,Minute,Second,Millisecond,Microsecond)
Date::Date(Day,Month,Year,Hour,Minute,Second,Millisecond)
Date::Date(Day,Month,Year,Hour,Minute,Second)
Date::Date(BigInteger)
Date::Date(std::string const &,std::string)
---------------------------------------------------------------------------
Data = pd.DataFrame(BondData)
Fields from Bond Data
ISIN
issuedate
maturitydate
coupon
Tradeyield
Bond_Price
MarketPrice
Nominal_Amount
From the traceback, the problem is the line:
issueDate = ql.Date(Data.issuedate)
(which, for some reason, is not in the code you pasted). Coming from Excel, issuedate should be an integer and thus compatible with the ql.Date constructor, but it's possible that pandas is reading it as a string or some other type. You should examine the data frame and check the type of the column. If it's not what you expect, you'll have to figure out whether there are data in that column that pandas can't interpret as integers, and either clean them up or force the conversion somehow before passing them to ql.Date.
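A minimal sketch of that advice, assuming the issuedate and maturitydate columns hold Excel serial numbers (an assumption; if pandas parsed them as strings or Timestamps the conversion would differ). It also passes each row into the function, since the original lambda ignored its argument and ql.Date(Data.issuedate) handed the constructor a whole column:

import pandas as pd
import QuantLib as ql

def scheduledates(row):
    # int(...) forces the value into the ql.Date(BigInteger) serial-number
    # constructor; adjust if the column holds strings or Timestamps.
    issueDate = ql.Date(int(row['issuedate']))
    maturityDate = ql.Date(int(row['maturitydate']))
    tenor = ql.Period(ql.Semiannual)
    calendar = ql.UnitedStates()
    businessConvention = ql.Unadjusted
    dateGeneration = ql.DateGeneration.Backward
    monthEnd = False
    return ql.Schedule(issueDate, maturityDate, tenor, calendar,
                       businessConvention, businessConvention,
                       dateGeneration, monthEnd)

Data = pd.read_excel(r'C:\Users\Avishen\Desktop\Python\BONDDATA.xlsx')
new_df = pd.DataFrame()
new_df["Dates"] = Data.apply(scheduledates, axis=1)
new_df["ISIN"] = Data.ISIN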

UnicodeDecodeError while reading a CSV file using csv.DictReader

Here's my code:
import csv

path = "/home/Downloads/sample_email.csv"
with open(path) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['first_name'], row['last_name'])
The error is:
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-47-d89ea200a227> in <module>
3 with open(path) as csvfile:
4 reader = csv.DictReader(csvfile)
----> 5 for i in reader:
6 print(i['first_name'], i['last_name'])
/usr/lib/python3.6/csv.py in __next__(self)
109 if self.line_num == 0:
110 # Used only for its side effect.
--> 111 self.fieldnames
112 row = next(self.reader)
113 self.line_num = self.reader.line_num
/usr/lib/python3.6/csv.py in fieldnames(self)
96 if self._fieldnames is None:
97 try:
---> 98 self._fieldnames = next(self.reader)
99 except StopIteration:
100 pass
/usr/lib/python3.6/codecs.py in decode(self, input, final)
319 # decode input (taking the buffer into account)
320 data = self.buffer + input
--> 321 (result, consumed) = self._buffer_decode(data, self.errors, final)
322 # keep undecoded input until the next call
323 self.buffer = data[consumed:]
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 596: invalid start byte
When I want to convert this file to a DataFrame, I have to use:
df = pd.read_csv(path, sep=',', engine='python')
Any help?
My aim is to extract the data and create an email template. Any guidance on this matter is also appreciated.
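Byte 0xa0 is not valid UTF-8 (it is a non-breaking space in Latin-1/CP-1252), so the file is probably not UTF-8 encoded. A hedged fix, assuming the file is Latin-1 (check with a tool such as chardet if unsure):

import csv

path = "/home/Downloads/sample_email.csv"
# encoding='latin-1' (or 'cp1252') is an assumption about the real encoding.
with open(path, encoding='latin-1') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['first_name'], row['last_name'])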

Issue accessing S3 from Tensorflow

With the following config:
os.environ['AWS_ACCESS_KEY_ID'] = 'xxxxxx'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'xxxxxxxx'
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 's3-us-west-2.amazonaws.com'
os.environ['S3_USE_HTTPS'] = '1'
os.environ['S3_VERIFY_SSL'] = '1'
print(file_io.stat('s3://abcd/def.txt'))
I get the error
/usr/local/lib/python3.6/dist-packages/tensorflow/python/lib/io/file_io.py in stat(filename)
556 with errors.raise_exception_on_not_ok_status() as status:
557 pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status)
--> 558 return file_statistics
559
560
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
NotFoundError: Object s3://abcd/def.txt does not exist
Note that this file does exist.
I also get the following error on a write-and-close:
UnknownError: PermanentRedirect: Unable to parse ExceptionName: PermanentRedirect Message: The bucket you are attempting to access must be addressed using the specified endpoint. Please send all future requests to this endpoint.
What more is needed to fix this?
This is what my config looks like:
import os
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'
os.environ['S3_VERIFY_SSL'] = '0'
I think you have to change
os.environ['S3_ENDPOINT'] = 's3-us-west-2.amazonaws.com'
to
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'
Here is a link for your reference.
You can also use a bucket-specific endpoint such as
mybucket.s3-us-west-2.amazonaws.com
and then use s3://pathtofile to access the object.
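Putting the two suggestions together, a sketch of the full configuration (the bucket and key are the question's placeholders; set the variables before TensorFlow first touches S3):

import os

os.environ['AWS_ACCESS_KEY_ID'] = 'xxxxxx'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'xxxxxxxx'
os.environ['AWS_REGION'] = 'us-west-2'
os.environ['S3_ENDPOINT'] = 'https://s3-us-west-2.amazonaws.com'  # full https URL
os.environ['S3_USE_HTTPS'] = '1'
os.environ['S3_VERIFY_SSL'] = '1'

from tensorflow.python.lib.io import file_io  # import after the env vars are set
print(file_io.stat('s3://abcd/def.txt'))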