I want to get an Excel file from the data frame created by the code below, which updates automatically.

I have tried two methods and both show a different location from the one I gave, as in this image.
apikey = 'abcd'
import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import time
ts = TimeSeries(key=apikey, output_format='pandas')
data, metadata = ts.get_intraday(symbol='name', interval='1min', outputsize='full')
data
while True:
    data, metadata = ts.get_intraday(symbol='TCS', interval='1min', outputsize='full')
    data.to_excel('livedat.xlsx')
    time.sleep(60)
The code runs properly, but I don't know how to get the data file in Excel.
Important: the method should produce a file that is updated automatically every minute.
Also, I am using IBM Watson Studio to write the code.

I am not familiar with the alpha_vantage wrapper that you are using, but this is how I would approach your problem. The code works and I have included comments.
To read the file back in the Python script, I would use pd.read_excel(filepath) (see the short sketch after the code).
import requests
import pandas as pd
import time
import datetime

# Your API key and the URL we will request from
API_KEY = "YOUR API KEY"
url = "https://www.alphavantage.co/query?"

def Generate_file(symbol="IBM", interval="1min"):
    # URL parameters
    parameters = {"function": "TIME_SERIES_INTRADAY",
                  "symbol": symbol,
                  "interval": interval,
                  "apikey": API_KEY,
                  "outputsize": "compact"}
    # get the JSON response from Alpha Vantage
    response = requests.get(url, params=parameters)
    data = response.json()
    # filter the response to only get the time series data we want
    time_series_interval = f"Time Series ({interval})"
    prices = data[time_series_interval]
    # convert the filtered response to a pandas DataFrame
    df = pd.DataFrame.from_dict(prices, orient="index").reset_index()
    df = df.rename(columns={"index": time_series_interval})
    # create a timestamp for our Excel file so it does not get overwritten with new data each time
    current_time = datetime.datetime.now()
    file_timestamp = current_time.strftime("%Y%m%d_%H.%M")
    filename = f"livedat_{file_timestamp}.xlsx"
    df.to_excel(filename)

# set a limit on the number of calls we make to prevent an infinite loop
call_limit = 3
number_of_calls = 0
while number_of_calls < call_limit:
    Generate_file()  # our function
    number_of_calls += 1
    time.sleep(60)
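As mentioned above, the generated file can then be read back into the script with pd.read_excel. A minimal sketch, assuming one of the timestamped filenames written by Generate_file (the exact name depends on when it ran):
import pandas as pd
# hypothetical filename; substitute the timestamped file that Generate_file actually wrote
df = pd.read_excel("livedat_20230101_10.30.xlsx")
print(df.head())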

How to scrape the tweets' external links content for text analytics?

I am using the following code to scrape the news associated with tweets' external links.
import snscrape.modules.twitter as sntwitter
import pandas as pd
import urllib
import re
import requests
# Creating list to append tweet data to a dataframe
tweets_list = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('#SkySportsNews OR #cnnsport since:2022-07-06').get_items()):
    url = re.findall("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_#.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", tweet.content)
    if len(url) != 0:
        page = requests.get(url)
    if i > 18000:
        break
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, url, page.text])
# Creating a dataframe from the tweets list above
tweets_df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'url', 'text'])
But I am getting the error below:
> `InvalidSchema: No connection adapters were found for "['http link']"`
and it is coming from the `page = requests.get(url)` line.
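For what it's worth, the error appears because re.findall returns a list while requests.get expects a single URL string; a minimal sketch of one way around it (taking just the first match) could look like this:
urls = re.findall(r"http[s]?://\S+", tweet.content)  # simplified pattern, for illustration only
if urls:
    page = requests.get(urls[0])  # pass a single URL string, not the whole list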

Use URLs from List to save zip file

I am trying to use urllib.request to read a list of URLs from a shapefile and then download the zip files from all of those URLs. So far I have my list of URLs, but I am unable to pass all of them through. The error is "expected string or bytes-like object", so there is probably an issue with the URL. As a side note, I also need to download the files and name them by their file name/#. Code below.
import arcpy
import urllib.request
import os
os.chdir('C:\\ProgInGIS\\FinalExam\\Final')
lidar_shp = 'C:\\ProgInGIS\\FinalExam\\Final\\lidar-2013.shp'
zip_file_download = 'C:\\ProgInGIS\\FinalExam\\Final\\file1.zip'
data = []
with arcpy.da.SearchCursor(lidar_shp, "*") as cursor:
    for row in cursor:
        data.append(row)
data.sort(key=lambda tup: tup[2])
i = 0
with arcpy.da.UpdateCursor(lidar_shp, "*") as cursor:
    for row in cursor:
        row = data[i]
        i += 1
        cursor.updateRow(row)
counter = 0
url_list = []
with arcpy.da.UpdateCursor(lidar_shp, ['geotiff_ur']) as cursor:
    for row in cursor:
        url_list.append(row)
        counter += 1
        if counter == 18:
            break
for item in url_list:
    print(item)
    urllib.request.urlretrieve(item)
I understand your question this way: you want to download a zip file for each record in a shapefile, from a URL defined in a certain field.
It's easier to use the requests package, which is also recommended in the urllib.request documentation:
The Requests package is recommended for a higher-level HTTP client interface.
Here is an example:
import arcpy, arcpy.da
import shutil
import requests

SHAPEFILE = "your_shapefile.shp"

with arcpy.da.SearchCursor(SHAPEFILE, ["name", "url"]) as cursor:
    for name, url in cursor:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(f"{name}.zip", "wb") as file:
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, file)
There is another example on GIS StackExchange:
https://gis.stackexchange.com/a/392463/21355
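If you prefer to stay with urllib.request from the original code, urlretrieve also accepts a destination filename as its second argument. A rough sketch, assuming the shapefile has a name field alongside the geotiff_ur URL field (adjust the field names to your data):
import arcpy.da
import urllib.request
with arcpy.da.SearchCursor("your_shapefile.shp", ["name", "geotiff_ur"]) as cursor:
    for name, url in cursor:
        # urlretrieve needs a plain URL string and a target filename
        urllib.request.urlretrieve(url, f"{name}.zip")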

django rest framework test code self.client.delete problem

from rest_framework import status, response
from rest_framework.test import APITestCase
from lots.models import Lot

class LotsTestCase(APITestCase):
    def setUp(self) -> None:
        self.lot = Lot.objects.create(name="1",
                                      address="Dont Know",
                                      phone_num="010-4451-2211",
                                      latitude=127.12,
                                      longitude=352.123,
                                      basic_rate=20000,
                                      additional_rate=2000,
                                      partnership=False,
                                      section_count=3,)

    def test_delete(self):
        response = self.client.delete(f'api/lots/{self.lot["name"]}')
        # response = self.client.delete(f'/api/users/{self.users[0].pk}')
        # url = reverse(f'/api/lots/{self.lot}', kwargs={'pk': self.lot.pk})
        # self.client.delete(url)
        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
        self.assertEqual(self.lot.objects.filter(pk=self.lot.pk.count()))
I have problems with the test code above. Why doesn't it work? I know it has to do with calling dictionary values but I just can't figure it out. Thanks for your help.
Lot.objects.create(...) returns a Lot instance, so you access the name with self.lot.name.
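For illustration, a corrected test_delete inside LotsTestCase might look like the sketch below; the /api/lots/<name> path is an assumption, since the real URL depends on your routing:
    def test_delete(self):
        # use attribute access on the model instance, not dictionary subscripting
        response = self.client.delete(f'/api/lots/{self.lot.name}')
        self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
        # query the Lot model (not self.lot) and check the record is gone
        self.assertEqual(Lot.objects.filter(pk=self.lot.pk).count(), 0)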

How to send numpy array to sagemaker endpoint using lambda function

How do I invoke a SageMaker endpoint with input data of type numpy.ndarray?
I have deployed a SageMaker model and am trying to hit it using a Lambda function,
but I am unable to figure out how to do it. I am getting a server error.
The total data set has shape=(91, 5, 12). Below is one row of the input data.
array([[[0.30440741, 0.30209799, 0.33520652, 0.41558442, 0.69096432,
0.69611016, 0.25153326, 0.98333333, 0.82352941, 0.77187154,
0.7664042 , 0.74468085],
[0.30894981, 0.33151662, 0.22907725, 0.46753247, 0.69437367,
0.70410559, 0.29259044, 0.9 , 0.80882353, 0.79401993,
0.89501312, 0.86997636],
[0.33511896, 0.34338939, 0.24065546, 0.48051948, 0.70384005,
0.71058715, 0.31031288, 0.86666667, 0.89705882, 0.82724252,
0.92650919, 0.89125296],
[0.34617355, 0.36150251, 0.23726854, 0.54545455, 0.71368726,
0.71703244, 0.30228356, 0.85 , 0.86764706, 0.86157254,
0.97112861, 0.94089835],
[0.36269508, 0.35923332, 0.40285461, 0.62337662, 0.73325475,
0.7274392 , 0.26241391, 0.85 , 0.82352941, 0.89922481,
0.9343832 , 0.90780142]]])
I am using the following code but am unable to invoke the endpoint:
import boto3

def lambda_handler(event, context):
    # The SageMaker runtime is what allows us to invoke the endpoint that we've created.
    runtime = boto3.Session().client('sagemaker-runtime')
    endpoint = 'sagemaker-tensorflow-2019-04-22-07-16-51-717'
    print('givendata ', event['body'])
    # data = numpy.array([numpy.array(xi) for xi in event['body']])
    data = event['body']
    print('numpy array ', data)
    # Now we use the SageMaker runtime to invoke our endpoint, sending the review we were given
    response = runtime.invoke_endpoint(EndpointName=endpoint,        # The name of the endpoint we created
                                       ContentType='application/json',  # The data format that is expected
                                       Body=data)                    # The actual review
    # The response is an HTTP response whose body contains the result of our inference
    result = response['Body'].read().decode('utf-8')
    print('response', result)
    # Round the result so that our web app only gets '1' or '0' as a response.
    result = round(float(result))
    return {
        'statusCode': 200,
        'headers': {'Content-Type': 'text/plain', 'Access-Control-Allow-Origin': '*'},
        'body': str(result)
    }
I am unable to figure out what should be written in place of ContentType,
because I am not aware of the MIME type for a numpy.ndarray.
Illustration of what I had and how I solved it:
from sagemaker.tensorflow import TensorFlowPredictor
predictor = TensorFlowPredictor('sagemaker-tensorflow-serving-date')
data = np.array(raw_data)
response = predictor.predict(data=data)
predictions = response['predictions']
print(predictions)
What I did to find the answer:
I looked up the predictor.py and content_types.py implementations in the sagemaker Python library to see what content types it used and what arguments it had.
First I thought that the application/x-npy content_type was used, and thus tried using the serialisation code from predictor.py and passing application/x-npy as the content_type to invoke_endpoint.
After receiving 415 (unsupported media type), the issue was clearly still the content_type. The following print statements helped me reveal what content_type the predictor actually uses (application/json), and so I took the appropriate serialisation code from predictor.py:
from sagemaker.tensorflow import TensorFlowPredictor
predictor = TensorFlowPredictor('sagemaker-tensorflow-serving-date')
data = np.array(raw_data)
response = predictor.predict(data=data)
print(predictor.content_type)
print(predictor.accept)
predictions = response['predictions']
print(predictions)
TL;DR
Solution for lambda:
import json
import boto3
import botocore.config
import numpy as np  # numpy must be bundled with the Lambda; see the note below

ENDPOINT_NAME = 'sagemaker-tensorflow-serving-date'
config = botocore.config.Config(read_timeout=80)
runtime = boto3.client('runtime.sagemaker', config=config)
data = np.array(raw_data)
payload = json.dumps(data.tolist())
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                   ContentType='application/json',
                                   Body=payload)
result = json.loads(response['Body'].read().decode())
res = result['predictions']
Note: numpy is not included in Lambda by default, so you would either need to bundle numpy yourself or, instead of data.tolist(), work with a plain Python list and json.dumps that list (of lists). From your code it seems you already have a Python list rather than a numpy array, so a simple JSON dump should work.
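For illustration, a minimal list-only variant inside the Lambda handler might look like the sketch below; it assumes event['body'] already holds the nested Python list and uses a placeholder endpoint name:
import json
import boto3
runtime = boto3.client('runtime.sagemaker')
def lambda_handler(event, context):
    # event['body'] is assumed to already be a nested Python list, so no numpy is needed
    payload = json.dumps(event['body'])
    response = runtime.invoke_endpoint(EndpointName='your-endpoint-name',  # placeholder
                                       ContentType='application/json',
                                       Body=payload)
    result = json.loads(response['Body'].read().decode())
    return {'statusCode': 200, 'body': json.dumps(result['predictions'])}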
If you are training and hosting a custom algorithm on SageMaker using TensorFlow, you can serialize/deserialize the request and response as JSON, as in the TensorFlow Serving Predict API.
import numpy
from sagemaker.predictor import json_serializer, json_deserializer
# define predictor
predictor = estimator.deploy(1, instance_type)
# format request
data = {'instances': numpy.asarray(np_array).astype(float).tolist()}
# set predictor request/response formats
predictor.accept = 'application/json'
predictor.content_type = 'application/json'
predictor.serializer = json_serializer
predictor.deserializer = json_deserializer
# run inference using SageMaker predict class
# https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/predictor.py
predictor.predict(data)
You can refer to the example notebook here to train and host a custom TensorFlow container.

Google Custom Search via API is too slow

I am using Google Custom Search to index content on my website.
When I use a REST client to make the get request at
https://www.googleapis.com/customsearch/v1?key=xxx&q=query&cx=xx
I get a response in under a second.
But when I try to make the call using my code, it takes up to six seconds. What am I doing wrong?
__author__ = 'xxxx'
import urllib2
import logging
import gzip
from cfc.apikey.googleapi import get_api_key
from cfc.url.processor import set_query_parameter
from StringIO import StringIO

CX = 'xxx:xxx'
URL = "https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=sd&fields=kind,items(title)" % (get_api_key(), CX)

def get_results(query):
    url = set_query_parameter(URL, 'q', query)
    request = urllib2.Request(url)
    request.add_header('Accept-encoding', 'gzip')
    request.add_header('User-Agent', 'cfc xxxx (gzip)')
    response = urllib2.urlopen(request)
    if response.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(response.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    return data
I have implemented performance tips mentioned in Performance Tips. I would appreciate any help. Thanks.