How to add username and name columns to pandas dataframe with search_all_tweets lookup in python - pandas

I am trying to collect tweets from 2022 using Twitter API. I can record the tweet_fields for the tweets, but I can't figure out how to add columns for the username and name (the user_fields) for each tweet.
I'm running the following code:
import requests
import os
import json
import tweepy
import pandas as pd
from datetime import timedelta
import datetime

bearer_token = "my_bearer_token_here"
keyword = "#WomeninSTEM"
start_time = "2022-01-01T12:01:00Z"
end_time = "2023-01-01T12:01:00Z"

client = tweepy.Client(bearer_token=bearer_token)

# "username" and "name" are fields of the User object, not the Tweet, so
# asking for user_fields alone is not enough: we must also request the
# author_id *expansion* so the matching User objects are returned in
# responses.includes["users"], then join tweet -> user on author_id.
responses = client.search_all_tweets(
    query="#WomeninSTEM",
    max_results=500,
    start_time=start_time,
    end_time=end_time,
    expansions=["author_id"],  # pulls the User objects into .includes
    user_fields=["username", "name"],
    tweet_fields=["in_reply_to_user_id", "author_id", "lang",
                  "public_metrics", "created_at", "conversation_id"],
)

# Map author_id -> User once, so each tweet row can look up its author.
users_by_id = {user.id: user for user in responses.includes["users"]}

rows = []
for tweet in responses.data:
    author = users_by_id.get(tweet.author_id)
    metrics = tweet.public_metrics
    rows.append([
        author.username if author else None,
        author.name if author else None,
        tweet.text,
        metrics["like_count"],
        metrics["retweet_count"],
        metrics["reply_count"],
        metrics["quote_count"],
        tweet.created_at,
        tweet.text.split(" ")[0],  # leading token (e.g. the @handle replied to)
        tweet.in_reply_to_user_id,
        tweet["conversation_id"],
    ])

data = pd.DataFrame(rows, columns=[
    "username", "name", "text", "favorite_count", "retweet_count",
    "reply_count", "quote_count", "created_at", "reply_to",
    "reply_to_user_id", "conversation_id",
])
Whenever I try to include username and name, I get this error: KeyError Traceback (most recent call last)

Assuming you're querying at https://api.twitter.com/2/tweets/[...], the response does not have a 'username' or a 'name' parameter, that's why you're getting a KeyError when trying to access them.
It does have an 'author_id' parameter, which you can use to perform an additional query at https://api.twitter.com/2/users/:id and retrieve 'username' and 'name'.
More info here and here.

Related

Getting error in a python script when using QuickSight API calls to retrieve the value of user parameter selection

I am working on a python script which will use QS APIs to retrieve the user parameter selections but keep getting the below error:
parameters = response['Dashboard']['Parameters'] KeyError: 'Parameters'
If I try a different code to retrieve the datasets in my QS account, it works but the Parameters code doesn't. I think I am missing some configuration.
#Code to retrieve the parameters from a QS dashboard (which fails):
import boto3

quicksight = boto3.client('quicksight')

# Fetch the dashboard description for the given account/dashboard pair.
response = quicksight.describe_dashboard(
    AwsAccountId='99999999999',
    DashboardId='zzz-zzzz-zzzz'
)

# NOTE(review): this lookup is what raises the KeyError reported above --
# DescribeDashboard does not appear to return a top-level 'Parameters'
# key on the 'Dashboard' object; verify the response shape against the
# QuickSight API reference.
parameters = response['Dashboard']['Parameters']
for parameter in parameters:
    print(parameter['Name'], ':', parameter['Value'])
#Code to display the datasets in the QS account (which works):
import boto3
import json

account_id = '99999999999'
session = boto3.Session(profile_name='default')
qs_client = session.client('quicksight')

# First page of data sets, then follow NextToken until the listing is exhausted.
response = qs_client.list_data_sets(AwsAccountId=account_id, MaxResults=100)
results = response['DataSetSummaries']
while "NextToken" in response:
    response = qs_client.list_data_sets(
        AwsAccountId=account_id, MaxResults=100,
        NextToken=response["NextToken"])
    results.extend(response["DataSetSummaries"])

for summary in results:
    data_set_id = summary['DataSetId']
    try:
        response = qs_client.describe_data_set(
            AwsAccountId=account_id, DataSetId=data_set_id)
        print("succeeded loading: {} for data set {} ".format(
            data_set_id, response['DataSet']['Name']))
    except Exception as err:
        # Was a bare `except:` that hid the failure reason (and would even
        # swallow KeyboardInterrupt); report why the describe call failed.
        print("failed loading: {} ({})".format(data_set_id, err))

How do I connect items from one parse method to another?

'''
import scrapy
from ..items import GooddealItem


class FarmtoolsSpider(scrapy.Spider):
    """Scrape listing cards, then follow each ad for its detail fields."""

    name = 'farmtools'
    allowed_domains = ['www.gooddeal.com']
    start_urls = ['https://www.gooddeal.com/all?source=private&sort=publishdate%20desc']

    def parse(self, response):
        rows = response.xpath('//ul[@class="card-collection"]/li')
        for row in rows:
            link = row.xpath('.//a/@href').get()  # the full ad link
            link_split = link.split('/')[-1]      # first split of the url
            linkid = link_split.split('?')[0]     # second split -> the ad id
            title = row.xpath('.//div[1]/p[@class="card__body-title"]/text()').get()
            county = row.xpath('.//a/div/div[2]/div[1]/ul[@class="card__body-keyinfo"]/li[contains(text(),"min")]/following-sibling::node()/text()').get()
            price = row.xpath('.//p[@class="card__price"]/span[1]/text()').get()
            subcat = row.xpath('.//a/div/div[2]/div[1]/p[2]/text()[2]').get()
            zero = row.xpath('.//a/div/div[2]/div[1]/ul[@class="card__body-keyinfo"]/li[contains(text(),"min")]/text()').get()
            if zero == '0 min':
                # Build a fresh item per ad: a single shared item would be
                # overwritten by later iterations before its request runs.
                items = GooddealItem()
                items['linkid'] = linkid
                items['title'] = title
                items['county'] = county
                items['price'] = price
                items['subcat'] = subcat
                items['zero'] = zero
                items['link'] = link
                # Hand the partially-filled item to the detail-page callback
                # via cb_kwargs (Scrapy >= 1.7).
                yield response.follow(url=link, callback=self.parse_item_page,
                                      cb_kwargs={'scraped_item': items})

    def parse_item_page(self, response, scraped_item):
        items = scraped_item
        row = response.xpath('/html/body[1]')
        # NOTE: no trailing commas here -- in the original they turned every
        # detail value into a one-element tuple like (None,).
        items['category'] = row.xpath('.//main/div/div[1]/div/div[1]/div/nav/span/a[1]/span/text()').get()
        items['views'] = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[1]/div[3]/div[1]/div/div[1]/div/div/span[2]/text()').get()
        items['seller_id'] = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[1]/div[1]/div[2]/a/@href').get()
        items['seller_ads'] = row.xpath('.//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[2]/div/dl[3]/dd/text()').get()
        items['lifetime_ads'] = row.xpath('//main/div/div[1]/div/div[2]/div[2]/div[2]/div[3]/div/div[2]/div/dl[4]/dd/text()').get()
        yield items
'''
I'm stuck on this as it's my first attempt. When I run the code I'm just getting back:
2020-07-12 22:53:21 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.gooddeal.com/dogs-for-sale/dachshunds/25348559>
{'category': (None,),
'lifetime_ads': None,
'seller_ads': (None,),
'seller_id': (None,),
'views': (None,)}
Any help will be appreciated, thanks
I'm assuming you want the data scraped in parse method to be joined together with the data scraped in the parse_item_page.
If you are using Scrapy v1.7+ you can use cb_kwargs when building the request.
This parameter receives a dict with arbitrary data that will be used as argument in the callback function. So you would have to do something like this in your request:
...
yield response.follow(url = link, callback=self.parse_item_page, cb_kwargs={'scraped_item': items})
For this to work, you also need to change the callback function to receive this parameter. Like this:
def parse_item_page(self, response, scraped_item):
...
Scrapy will take care of sending the scraped_item when calling the parse_item_page.
If you are using Scrapy v1.6 or older:
You will need to use the meta parameter. This method still works in more recent versions, but cb_kwargs(solution above) are preferable.
When building the request you will use the meta parameter to include some arbitrary data in the request. The data will be accessible in the response object that the callback function receives. Your request should look like this:
...
yield response.follow(url = link, callback=self.parse_item_page, meta={'scraped_item': items})
In this case you will access the data by calling response.meta:
def parse_item_page(self, response):
items = response.meta.get('scraped_item') #response.meta is a dict
...

I want to get the excel file from the data frame created which automatically changes as written in the code

i have tried two methods and both showing different location as given by me in this image
apikey = 'abcd'

import pandas as pd
from alpha_vantage.timeseries import TimeSeries
import time

# Pandas output so the response arrives as a DataFrame.
ts = TimeSeries(key=apikey, output_format='pandas')
data, metadata = ts.get_intraday(symbol='name', interval='1min', outputsize='full')
data

# Refresh the workbook once a minute, forever (each pass overwrites the file).
while True:
    data, metadata = ts.get_intraday(symbol='TCS', interval='1min', outputsize='full')
    data.to_excel('livedat.xlsx')
    time.sleep(60)
The code is running properly but I don't know how to get the data file in excel.
Important: the method should produce a file that is updated on a schedule, i.e. every 1 minute, automatically.
Also i am using IBM watson studio to write the code.
I am not familiar with the alpha_vantage wrapper that you are using; however, this is how I would approach your question. The code works and I have included comments.
To get the file in the python script i would do pd.read_excel(filepath).
import requests
import pandas as pd
import time
import datetime

# Your API key and the endpoint we will request from.
API_KEY = "YOUR API KEY"
url = "https://www.alphavantage.co/query?"


def Generate_file(symbol="IBM", interval="1min"):
    """Fetch intraday prices for *symbol* and write them to a timestamped .xlsx file."""
    # URL parameters for the TIME_SERIES_INTRADAY endpoint.
    parameters = {"function": "TIME_SERIES_INTRADAY",
                  "symbol": symbol,
                  "interval": interval,
                  "apikey": API_KEY,
                  "outputsize": "compact"}
    # Get the JSON response from AlphaVantage.
    response = requests.get(url, params=parameters)
    data = response.json()
    # Filter the response down to only the time-series data we want.
    time_series_interval = f"Time Series ({interval})"
    if time_series_interval not in data:
        # AlphaVantage returns an error / rate-limit note instead of data;
        # surface that message rather than raising an opaque KeyError.
        raise RuntimeError(f"Unexpected AlphaVantage response: {data}")
    prices = data[time_series_interval]
    # Convert the filtered response to a pandas DataFrame.
    df = pd.DataFrame.from_dict(prices, orient="index").reset_index()
    df = df.rename(columns={"index": time_series_interval})
    # Timestamp the Excel file name so each run does not overwrite the
    # previous file with new data.
    current_time = datetime.datetime.now()
    file_timestamp = current_time.strftime("%Y%m%d_%H.%M")
    filename = f"livedat_{file_timestamp}.xlsx"
    df.to_excel(filename)


# Set a limit on the number of calls we make, to prevent an infinite loop.
call_limit = 3
for _ in range(call_limit):
    Generate_file()  # our function
    time.sleep(60)

How to perform operation on flask-admin database columns to store results in other column of same table

I am building flask admin app where I need to store total of couple of integer columns into third column of same table.
from flask_sqlalchemy import SQLAlchemy
from flask_admin.contrib.sqla import ModelView
from flask import Flask
import os
from flask_admin import Admin

# Create the app exactly once (the original instantiated Flask twice, so the
# first instance -- and anything configured on it -- was silently discarded).
application = Flask(__name__)
project_dir = os.path.dirname(os.path.abspath(__file__))
database_file = "sqlite:///{}".format(os.path.join(project_dir, "testing.db"))
application.config["SQLALCHEMY_DATABASE_URI"] = database_file
application.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
application.secret_key = "ssshhhh"

db = SQLAlchemy(application)
admin = Admin(application, name="FLASK")


class Test(db.Model):
    # first_no + second_no are the operands; total stores their sum.
    id = db.Column("ID", db.Integer, primary_key=True)
    first_no = db.Column("First_no", db.Integer)
    second_no = db.Column("Second_no", db.Integer)
    total = db.Column("Total", db.Integer)


class TestView(ModelView):
    page_size = 20
    edit_modal = True


if __name__ == '__main__':
    db.create_all()
    admin.add_view(TestView(Test, db.session))
    application.run(debug=True)
Above example let me store values in all three fields manually which is not expected.
Expected result that I am looking for get total of couple of integers and stores in database as well.
Two options you can use:
Use the onupdate parameter of Column to set the value of total on update, for example:
total = db.Column("Total", db.Integer, onupdate=first_no + second_no)
Use a hybrid_property to calculated the total without storing the value:
class Test(db.Model):
...
@hybrid_property
def total(self):
return self.first_no + self.second_no

How do i select a random row in admin.view localhost sqlalchemy?

Hi, I'm having a hard time working out how to select a random row from SQLAlchemy in a localhost admin database view. I want it to select a random raffle, but only from the ones with a user assigned, so it picks something like `Colour:Ruby up1:dgg2 up2:fh73`. Below is the code for the localhost database session & the ModelView of the database.
admin = Admin(app, name='raffles', template_mode='bootstrap3')
admin.add_view(ModelView(User, db.session))
admin.add_view(ModelView(Raffle, db.session))
i need help adding code to select a raffle from there
heres the app.py
from flask import Flask, request, render_template, redirect, url_for
from flask_admin import Admin
from flask_admin.contrib.sqla import ModelView
from forms import RaffleForm
from models import db, get_db_uri, User, Raffle
from utils import assign_raffles_to_user, seed_raffles_into_db
from flask_mail import Mail, Message
from mail import mail

app = Flask(__name__)
# Mail settings. NOTE(review): credentials belong in environment variables,
# not hard-coded in source.
app.config['MAIL_SERVER'] = 'smtp.gmail.com'
app.config['MAIL_PORT'] = 465
app.config['MAIL_USERNAME'] = 'osman33454@gmail.com'  # '@' restored; '#' was a paste artifact
app.config['MAIL_PASSWORD'] = 'Password'
app.config['MAIL_USE_TLS'] = False
app.config['MAIL_USE_SSL'] = True
app.config['SQLALCHEMY_DATABASE_URI'] = get_db_uri()
app.config['SECRET_KEY'] = 'some-random-secret-key'

mail.init_app(app)
db.app = app
db.init_app(app)
db.create_all()
seed_raffles_into_db()

admin = Admin(app, name='raffles', template_mode='bootstrap3')
admin.add_view(ModelView(User, db.session))
admin.add_view(ModelView(Raffle, db.session))


# '@' restored on the decorators: written as '#app.route' the routes were
# plain comments and the views were never registered.
@app.route('/', methods=['GET', 'POST'])
def home():
    form = RaffleForm(request.form)
    if request.method == 'POST' and form.validate():
        email = form.email.data
        # Look the user up, creating one on first visit.
        user = User.query.filter_by(email=email).first()
        if user is None:
            user = User(email=email)
            db.session.add(user)
        # assign required raffles to user
        assign_raffles_to_user(form.raffle_count.data, user)
        return redirect(url_for('success'))
    return render_template('home.html', form=form)


@app.route('/success', methods=['GET'])
def success():
    return render_template('success.html')


if __name__ == '__main__':
    app.run(debug=True)
and heres the utils.py
# utility functions go here
import random
import os
from uuid import uuid4
import constants
from models import Raffle, User
from flask_mail import Message
from flask_mail import Mail
from mail import mail
def generate_raffles(count):
    """Yield (colour, part1, part2) tuples for *count* raffle tickets."""
    # range(), not xrange(): the latter is Python 2 only and raises
    # NameError on Python 3, which the rest of this codebase targets.
    for i in range(count):
        colour = constants.COLORS[i % constants.COLORS_LEN]
        uniq = uuid4().hex
        # Two 4-hex-digit halves of one uuid form the unique ticket code.
        uniq_p1, uniq_p2 = uniq[:4], uniq[4:8]
        yield (colour, uniq_p1, uniq_p2)
def seed_raffles_into_db(max_raffles=constants.MAX_RAFFLES):
    """Populate the Raffle table once; subsequent calls are no-ops.

    Returns True when seeding ran, False when it had already been done.
    """
    if is_inited():
        # print() calls: the original Python 2 print statements are syntax
        # errors on Python 3.
        print('Raffles have already been seeded...')
        return False
    from app import db  # local import avoids a circular app <-> utils import
    print('Seeding raffles...')
    for raffle_colour, raffle_up1, raffle_up2 in generate_raffles(max_raffles):
        raffle = Raffle(
            colour=raffle_colour,
            up1=raffle_up1,
            up2=raffle_up2,
        )
        print("Adding", raffle)
        db.session.add(raffle)
    # One commit after the loop persists all rows in a single transaction.
    db.session.commit()
    mark_as_inited()
    return True
def get_unused_raffles(raffle_count):
    """Return up to RAFFLE_PADDING + raffle_count raffles with no user assigned."""
    limit = constants.RAFFLE_PADDING + raffle_count
    unassigned = Raffle.query.filter_by(user=None).limit(limit)
    return unassigned.all()
def mark_as_inited():
    """Create the empty marker file that records that seeding already ran."""
    # 'with' guarantees the handle is closed even if the write mode setup
    # fails, instead of relying on the open(...).close() one-liner.
    with open(constants.INIT_FILE_PATH, 'w'):
        pass
def is_inited():
    """Report whether the raffle seed marker file already exists."""
    return os.path.exists(constants.INIT_FILE_PATH)
def assign_raffles_to_user(raffle_count, user):
    """Randomly assign *raffle_count* unused raffles to *user*, emailing each one."""
    from app import db  # local import avoids a circular app <-> utils import
    raffles = get_unused_raffles(raffle_count)
    for raffle in random.sample(raffles, raffle_count):
        # print() call: Python 2 print statements fail on Python 3.
        print("Assigning {0} to {1}".format(raffle, user))
        msg = Message('Raffle assigned',
                      sender='osman.soloking009@outlook.com',  # '@' restored; '#' was a paste artifact
                      recipients=[user.email])
        # Single assignment: the original `msg.body = myRaffle = ...` chained
        # assignment created a stray, never-used name.
        msg.body = "Assigning {0} to {1}".format(raffle, user)
        mail.send(msg)
        raffle.user = user
    db.session.commit()
    return True
link to see image of my admin view localhost defined tables -where I want the selection to take place
Select a random row with MySQL:
SELECT column FROM table
ORDER BY RAND()
LIMIT 5
Select a random row with IBM DB2
SELECT column, RAND() as IDX
FROM table
ORDER BY IDX FETCH FIRST 1 ROWS ONLY
Select a random row with Microsoft SQL Server:
SELECT TOP 5 column FROM table
ORDER BY NEWID()
Select a random record with Oracle:
SELECT column FROM
( SELECT column FROM table
ORDER BY dbms_random.value )
WHERE rownum &lt;= 5
Select a random row with PostgreSQL:
SELECT column FROM table
ORDER BY RANDOM()
LIMIT 5
Select a random row with SQLite:
SELECT column FROM table
ORDER BY
RANDOM() LIMIT 5;