Extracting data from google worksheet and selenium to write to webpage - selenium

from __future__ import print_function
from googleapiclient.discovery import build
from google.oauth2 import service_account
from selenium import webdriver
import time
# Read the TAX!A1:E5 range from the spreadsheet. `service`, `SPREADSHEET_ID`
# and `user_input` are defined outside this snippet.
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId=SPREADSHEET_ID, range="TAX!A1:E5").execute()
data = result.get('values')

# NOTE: `list` shadows the builtin; the name is kept because the surrounding
# discussion refers to it.
list = []
for i in user_input:
    # Options 1-5 select rows data[0]..data[4]. Each selected row is appended
    # as a whole, so `list` ends up being a list of rows (a 2-D list).
    if i == 1:
        list.append(data[0])
    elif i == 2:
        list.append(data[1])
    elif i == 3:
        list.append(data[2])
    elif i == 4:
        list.append(data[3])
    elif i == 5:
        list.append(data[4])
    else:
        print('ERROR')

print(list)
# The original line had an extra opening parenthesis, which is a SyntaxError.
print('The length of list is: ' + str(len(list)))
print(range(len(list)))

driver = webdriver.Safari()
try:
    driver.get('some_website')
    for i in range(len(list)):
        search_box = driver.find_element_by_xpath('some_search_box')
        search_box.click()
        # list[i] is an entire row, so the whole row is sent in one call.
        search_box.send_keys(list[i])
        # NOTE(review): indentation was lost in the source; the cart-button
        # click is assumed to belong to the loop body -- confirm.
        cart_button = driver.find_element_by_xpath('some_button')
        cart_button.click()
except Exception as e:
    # print any exceptions such as element not found error, then close browser
    print(e)
    print('driver closing on error')
    driver.close()
This is what I get:
[['1', '2', '3', '4', '5'], ['2', '3', '5', '6'], ['6', '4', '2', '3', '4']]
The length of list is : 3
range(0, 3)
The user inputs one or more options, and for each option the script extracts a row from the sheet, which may contain multiple values. This is how the list is formed.
It works as it should, but there is one problem: it types the whole first array into the search box (['1', '2', '3', '4', '5']), but I want it to search with each element individually.
I want the length of list to be 14 not 3, and I don't know how to solve this.
Is it possible to append to list and get one array on the output, the length of array is depending on user input?

for row in list:
    # Inner loop: walk through every element of the current row as well.
    for cell in row:
        # Send a single element instead of the whole row.
        search_box.send_keys(cell)
Just use a nested for loop to go through a 2D list.
You can also simplify the top bit
# Options 1..5 map onto rows data[0]..data[4]; anything else is rejected.
if 1 <= i <= 5:
    list.append(data[i - 1])
else:
    print('ERROR')

Related

searching a pandas dataframe using PySimpleGUI

How do I update my list box using pysimple gui as below
so the user input is AP and the list box suggests "APPLE"
the issue as I see it is describing the updated listbox as the user is inputting with respect to the pandas dataframe
import PySimpleGUI as sg
import pandas as pd
sg.theme('Default1')
pd.set_option('display.max_rows',None)
# Load the formulations table; its PRODUCT column feeds the listbox.
formulation =pd.read_csv('FORMULATIONS.csv')
names=formulation["PRODUCT"]
#names=product.values.tolist()
#names=formulation["PRODUCT"].to_string
#left side search
left_col=[[sg.Text('SEARCH')],
          [sg.Input(size=(20,1),enable_events=True,key='-INPUT-',do_not_clear=True)],
          [sg.Listbox(names,size=(50,len(names)),key='-LIST-',enable_events=True)]]
#right side batch sheet
right_col=[[sg.Text('Product : \n \n ITEM | RAW MATERIAL |')],
           [sg.Text(size=(40,1),key='-TOUT-')]]
#together
layout=[[sg.Column(left_col,element_justification='c'),sg.VSeperator(),sg.Column(right_col)]]
#create window
window =sg.Window('BF-2.1',layout,resizable=True)
#event loop
while True:
    event, values =window.Read()
    # Leave the loop when the window is closed or an 'Exit' event arrives.
    if event in (sg.WIN_CLOSED,'Exit'):
        break
    if values['-INPUT-'] != '' :
        search =values['-INPUT-']
        # NOTE: this compares the PRODUCT column with the literal list
        # ['-INPUT-'] (not with the typed text in `search`) and wraps the
        # result in a list -- this is the line the question asks about.
        new_values=[formulation["PRODUCT"]==['-INPUT-']] #how to use the input to navigate
        window.Element('-LIST-').Update(new_values)
    else:
        # Empty search box: show every product name again.
        window.Element('-LIST-').Update(names)
    # Show the selection when a listbox entry was clicked.
    if event =='-LIST-' and len(values['-LIST-']):
        sg.popup('Selected',values['-LIST-'])
window.close()
have tried
new_values=[x for x in names if search in x]
Use df.loc[df["PRODUCT"]==text]["KIND"] to get items which matched the text.
The following code demonstrates how to use the event from the Input element to filter the DataFrame and update the Listbox element. Try typing 'car', 'motorcycle' or 'ship' to update the Listbox, or clear the input to show all items again.
import pandas as pd
import PySimpleGUI as sg
# Demo catalogue: every product category maps to the kinds that belong to it.
catalogue = {
    "car": ('SUV', 'Hatchback', 'Crossover', 'Convertible', 'Sedan', 'Sports Car', 'Coupe', 'Minivan', 'Station Wagon', 'Pichup Truck'),
    "motorcycle": ('standard', 'cruiser', 'touring', 'sports', 'off-road', 'dual-purpose'),
    "ship": ('Container', 'Bulk Carrier', 'Tanker', 'Passenger', 'Naval', 'Offshore', 'Special Purpose'),
}
# Flatten the mapping into [PRODUCT, KIND] rows, keeping the category order.
data = [[product, kind] for product, kinds in catalogue.items() for kind in kinds]
df = pd.DataFrame(data, columns=["PRODUCT", "KIND"])

# Left column: search box above the list of kinds.
left_col = [
    [sg.Text('SEARCH')],
    [sg.Input(size=20, enable_events=True, key='-INPUT-')],
    [sg.Listbox(df["KIND"], size=(50, 10), key='-LIST-', enable_events=True)],
]
# Right column: static header plus an output text element.
right_col = [
    [sg.Text('Product : \n \n ITEM | RAW MATERIAL |')],
    [sg.Text(size=40, key='-TOUT-')],
]
layout = [
    [sg.Column(left_col, element_justification='c'), sg.VSeperator(), sg.Column(right_col)],
]
window = sg.Window('BF-2.1', layout, resizable=True)

while True:
    event, values = window.read()
    if event == sg.WIN_CLOSED:
        break
    if event == '-INPUT-':
        typed = values['-INPUT-']
        if typed:
            # Keep only the kinds whose PRODUCT equals the lower-cased input.
            matches = df.loc[df["PRODUCT"] == typed.lower()]["KIND"]
        else:
            # Empty input: show every kind again.
            matches = df["KIND"]
        window['-LIST-'].update(matches)
    elif event == '-LIST-' and len(values['-LIST-']):
        sg.popup('Selected', values['-LIST-'])
window.close()

Creating new column in pandas using existing column values as filter using pandas - .isin() fails as Attribute Error

Error: AttributeError: 'int' object has no attribute 'isin'
Question: There are no null values, works in individual code block. Tried to modify the data type of series R to object, error goes : 'str' object has no attribute 'isin'
What am I missing?
Code:
X = [1, 2, 3, 4]
if dg['RFM_Segment'] == '111':
return 'Core'
elif (dg['R'].isin(X) & dg['F'].isin([1]) & dg['M'].isin(X) & (dg['RFM_Segment'] != '111')).any():
return 'Loyal'
elif (dg['R'].isin(X) & dg['F'].isin(X) & dg['M'].isin([1]) & (dg['RFM_Segment'] != '111')).any():
return 'Whales'
elif (dg['R'].isin(X) & dg['F'].isin([1]) & dg['M'].isin([3,4])).any():
return 'Promising'
elif (dg['R'].isin([1]) & dg['F'].isin([4]) & dg['M'].isin(X)).any():
return 'Rookies'
elif (dg['R'].isin([4]) & dg['F'].isin([4]) & dg['M'].isin(X)).any():
return 'Slipping'
else:
return 'NA'
dg['user_segment']= dg.apply(user_segment, axis= 1)
I will assume that you accidentally cut off the top of your code snippet, in which you define user_segment.
The issue lies in the way you tried to use apply. Note that with axis=1, apply passes each row to your function as a Series rather than a DataFrame. So, by indexing into an element of that Series, you will not receive a Series object (as you would when indexing into a DataFrame), but rather an object of the given column's type (like int, str etc.). An example:
import pandas as pd
X = ['a', 'c']
df = pd.DataFrame([['a', 'b'], ['c', 'd'], ['e', 'f']], columns=['col1', 'col2'])
df['col1'].isin(X) # this works, because I'm applying `isin` on the entire column.
def test_apply(x):
    """Print ``x['col1'].isin(X)`` and return ``x`` unchanged.

    Used below with ``df.apply(test_apply, axis=1)`` to demonstrate the
    failure: in that call ``x['col1']`` is a plain value, not a pandas
    object, so it has no ``isin`` method.
    """
    print(x['col1'].isin(X))
    return x
df.apply(test_apply, axis=1) # this doesn't work,
# because I'm applying `isin` on a non-pandas object, in
# this example `str`

In Pandas Dataframe error: nothing to repeat at position 17217

I am trying to use str.contains to identify elements of one column of a dataframe in another column of another dataframe. Here is the code:
pattern = fr"(?:{'|'.join(strategic_accounts['Account Name'])})"
all_leads['in_strategic_list'] = all_leads['Company'].str.contains(pattern).astype(int)
Here are the heads of both dataframes as well as position 17217 of the all_leads dataframe. I don't understand the error because it looks like there isn't anything abnormal at position 17217. Also, all related errors online seem to refer to the error "nothing to repeat at position 0", which seems like it would be a different error since mine came up at position 17217. Any insights appreciated! Thanks!
This mock example works perfectly with the same code:
df1 = pd.DataFrame({'name': ['Marc', 'Jake', 'Sam', 'Brad', 'SpongeBob']})
df2 = pd.DataFrame({'IDs': ['Jake', 'John', 'Marc', 'Tony', 'Bob']})
pattern = fr"(?:{'|'.join(df2['IDs'])})"
df1['In_df2'] = df1['name'].str.contains(pattern).astype(int)
Update:
I have managed to figure out that the error is referring to loc 17217 in pattern not in strategic_accounts df. Printing the loc 17217 in pattern returns '*'. I have tried to apply this function to pattern before inserting it into the str.contains and I can't seem to get it to remove.
import re
pattern = fr"(?:{'|'.join(strategic_accounts['Account Name'])})"
def esc_spec_char(pattern):
    """Attempt to neutralise '*' in ``pattern``; as written it returns ``pattern`` unchanged.

    NOTE: this is the non-working attempt described in the question.
    """
    # Iterating a str yields single characters.
    for p in pattern:
        # '\*' is a two-character string (backslash + asterisk), so a single
        # character from a str never compares equal to it.
        if p == '\*':
            # The result of re.sub is discarded, so nothing would be changed
            # even if this branch were reached.
            re.sub('*', '1', p)
        else:
            continue
    return pattern
pattern = esc_spec_char(pattern)
pattern[17217]
New_Update:
I have applied @LiamFiddler's method of turning the string into a re.Pattern object and run it on a dummy df, and while it does seem to escape the *, it doesn't seem to find the N. Not sure if I made some mistake. Here is the code:
sries = pd.Series(['x','y','$','%','^','N','*'])
ac = '|'.join(sries)
p = re.compile(re.escape(ac))
df1 = pd.DataFrame(data = {'Id' : [123, 232, 344, 455, 566, 377],
'col2' : ["N", "X", "Y", '*', "W", "Z"]})
df1['col2'].str.contains(p, regex=True).astype(int)
EDIT
I realized that re.escape() also escapes the | delimiter, so I think the appropriate solution is to map re.escape() to the series before joining the names:
# Escape every account name on its own, so that the '|' separators added by
# the join below remain regex alternation operators.
strategic_accounts['Escaped Accounts'] = strategic_accounts['Account Name'].map(re.escape)
alternation = '|'.join(strategic_accounts['Escaped Accounts'])
pattern = re.compile(alternation)
Then you can proceed as below with using Series.str.contains(). On your sample dataframe, here is what I get:
# Sample values, several of which are regex metacharacters.
sries = pd.Series(['x', 'y', '$', '%', '^', 'N', '*'])
# Escape each value individually, then join with '|' to build an alternation.
ac = sries.map(re.escape)
p = re.compile('|'.join(ac))
df1 = pd.DataFrame({
    'Id': [123, 232, 344, 455, 566, 377],
    'col2': ["N", "X", "Y", '*', "W", "Z"],
})
# 1 where col2 contains any of the sample values, 0 otherwise.
df1['col2'].str.contains(p, regex=True).astype(int)
Out:
0 1
1 0
2 0
3 1
4 0
5 0
Original
Ok, so based on the discovery of the special character, I think this is your solution:
First, we need to escape the special characters in the strings so that they don't mess up the regex. Fortunately, Python's re module has an .escape() method specifically for escaping special characters.
import re
accounts = '|'.join(strategic_accounts['Account Name'])
pattern = re.compile(re.escape(accounts))
Now we can proceed as before:
all_leads['in_strategic_list'] = all_leads['Company'].str.contains(pattern, regex=True).astype(int)

series.str.split(expand=True) returns error: Wrong number of items passed 2, placement implies 1

I have a series of web addresses, which I want to split them by the first '.'. For example, return 'google', if the web address is 'google.co.uk'
d1 = {'id':['1', '2', '3'], 'website':['google.co.uk', 'google.com.au', 'google.com']}
df1 = pd.DataFrame(data=d1)
d2 = {'id':['4', '5', '6'], 'website':['google.co.jp', 'google.com.tw', 'google.kr']}
df2 = pd.DataFrame(data=d2)
df_list = [df1, df2]
I use enumerate to iterate the dataframe list
for i, df in enumerate(df_list):
df_list[i]['website_segments'] = df['website'].str.split('.', n=1, expand=True)
Received error: ValueError: Wrong number of items passed 2, placement implies 1
You are splitting the website which gives you a list-like data structure. Think [google, co.uk]. You just want the first element of that list so:
for df in df_list:
    # Splitting once on '.' gives two columns; column 0 is the part before
    # the first dot, which is the only piece wanted here.
    parts = df['website'].str.split('.', n=1, expand=True)
    df['website_segments'] = parts[0]
Another alternative is to use extract. It is also ~40% faster for your data:
for df in df_list:
    # Raw string so that `\.` reaches the regex engine as an escaped dot
    # instead of being an invalid string escape. The lazy group captures
    # everything before the first '.'. expand=False returns a Series, which
    # is what a single-column assignment expects.
    df['website_segments'] = df['website'].str.extract(r'(.*?)\.', expand=False)

PsychoPy: Logging User Input Strings

I have a PsychoPy routine that is intended to act as a memory object-span test. In one loop, the software presents an object (a single text character) followed by a user task multiple times. The software remembers the string of characters, and later asks the user to enter the characters as they were presented. This sequence (loop of tasks followed by character recall) is itself presented several times in a larger loop.
The characters are chosen randomly.
I would like to record, either in the CSV file which PsychoPy generates, or a log file of some sort, the characters that the user is entering. How does one do that in the PsychoPy graphical interface system?
The code block used to record the character sequences is:
Begin Routine
givenAnswer = ""
returnPressed = False
R_memPrompt.setText("Please enter characters in the order they were presented, then hit 'Return':")
R_memPrompt.draw()
win.flip()
Each Frame
# Collect typed characters into userInput until 'return' is pressed,
# echoing the current text through the R_disp stimulus after every change.
loopTest = True
userInput = ""
if returnPressed == False:
    while loopTest == True:
        # Ask for the next key press, restricted to the keys in keyList.
        response = event.waitKeys(keyList=['a','b','c','d','e','f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'return', 'backspace', 'left'])
        if response[0] == 'return':
            # Finish input: stop the loop and end the routine.
            loopTest = False
            returnPressed = True
            continueRoutine = False
        elif response[0] == 'backspace':
            # Drop the last character and redraw.
            userInput = userInput[:-1]
            userInput = userInput.upper()
            R_disp.setText(userInput)
            R_disp.draw()
            win.flip()
        else:
            # Append the key name, upper-cased, and redraw.
            # NOTE(review): 'left' is accepted by keyList and would be
            # appended verbatim by this branch -- confirm that is intended.
            userInput = userInput + response[0]
            userInput = userInput.upper()
            R_disp.setText(userInput)
            R_disp.draw()
            win.flip()
End Routine
givenAnswer = givenAnswer + userInput
A later routine has, as its Begin Routine
if memorySequence == givenAnswer: # memorySequence is the prior record of memory characters
# do some stuff
else:
# do some other stuff
Crude (it is a prototype) but the intent of the Each Frame section is simply to reflect characters as they are written to the screen, through the R_disp text stimulus, while allowing the user to backspace and not worry about case sensitivity. The final answer ends up in givenAnswer,
and is later compared to memorySequence which was built up previously.
I would like to dump the contents of those variables, memorySequence and givenAnswer to the CSV file or some log file so that I do not lose the information.
Is there a way to do this?
Certainly. In the "End Routine" tab of your code component, put something like this:
thisExp.addData("sequence", memorySequence)
thisExp.addData("answer", givenAnswer)
This will add two new columns to your data file, with column headers of whatever literal values you put in the quotes.