importing data from MySQL using python and tkinter - pandas

I am trying to fetch a record from MySQL into Python with Tkinter, using an input field for the order ID.
import pymysql
from tkinter import *
from tkinter import messagebox
import mysql.connector as sql
import pandas as pd

def search():
    try:
        db_connection = sql.connect(host='localhost', database='northwind', user='xxxx', password='xxxx')
        db_cursor = db_connection.cursor()
        db_cursor.execute("SELECT * FROM orders WHERE OrderID=%s" % orderid.get())
        table_rows = db_cursor.fetchall()
        df = pd.DataFrame(table_rows)
        df
        e1.configure(state='disable')
        con.Close()
    except:
        messagebox.showinfo('No Data', 'No Such Data available')
        clear()

def clear():
    orderid.set('')
    e1.configure(state='normal')

w1 = Tk()
w1.title('My App')
w1.geometry('500x200')
ptitle = Label(w1, text='''Order Details''')
ptitle.grid(row=0, column=0, columnspan=2)
orderid = StringVar()
11 = Label(w1, text=' Order ID ')
e1 = Entry(w1, textvariable=orderid)
b1 = Button(w1, text='Search', command=search)
11.grid(row=1, column=0)
e1.grid(row=1, column=1)
b1.grid(row=1, column=2)
b2.grid(row=4, column=0)
w1.mainloop()
I am getting the error below:
File "<ipython-input-34-1f76a2830089>", line 38
    11.grid(row = 1, column = 0)
     ^
SyntaxError: invalid syntax

You have a variable name that begins with a digit, which is illegal in Python. Change 11 to something that begins with a letter.
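For illustration, a corrected version of the widget block; the rename is the only change the SyntaxError requires (note that the posted code also calls b2.grid(...) even though b2 is never created, so that line will raise a NameError next):

l1 = Label(w1, text=' Order ID ')  # renamed from 11, which is not a legal identifier
e1 = Entry(w1, textvariable=orderid)
b1 = Button(w1, text='Search', command=search)
l1.grid(row=1, column=0)
e1.grid(row=1, column=1)
b1.grid(row=1, column=2)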

Related

Tkinter and pandas output into a label

I need some assistance. I have written an application that queries all records in a table, but when I run the program the output is printed in the IDE console instead of the lower label, where I want it to appear.
from __future__ import print_function
from ast import Lambda
import sqlalchemy as sa
import pandas as pd
import tkinter as tk
from tkinter import *
from PIL import ImageTk, Image

server = 'ABQSQ03t'
username = 'TECO//AAPWP'
password = '*****##'
timeout = '60'
database = 'NMGC_PIM_DEV'
driver = 'ODBC+Driver+17+for+SQL+Server'
engine = sa.create_engine(
    f'mssql+pyodbc://{server}/{database}?username={username}?password={password}?timeout={timeout}&driver={driver}')
cn = engine.connect()

root = tk.Tk()
root.title("Compliance SQL Backend Reporting")
HEIGHT = 800
WIDTH = 1000

# background image
canv = tk.Canvas(root, width=80, height=80, bg='white')
canv.place(relwidth=1, relheight=1)
img = ImageTk.PhotoImage(Image.open("NMGC_Emera_color240x75.png"))  # PIL solution
canv.create_image(20, 20, anchor='nw', image=img)
background_label = tk.Label(root, image=img)
background_label.place(x=0, y=0, relwidth=1, relheight=1)

# button functionality
# def test_function(entry):
#     print("Button Clicked you typed:", entry)
def test_function(entry):
    sql = pd.read_sql(f'Select * from [dbo].[{entry}]', con=cn)
    df = pd.DataFrame(sql)
    final_str = print(sql)

# GUI setup
canvas = tk.Canvas(root, height=HEIGHT, width=WIDTH)
canvas.pack()
frame = tk.Frame(root, bg='#7B94AD', bd=5)
frame.place(relx=0.5, rely=0.1, relwidth=0.75, relheight=0.1, anchor='n')
entry = tk.Entry(frame, bg='white')
entry.place(relwidth=0.65, relheight=1)
button = tk.Button(frame, text='Generate Report', font=40,
                   command=lambda: test_function(entry.get()))
button.place(relx=0.7, relheight=1, relwidth=0.3)
lower_frame = tk.Frame(root, bg='#7B94AD', bd=10)
lower_frame.place(relx=0.5, rely=0.25, relwidth=0.75, relheight=0.6, anchor='n')
label = tk.Label(lower_frame, bg='white', textvariable=test_function)
label.place(relwidth=1, relheight=1)
root.mainloop()
Sample dataframe headers:
APPGROUPID APPTITLE DESCRIPTION ORGAREANAME CONGROUPNAME DISPLAYLEVELCOUNT CreatedDate CreatedBy ModifiedDate ModifiedBy
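One way to route the output into the lower label, sketched under the question's setup (the cn connection and lower_frame): bind the label to a tk.StringVar and have the button callback set it, since textvariable must reference a Tk variable, not a function:

result_var = tk.StringVar()

def test_function(entry):
    # read the table and push its text into the label instead of the console
    df = pd.read_sql(f'Select * from [dbo].[{entry}]', con=cn)
    result_var.set(df.to_string())

label = tk.Label(lower_frame, bg='white', textvariable=result_var,
                 anchor='nw', justify='left')
label.place(relwidth=1, relheight=1)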

ProgrammingError when trying to skip duplicate data in postgres sql

PostgreSQL will not accept data that violates the primary key. To ignore the duplicate data, I have this code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta

db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))

def import_data(Symbol):
    df = web.DataReader(Symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=3), end=datetime.datetime.now())
    insert_init = """INSERT INTO stockprices
        (Symbol, Date, Volume, Open, Close, High, Low)
        VALUES
        """
    vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
        Symbol,
        Date,
        row.High,
        row.Low,
        row.Open,
        row.Close,
        row.Volume,
    ) for Date, row in df.iterrows()])
    insert_end = """ON CONFLICT (Symbol, Date) DO UPDATE
        SET
        Volume = EXCLUDED.Volume,
        Open = EXCLUDED.Open,
        Close = EXCLUDED.Close,
        Low = EXCLUDED.Low,
        High = EXCLUDED.High
        """
    query = insert_init + vals + insert_end
    engine.execute(query)

import_data('aapl')
I am getting this error:
ProgrammingError: (psycopg2.errors.UndefinedColumn) column "symbol" of relation "stockprices" does not exist
LINE 2: (Symbol,Date, Volume, Open, Close, H...
^
[SQL: INSERT INTO stockprices
Could you please advise what this error means? I got rid of all the double quotes as advised in the comments.
I had used this code to create the table:
def create_price_table(symbol):
    print(symbol)
    df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end=datetime.datetime.now())
    df['Symbol'] = symbol
    df.to_sql(name="stockprices", con=engine, if_exists='append', index=True)
    return 'daily prices table created'

create_price_table('amzn')
Also, as mentioned in the comments, I used this to check the table name:
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE';
Edit 1:
I changed the code as suggested in the comments; the column names are now lowercase. Below is the code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta

db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))

def create_price_table(symbol):
    print(symbol)
    df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end=datetime.datetime.now())
    df['symbol'] = symbol
    df = df.rename(columns={'Open': 'open'})
    df = df.rename(columns={'Close': 'close'})
    df = df.rename(columns={'High': 'high'})
    df = df.rename(columns={'Low': 'low'})
    df = df.rename(columns={'Volume': 'volume'})
    df = df.rename(columns={'Adj Close': 'adj_close'})
    df.index.name = 'date'
    df.to_sql(name="stockprices", con=engine, if_exists='append', index=True)
    return 'daily prices table created'

# create_price_table('amzn')

def import_data(Symbol):
    df = web.DataReader(Symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=3), end=datetime.datetime.now())
    insert_init = """INSERT INTO stockprices
        (symbol, date, volume, open, close, high, low)
        VALUES
        """
    vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
        Symbol,
        Date,
        row.High,
        row.Low,
        row.Open,
        row.Close,
        row.Volume,
    ) for Date, row in df.iterrows()])
    insert_end = """ON CONFLICT (Symbol, Date) DO UPDATE
        SET
        Volume = EXCLUDED.Volume,
        Open = EXCLUDED.Open,
        Close = EXCLUDED.Close,
        Low = EXCLUDED.Low,
        High = EXCLUDED.High
        """
    query = insert_init + vals + insert_end
    engine.execute(query)

import_data('aapl')
This code, however, produces a new error:
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type bigint: "166.14999389648438"
LINE 4: ('aapl','2022-02-23 00:00:00','166.14999...
^
Per my comment, you have two issues:
1. You are trying to INSERT a float value (166.14999389648438) into an integer field. The first thing to figure out is why the mismatch: do you really want the database field to be an integer? The second thing is that forcing a float into an integer will work if the value is entered as a float/numeric:
select 166.14999389648438::bigint;
-- returns 166
though, as you see, it gets truncated. It will not work if entered as a string:
ERROR: invalid input syntax for type bigint: "166.14999389648438"
which is what you are doing. This leads to the second issue below.
2. You are not using proper parameter passing as shown in the link, where among other things is this warning:
Warning
Never, never, NEVER use Python string concatenation (+) or string parameters interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.
For the purposes of this question, the important part is that using parameter passing results in proper type adaptation.
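Note also that in the posted code the VALUES tuples list High, Low, Open, Close, Volume while the column list expects volume, open, close, high, low, so the float High price lands in the bigint volume column. As a sketch (not the original answer's code, and assuming the stockprices table, df, and Symbol from the question), proper parameter passing with psycopg2's execute_values could look roughly like this:

import psycopg2
from psycopg2.extras import execute_values

conn = psycopg2.connect(dbname='stockdata', user='postgres',
                        password='123', host='localhost', port=5433)

# One tuple per row, in the same order as the column list below.
rows = [(Symbol, date, row.Volume, row.Open, row.Close, row.High, row.Low)
        for date, row in df.iterrows()]

insert_sql = """
    INSERT INTO stockprices (symbol, date, volume, open, close, high, low)
    VALUES %s
    ON CONFLICT (symbol, date) DO UPDATE
    SET volume = EXCLUDED.volume,
        open = EXCLUDED.open,
        close = EXCLUDED.close,
        high = EXCLUDED.high,
        low = EXCLUDED.low
"""

with conn, conn.cursor() as cur:
    execute_values(cur, insert_sql, rows)  # parameters are adapted to the column types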

'KeyError' in Python

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import numpy as np

dt = pd.read_csv("C:\Subhro\ML_Internship\MARUTI_2.csv")
data = pd.DataFrame(dt)
data = data.drop('Date', axis=1)
data.drop(['Unnamed: 0'], axis=1, inplace=True)
print(data)

Roll_Mean_14 = data['Close Price'].rolling(window=14).mean()
Standard_Dev_14 = data['Close Price'].rolling(window=14).mean().std()
Upper_Band_14 = data['Close Price'].rolling(window=14).mean() + (2*Standard_Dev_14)
Low_Band_14 = data['Close Price'].rolling(window=14).mean() - (2*Standard_Dev_14)
avg_stock_price = data['Average Price']
stock_price = data['Close Price']

data['Roll_Avg'] = Roll_Mean_14
data['Upper_Band'] = Upper_Band_14
data['Lower_Band'] = Low_Band_14
data['Avg_Stock_Price'] = avg_stock_price
data = data.drop(data.head(14).index, inplace=False)
print(data)

for i in (data):
    if((data['Close Price'][i])<(data['Lower_Band'][i])):
        data['Call'][i]='Buy'
    elif((data['Close Price'][i])>(data['Lower Band'][i])) and ((data['Close Price'][i])<(data['Roll_Avg'])):
        data['Call'][i]='Hold Buy/Liquidate Short'
    elif((data['Close Price'][i])>(data['Roll_Avg'][i])) and ((data['Close Price'][i])<(data['Upper Band'])):
        data['Call'][i]='Hold Short/Liquidate Buy'
    elif((data['Close Price'][i])>(data['Upper_Band'])):
        data['Call'][i]='Short'
print(data)
In this code, I am creating a new column, 'Call', holding the categories 'Buy', 'Short', 'Hold Buy/Liquidate Short', and 'Hold Short/Liquidate Buy' according to the conditions given in the code. On running it, I get the error
KeyError: 'Symbol' at the line
if((data['Close Price'][i])<(data['Lower_Band'][i])):
The way you are accessing the dataframe is incorrect: for i in (data) iterates over the column names, not the row index, which is why a column label ('Symbol') turns up in your KeyError. Iterate over the index instead:
for i in data.index:
    if (data['Close Price'][i]) < (data['Lower_Band'][i]):
The way you access a particular value (cell) in a dataframe (table) is:
data[column_name][row_index]
or, more explicitly, data.loc[row_index, column_name].
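As an aside, this kind of row classification is usually written without an explicit loop. A sketch using numpy.select, which checks the conditions in order exactly like the if/elif chain (assuming the data frame built in the question):

import numpy as np

conditions = [
    data['Close Price'] < data['Lower_Band'],
    data['Close Price'] < data['Roll_Avg'],
    data['Close Price'] < data['Upper_Band'],
]
choices = ['Buy', 'Hold Buy/Liquidate Short', 'Hold Short/Liquidate Buy']
data['Call'] = np.select(conditions, choices, default='Short')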

could not convert string to float in python

I am trying to do principal component analysis on a CSV file, but when I run the code I get this error:
C:\Users\Lenovo\Desktop>python pca.py
ValueError: could not convert string to float: Annee;NET;INT;SUB;LMT;DCT;IMM;EXP;VRD
This is my CSV file.
I tried removing any spaces and anything else I could think of.
This is my Python script; I don't know what I am missing.
Note: I run this code under Python 2.7.
from sklearn.externals import joblib
import numpy as np
import glob
import os
import time
import numpy

my_matrix = numpy.loadtxt(open("pca.csv", "rb"), delimiter=",", skiprows=0)

def pca(dataMat, r, autoset_r=False, autoset_rate=0.9):
    """
    purpose: principal components analysis
    """
    print("Start to do PCA...")
    t1 = time.time()
    meanVal = np.mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVal
    # normData = meanRemoved / np.std(dataMat)
    covMat = np.cov(meanRemoved, rowvar=0)
    eigVals, eigVects = np.linalg.eig(np.mat(covMat))
    eigValIndex = np.argsort(-eigVals)
    if autoset_r:
        r = autoset_eigNum(eigVals, autoset_rate)
        print("autoset: take top {} of {} features".format(r, meanRemoved.shape[1]))
    r_eigValIndex = eigValIndex[:r]
    r_eigVect = eigVects[:, r_eigValIndex]
    lowDDataMat = meanRemoved * r_eigVect
    reconMat = (lowDDataMat * r_eigVect.T) + meanVal
    t2 = time.time()
    print("PCA takes %f seconds" % (t2-t1))
    joblib.dump(r_eigVect, './pca_args_save/r_eigVect.eig')
    joblib.dump(meanVal, './pca_args_save/meanVal.mean')
    return lowDDataMat, reconMat

def autoset_eigNum(eigValues, rate=0.99):
    eigValues_sorted = sorted(eigValues, reverse=True)
    eigVals_total = eigValues.sum()
    for i in range(1, len(eigValues_sorted)+1):
        eigVals_sum = sum(eigValues_sorted[:i])
        if eigVals_sum / eigVals_total >= rate:
            break
    return i
It seems that NumPy has a problem parsing your header row to float.
Try setting skiprows=1 in your np.loadtxt call in order to skip the table header. Note also that the error message shows semicolon-separated fields (Annee;NET;INT;...), so the file appears to be semicolon-delimited rather than comma-delimited, and delimiter=";" is likely needed as well.
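A sketch of the corrected load under those assumptions (one header row, semicolon-separated values):

import numpy as np

# skiprows=1 skips the header line; delimiter=";" matches the
# semicolons visible in the error message.
my_matrix = np.loadtxt("pca.csv", delimiter=";", skiprows=1)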

How can I populate a pandas DataFrame with the result of a Snowflake sql query?

Using the Python Connector I can query Snowflake:
import snowflake.connector

# Gets the version
ctx = snowflake.connector.connect(
    user=USER,
    password=PASSWORD,
    account=ACCOUNT,
    authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
The result is a snowflake.connector.cursor.SnowflakeCursor. How can I convert that to a pandas DataFrame?
You can use DataFrame.from_records() or pandas.read_sql() with snowflake-sqlalchemy; the snowflake-sqlalchemy option has the simpler API.
pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
will return a DataFrame with proper column names taken from the SQL result. iter(cur) converts the cursor into an iterator, and cur.description gives the names and types of the columns.
So the complete code will be
import snowflake.connector
import pandas as pd

# Gets the version
ctx = snowflake.connector.connect(
    user=USER,
    password=PASSWORD,
    account=ACCOUNT,
    authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
If you prefer using pandas.read_sql then you can
import pandas as pd
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL

url = URL(
    account='xxxx',
    user='xxxx',
    password='xxxx',
    database='xxx',
    schema='xxxx',
    warehouse='xxx',
    role='xxxxx',
    authenticator='https://xxxxx.okta.com',
)
engine = create_engine(url)
connection = engine.connect()
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
df = pd.read_sql(query, connection)
There is now a method .fetch_pandas_all() for this, so there is no need for SQLAlchemy anymore.
Note that you need to install the pandas extra of the Snowflake connector:
pip install snowflake-connector-python[pandas]
Full documentation here
import pandas as pd
import snowflake.connector

conn = snowflake.connector.connect(
    user="xxx",
    password="xxx",
    account="xxx",
    warehouse="xxx",
    database="MYDB",
    schema="MYSCHEMA"
)
cur = conn.cursor()
# Execute a statement that will generate a result set.
sql = "select * from MYTABLE limit 10"
cur.execute(sql)
# Fetch the result set from the cursor and deliver it as the pandas DataFrame.
df = cur.fetch_pandas_all()
I just want to leave here a small change I made to the code to ensure that the columns get correct names (in my case the fetch call returned long column names that included information beyond the name itself). I leave it here in case someone needs it:
import snowflake.connector
import pandas as pd

def fetch_pandas(cur, sql, chunk_size=100000):
    # Fetch the result set in chunks and build one DataFrame whose
    # column names are taken from the cursor description.
    cur.execute(sql)
    cols = [desc[0] for desc in cur.description]
    chunks = []
    while True:
        dat = cur.fetchmany(chunk_size)
        if not dat:
            break
        chunks.append(pd.DataFrame(dat, columns=cols))
    if not chunks:
        return pd.DataFrame(columns=cols)
    return pd.concat(chunks, ignore_index=True)

conn = snowflake.connector.connect(
    user='xxxxx',
    password='yyyyyy',
    account='zzzzz',
    warehouse='wwwww',
    database='mmmmmm',
    schema='nnnnn'
)
cursor = conn.cursor()
df = fetch_pandas(cursor, 'select * from "mmmmmm"."wwwww"."table"')