I'm hoping to find a unique address identifier from a SQL table. The table on SQL Server is too large for me to pull into R. I have the connection set up, but I don't know how to query the SQL table for each row of the table I have in R, matching on each column.
So I have one table loaded into R that has postcode, street, house number etc.
In SQL I have the same table, but with a unique identifier that is missing from what I have in R.
How can I pull just the unique identifiers into my table in R?
Below is one line of how it looks on SQL Server. In R I have the same table, but without the UDPRN code. How can I get the UDPRN into my R data for each row?
UDPRN   BUILDING_NUMBER   THROUGHFARE   POST_TOWN   POSTCODE
        59                LONG ROAD     LONDON      N1 2GT
My apologies. In R it should be:
library(tidyverse)
library(RMariaDB)
library(DBI)
conn <- dbConnect(RMariaDB::MariaDB(),
host = 'xxx.xxx.xxx.xxx',
port = 3306,
user = 'xxxxxx',
password = 'xxxxxx',
dbname = 'high_school')
dbListTables(conn)
res <- dbSendQuery(conn, "SELECT * FROM exam WHERE stud_id = 21")
student <- dbFetch(res)   # retrieve the selected row into a data frame
dbClearResult(res)
dbDisconnect(conn)
Just an example where I fetch one student record from a MySQL database, in which stud_id is a unique key.
import pymysql
import pandas as pd

def conn_mysql(conn_arg, qry):
    conn = None
    try:
        conn = pymysql.connect(**conn_arg)
        result = pd.read_sql(qry, conn)
        return result
    except pymysql.err.OperationalError:
        print('connect to MySQL failed.')
    finally:
        # Only close the connection if it was actually opened.
        if conn is not None:
            conn.close()

def main():
    conn_arg = {
        'host': 'xxx.xxx.xxx.xxx',
        'port': 3306,
        'user': 'xxxxxx',
        'password': 'xxxxxx',
        'charset': 'utf8mb4',
        'use_unicode': True
    }
    qry = "SELECT * FROM high_school.exam WHERE stud_id = 21"
    result_set = conn_mysql(conn_arg, qry)

if __name__ == '__main__':
    main()
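Tying this back to the original question: below is a minimal sketch (placeholders only, not the asker's actual SQL Server setup) of the same idea, pulling just the key columns plus the identifier and merging them onto the local table with pandas. The table name address_base and the connection details are assumptions; from R the equivalent would be dbGetQuery() followed by merge().

import pandas as pd
import pymysql

# Example local table (one row from the question), missing the UDPRN column.
local_df = pd.DataFrame({
    'BUILDING_NUMBER': ['59'],
    'THROUGHFARE': ['LONG ROAD'],
    'POST_TOWN': ['LONDON'],
    'POSTCODE': ['N1 2GT'],
})

# Hypothetical connection and table name; adjust to the real server and schema.
conn = pymysql.connect(host='xxx.xxx.xxx.xxx', port=3306,
                       user='xxxxxx', password='xxxxxx', db='high_school')
try:
    # Pull only the join keys plus the identifier, not the whole (large) table.
    keys = pd.read_sql(
        "SELECT UDPRN, BUILDING_NUMBER, THROUGHFARE, POST_TOWN, POSTCODE "
        "FROM address_base",
        conn)
finally:
    conn.close()

# Make sure the key columns have the same dtype on both sides, then a left merge
# attaches the UDPRN to each local row.
keys['BUILDING_NUMBER'] = keys['BUILDING_NUMBER'].astype(str)
local_df = local_df.merge(
    keys,
    on=['BUILDING_NUMBER', 'THROUGHFARE', 'POST_TOWN', 'POSTCODE'],
    how='left')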
I have a postgresql table with this schema:
id       terminated   code
string   boolean      integer
I want to add values from a pandas dataframe using this code:
# imports needed by the code below
import sys
import psycopg2

param_dic = {
    "host" : "xxx",
    "database" : "xxxx",
    "user" : "xxxxx",
    "password" : "xxxx"
}

def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1)
    return conn

conn = connect(param_dic)

def single_insert(conn, insert_req):
    """ Execute a single INSERT request """
    cursor = conn.cursor()
    try:
        cursor.execute(insert_req)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    cursor.close()
and then I am using:
for i in df.index:
    query = """
    INSERT into status(id, terminated, code) values('%s','%s','%s');
    """ % (df['id'], df['terminated'], df['code'])
    single_insert(conn, query)

# Close the connection
conn.close()
But I am getting this error msg:
Error: invalid input syntax for type boolean: "0 1
16 1
28 1
44 1
51 1
..
1604 1
1615 1
Can anyone help me with this?
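For what it's worth, the error happens because df['id'], df['terminated'] and df['code'] are whole columns, so the string representation of the entire Series ends up in the INSERT statement. Below is a minimal sketch of one way around it, passing one row's values at a time as query parameters (the status table and column names are taken from the question; the type conversions are assumptions about the dataframe's dtypes):

insert_query = "INSERT INTO status (id, terminated, code) VALUES (%s, %s, %s)"
cursor = conn.cursor()
for _, row in df.iterrows():
    # Pass plain Python values for one row instead of formatting whole columns in.
    cursor.execute(insert_query,
                   (str(row['id']), bool(row['terminated']), int(row['code'])))
conn.commit()
cursor.close()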
So, I was working on an API with Flask. The data is in DB2. I tried to connect with pyodbc as below:
@app.route('/api/acrdkl/all', methods=['GET'])
def api_all():
    conn = pyodbc.connect("DSN=AS400;UID=....;PWD=....")
    cur = conn.cursor()
    all_books = cur.execute("""select trim(dkkdcb), trim(dkkdps), trim(dkcob), trim(dkureg), trim(dkbktg), trim(dkblrg), trim(dkthrg)
        from simdta.ACRDKL where dkkdcb=1402 and dkblrg=10 and dkthrg=2020""")
    rows = cur.fetchall()
    result = []
    for dt in rows:
        result.append([x for x in dt])
    return jsonify(result)
Results are shown as JSON.
But when I tried to use some parameter as below
@app.route('/api/acrdkl/filter', methods=['GET'])
def api_filter():
    dkkdcb = request.args.get('DKKDCB', 0)
    dkblrg = request.args.get('DKBLRG', 0)
    dkthrg = request.args.get('DKTHRG', 0)
    query = """select trim(dkkdcb), trim(dkkdps), trim(dkcob), trim(dkureg),
        trim(dkbktg), trim(dkblrg), trim(dkthrg)
        from simdta.ACRDKL WHERE """
    conn = pyodbc.connect("DSN=AS400;UID=.....;PWD=.....")
    cur = conn.cursor()
    rows = cur.execute(query, [int(dkkdcb), int(dkblrg), int(dkthrg)])
    rows.fetchall()
    print("rows 2 ", rows)
    result = []
    for dt in rows:
        result.append([x for x in dt])
    return jsonify(results)
And when I go to http://127.0.0.1:5000/api/acrdkl/filter?DKKDCB=1402&DKBLRG=10&DKTHRG=2020 it throws an error like this:
pyodbc.DataError: ('22023', '[22023] [Microsoft][ODBC DB2 Driver]Data
exception - SQLSTATE 22023, SQLCODE -302. SQLSTATE: 22023, SQLCODE:
-302 (-302) (SQLExecDirectW)')
How do I get the desired result? Where is my mistake? Any help would be appreciated. Thanks.
I don't see that you are accessing the request data provided by Flask, e.g.:
dkbrlg=request.args.get('dkbrlg',0)
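One more thing worth noting: in the filter route the WHERE clause has no conditions (or they were lost), yet three parameters are passed to execute(). Below is a minimal sketch of how that query might look with ? parameter markers, assuming those three columns are the intended filters:

query = """SELECT trim(dkkdcb), trim(dkkdps), trim(dkcob), trim(dkureg),
                  trim(dkbktg), trim(dkblrg), trim(dkthrg)
           FROM simdta.ACRDKL
           WHERE dkkdcb = ? AND dkblrg = ? AND dkthrg = ?"""
cur = conn.cursor()
cur.execute(query, [int(dkkdcb), int(dkblrg), int(dkthrg)])
rows = cur.fetchall()                      # keep the rows that fetchall() returns
return jsonify([list(row) for row in rows])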
I am trying to save a data frame that was first imported into pandas from PostgreSQL as dfraw. I then do some manipulation, create another dataframe df, and want to save it back to the same PostgreSQL database using SQLAlchemy. But when I try to save it back, I get the error 'DataFrame' objects are mutable, thus they cannot be hashed.
Please find the code below:
import psycopg2
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine
# connect the database to python
# Update connection string information
host = "something.something.azure.com"
dbname = "abcd"
user = "abcd"
password = "abcd"
sslmode = "require"
schema = 'xyz'
# Construct connection string
conn_string = "host={0} user={1} dbname={2} password={3} sslmode={4}".format(host, user, dbname, password, sslmode)
conn = psycopg2.connect(conn_string)
print("Connection established")
cursor = conn.cursor()
# Fetch all rows from table
cursor.execute("SELECT * FROM xyz.abc;")
rows = cursor.fetchall()
# Convert the tuples in dataframes
dfraw = pd.DataFrame(rows, columns =["ID","Timestamp","K","S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"])
dfraw[["S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"]] = dfraw[["S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"]].apply(pd.to_numeric)
dfraw[["Timestamp","K"]]=dfraw[["Timestamp","K"]].apply(pd.to_datetime)
# Creating temp files
temp1 = dfraw
dfraw = temp1
# creating some fucntions for data manipulation and imputations
def remZero(df, dropCol):
    for k in df.drop(dropCol, axis=1):
        if all(df[k] == 0):
            continue
        if any(df[k] == 0):
            print(k)
            df[k] = df[k].replace(to_replace=0, method='ffill')
    return df
# Drop Columns function
dropCol = ['Timestamp','K','ID','H','C','S']
dropCol2 = ['Timestamp','K','ID','Shift']
df = remZero(dfraw,dropCol)
from sqlalchemy import create_engine
engine = create_engine('postgresql://abcd:abcd@something.something.azure.com:5432/abcd')
df.to_sql(name = df,
con=engine,
index = False,
if_exists= 'replace'
)
Error message:
'DataFrame' objects are mutable, thus they cannot be hashed
Found the basic error in the code: I just missed putting the inverted commas around the data frame name to be published. Basic hygiene was missed:
df.to_sql(name = "df",
con=engine,
index = False,
if_exists= 'replace'
)
I think I am stuck on an easy task as a beginner, but I have to ask this question.
My objective is to create another list from data obtained from a SQL Table.
This list will be created in accordance with the data input by the user.
The SQL table has no problem, but I couldn't write to a txt file, despite not receiving an error message.
Where is the problem?
import sqlite3

db = sqlite3.connect("air2.sql")
cs = db.cursor()
epcs = dict()
for i in range(19):
    epcs[i] = 0

def addepc():
    epcNo = input("EPC No:")
    a = "SELECT * FROM 'epcval5' WHERE epc='EPC{}'".format(epcNo)
    cs.execute(a)
    data = cs.fetchone()
    print("You have selected this EPC:")
    for i in data:
        print(i)
    b = "SELECT value FROM 'epcval5' as float WHERE epc='EPC{}'".format(epcNo)
    cs.execute(b)
    epcv = cs.fetchone()
    res = str('.'.join(str(ele) for ele in epcv))
    print(type(res))
    with open('epcs.txt', 'w') as f:
        epcs[epcNo] = res
        f.write(epcs[epcNo])

addepc()
print("Done! Press ENTER to continue")
input()
You could use pandas to write it to a csv file
import sqlite3
import pandas as pd

with sqlite3.connect(DB_FILENAME) as con:
    df = pd.read_sql_query('your sql query', con)
    df.to_csv('file_name')
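Below is a minimal sketch of that suggestion applied to the question's epcval5 table, writing the selected value to epcs.txt; the parameterized lookup is an assumption about what the original code intended:

import sqlite3
import pandas as pd

epc_no = input("EPC No:")
with sqlite3.connect("air2.sql") as con:
    # Parameterized query instead of building the SQL with format().
    df = pd.read_sql_query(
        "SELECT value FROM epcval5 WHERE epc = ?", con,
        params=(f"EPC{epc_no}",))

# Write the selected value(s) to a plain text file, one per line.
df.to_csv("epcs.txt", index=False, header=False)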
Using the Python Connector I can query Snowflake:
import snowflake.connector
# Gets the version
ctx = snowflake.connector.connect(
user=USER,
password=PASSWORD,
account=ACCOUNT,
authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
The result is a snowflake.connector.cursor.SnowflakeCursor. How can I convert that to a pandas DataFrame?
You can use DataFrame.from_records() or pandas.read_sql() with snowflake-sqlalchemy. The snowflake-sqlalchemy option has a simpler API.
pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
will return a DataFrame with proper column names taken from the SQL result. The iter(cur) will convert the cursor into an iterator and cur.description gives the names and types of the columns.
So the complete code will be
import snowflake.connector
import pandas as pd
# Gets the version
ctx = snowflake.connector.connect(
user=USER,
password=PASSWORD,
account=ACCOUNT,
authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
If you prefer using pandas.read_sql then you can
import pandas as pd
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL
url = URL(
account = 'xxxx',
user = 'xxxx',
password = 'xxxx',
database = 'xxx',
schema = 'xxxx',
warehouse = 'xxx',
role='xxxxx',
authenticator='https://xxxxx.okta.com',
)
engine = create_engine(url)
connection = engine.connect()
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
df = pd.read_sql(query, connection)
There is now a method fetch_pandas_all() for this, so there is no need for SQLAlchemy anymore.
Note that you need to install snowflake.connector for pandas by doing this
pip install snowflake-connector-python[pandas]
Full documentation here
import pandas as pd
import snowflake.connector
conn = snowflake.connector.connect(
user="xxx",
password="xxx",
account="xxx",
warehouse="xxx",
database="MYDB",
schema="MYSCHEMA"
)
cur = conn.cursor()
# Execute a statement that will generate a result set.
sql = "select * from MYTABLE limit 10"
cur.execute(sql)
# Fetch the result set from the cursor and deliver it as the Pandas DataFrame.
df = cur.fetch_pandas_all()
I just want to leave here a small change I made to the code to ensure that the columns have correct names (in my case the fetch call returned long column names that included information beyond the name itself), in case someone needs it:
import snowflake.connector
import pandas as pd
def fetch_pandas(cur, sql):
    cur.execute(sql)
    rows = 0
    while True:
        dat = cur.fetchmany(n)
        if not dat:
            break
        # Column names are the first element of each entry in the cursor description.
        a = [cur.description[i][0] for i in range(len(cur.description))]
        df = pd.DataFrame(dat, columns=a)
        rows += df.shape[0]
    return df
n = 100000
conn = snowflake.connector.connect(
user='xxxxx',
password='yyyyyy',
account='zzzzz',
warehouse = 'wwwww',
database = 'mmmmmm',
schema = 'nnnnn'
)
cursor = conn.cursor()
fetch_pandas(cursor, 'select * from "mmmmmm"."wwwww"."table"')