Inserting values into postgresql table error using psycopg2 - pandas

I have a postgresql table with this schema:
id terminated code
string boolean integer
I want to add values from a pandas dataframe using this code:
param_dic = {
"host" : "xxx",
"database" : "xxxx",
"user" : "xxxxx",
"password" : "xxxx"
}
def connect(params_dic):
""" Connect to the PostgreSQL database server """
conn = None
try:
# connect to the PostgreSQL server
print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(**params_dic)
except (Exception, psycopg2.DatabaseError) as error:
print(error)
sys.exit(1)
return conn
conn = connect(param_dic)
def single_insert(conn, insert_req):
""" Execute a single INSERT request """
cursor = conn.cursor()
try:
cursor.execute(insert_req)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print("Error: %s" % error)
conn.rollback()
cursor.close()
return 1
cursor.close()
and then I am using:
for i in df.index:
query = """
INSERT into status(id, terminated, code) values('%s','%s','%s');
""" % (df['id'], df['terminated'], df['code'])
single_insert(conn, query)
# Close the connection
conn.close()
But I am getting this error msg:
Error: invalid input syntax for type boolean: "0 1
16 1
28 1
44 1
51 1
..
1604 1
1615 1
Can anyone help me with this?

Related

Loop a SQL query in R

I'm hoping to find a unique address identifier from a SQL table. The table on SQL server is too large for me to pull it into R. I have the connection set up, but I don't know how to query each row of the table that I have in R according to each column.
So I have one table loaded into R that has postcode, street, house number etc.
In SQL I have the same table, but with a unique identifier that is missing from what I have in R.
How can I pull just the unique identifiers into my table in R?
Below is one line of how it looks on SQL Server, in R I have the same table but without the UDPRN code. How can I get UDPRN into my R data for each row?
UDPRN
BUILDING_NUMBER
THROUGHFARE
POST_TOWN
POSTCODE
59
LONG ROAD
LONDON
N1 2GT
My apologize,R should:
library(tidyverse)
library(RMariaDB)
library(DBI)
conn <- dbConnect(RMariaDB::MariaDB(),
host = 'xxx.xxx.xxx.xxx',
port = 3306,
user = 'xxxxxx',
password = 'xxxxxx',
dbname = 'high_school')
dbListTables(conn)
res <- dbSendQuery(conn, "SELECT * FROM exam WHERE stud_id = 21")
dbDisconnect(conn)
Just an example that I fetch one student record from MySQL database. which the stud_id is a unique key.
import pymysql
import pandas as pd
from pandas import Series
def conn_mysql(conn_arg, qry):
try:
conn = pymysql.connect(**conn_arg)
result = pd.read_sql(qry, conn)
return result
except ConnectionRefusedError:
print('connect to MySQL failed.')
finally:
conn.close()
def main():
conn_arg = {
'host': 'xxx.xxx.xxx.xxx',
'port': 3306,
'user': 'xxxxxx',
'password': 'xxxxxx',
'charset': 'utf8mb4',
'use_unicode': True
}
qry = ('''SELECT * FROM high_school.exam where stud_id = 21''')
result_set = conn_mysql(conn_arg, qry)
if __name__ == '__main__':
main()

Creating sqlite db in Python 3 with constructed f string and receiving: sqlite3.OperationalError: near "(": syntax error

I am trying to create a sqlite3 db table using a constructed f string in python 3, however I am receiving the below error:
sqlite3.OperationalError: near "(": syntax error
I had hoped that I wouldn't need to ask here for a syntax error but I have been searching on stackoverflow as well as generally online to identify the issue with no success.
I have compared the code to other samples and equally do not see any difference to the construction, except for that it doesn't appear to be common to use f strings.
I have read the pros/cons of passing parameters and would prefer this f string unless it is the root cause.
I expect the issue might be obvious, however any pointers would be greatly appreciated.
Below is the full code:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
df = pd.read_excel(header_source)
col_list = list(df.columns.values)
prep_col_list = [item.replace(" ", "_") for item in col_list]
col_string = " TEXT, _".join(prep_col_list)
final_col_string = col_string.replace("Primary_ID TEXT", "Primary_ID PRIMARY KEY")
return final_col_string
def CreateSQLdb():
cols = ReadHeaders()
conn = sqlite3.connect(db_path)
c = conn.cursor()
c.execute(f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({cols}) """)
conn.commit()
conn.close()
A sample of the string that is created for the table headers is:
_link TEXT, _Primary_ID PRIMARY KEY, _Status_Description TEXT, _Price_List_Status TEXT, _Brand TEXT, _36_Character_Description TEXT
Solved
After breaking everything down, the root cause was the constructed string. I was able to identify it when trying to export the constructed string to a .txt file and received a unicode error.
Code before:
return final_col_string
Code after:
return final_col_string.encode(encoding="utf-8")
I also added a simple check of the table info for confirmation
def ShowTable(c):
c.execute(f"PRAGMA table_info({db_table_name})")
print (c.fetchall())
Complete code encase anyone else comes across this issue:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
df = pd.read_excel(header_source)
col_list = list(df.columns.values)
prep_col_list = [item.replace(" ", "_") for item in col_list]
col_string = " TEXT, _".join(prep_col_list)
final_col_string = col_string.replace("Primary_ID TEXT", "Primary_ID PRIMARY KEY")
return final_col_string.encode(encoding="utf-8")
def CreateSQLdb():
cols = ReadHeaders()
conn = sqlite3.connect(db_path)
c = conn.cursor()
c.execute(f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({cols}) """)
conn.commit()
conn.close()
def ShowTable(c):
c.execute(f"PRAGMA table_info({db_table_name})")
print (c.fetchall())
if __name__ == "__main__":
CreateSQLdb()

Python Dbf append to memory indexed table fails

I'm using Python dbf-0.99.1 library from Ethan Furman. This approach to add record to table fails:
tab = dbf.Table( "MYTABLE" )
tab.open(mode=dbf.READ_WRITE)
idx = tab.create_index(lambda rec: (rec.id if not is_deleted(rec) else DoNotIndex ) ) # without this, append works
rec = { "id":id, "col2": val2 } # some values, id is numeric and is not None
tab.append( rec ) # fails here
My table contains various character and numeric columns. This is just an example. The exceptions is:
line 5959, in append
newrecord = Record(recnum=header.record_count, layout=meta, kamikaze=kamikaze)
line 3102, in __new__
record._update_disk()
line 3438, in _update_disk
index(self)
line 7550, in __call__
vindex = bisect_right(self._values, key)
TypeError: '<' not supported between instances of 'NoneType' and 'int'
Any help appreciated. Thanks.
EDIT: Here is testing script
import dbf
from dbf import is_deleted, DoNotIndex
tab = dbf.Table('temptable', "ID N(12,0)" )
tab.open(mode=dbf.READ_WRITE)
rc = { "id":1 }
tab.append( rc ) # need some data without index first
idx = tab.create_index(lambda rec: (rec.id if not is_deleted(rec) else DoNotIndex ) )
rc = { "id":2 }
tab.append( rc ) # fails here

Get data from DB2 with Flask Pyodbc with parameter

So, I was working on API with flask. The data is in DB2. I tried to connect with pyodbc as below
#app.route('/api/acrdkl/all', methods=['GET'])
def api_all():
conn = pyodbc.connect("DSN=AS400;UID=....;PWD=....")
cur = conn.cursor()
all_books = cur.execute(""" select trim(dkkdcb), trim(dkkdps), trim(dkcob), trim(dkureg), trim(dkbktg), trim(dkblrg), trim(dkthrg)
from simdta.ACRDKL where dkkdcb=1402 and dkblrg=10 and dkthrg=2020""")
rows = cur.fetchall()
result = []
for dt in rows:
result.append([x for x in dt])
return jsonify(result)
Result are shown as JSON.
But when I tried to use some parameter as below
#app.route('/api/acrdkl/filter', methods=['GET'])
def api_filter():
dkkdcb = request.args.get('DKKDCB', 0)
dkblrg = request.args.get('DKBLRG', 0)
dkthrg = request.args.get('DKTHRG', 0)
query = """selecttrim(dkkdcb),trim(dkkdps),trim(dkcob),trim(dkureg),
trim(dkbktg), trim(dkblrg), trim(dkthrg)
from simdta.ACRDKL WHERE """
conn = pyodbc.connect("DSN=AS400;UID=.....;PWD=.....")
cur = conn.cursor()
rows = cur.execute(query, [int(dkkdcb), int(dkblrg), int(dkthrg)])
rows.fetchall()
print("rows 2 ", rows)
result = []
for dt in rows:
result.append([x for x in dt])
return jsonify(results)
And I go to this http://127.0.0.1:5000/api/acrdkl/filter?DKKDCB=1402&DKBLRG=10&DKTHRG=2020 and it throws error like this
pyodbc.DataError: ('22023', '[22023] [Microsoft][ODBC DB2 Driver]Data
exception - SQLSTATE 22023, SQLCODE -302. SQLSTATE: 22023, SQLCODE:
-302 (-302) (SQLExecDirectW)')
How do I get the desired result? Where is my mistake? Any help would be appreciate. Thanks
I don't see that you are accessing the request data provided by Flask, e.g.:
dkbrlg=request.args.get('dkbrlg',0)

I just want to load 5GB from MySql into BigQuery

Long time no see. I'd want to get 5GB of data from MySql into BigQuery. My best bet seems to be some sort of CSV export / import. Which doesn't work for various reasons, see:
agile-coral-830:splitpapers1501200518aa150120052659
agile-coral-830:splitpapers1501200545aa150120055302
agile-coral-830:splitpapers1501200556aa150120060231
This is likely because I don't have the right MySql incantation able to generate perfect CSV in accordance with RFC 4180. However, instead of arguing RFC 4180 minutia, this whole load business could be solved in five minutes by supporting customizable multi-character field separators and multi-character line separators. I'm pretty sure my data doesn't contain either ### nor ###, so the following would work like a charm:
mysql> select * from $TABLE_NAME
into outfile '$DATA.csv'
fields terminated by '###'
enclosed by ''
lines terminated by '###'
$ bq load --nosync -F '###' -E '###' $TABLE_NAME $DATA.csv $SCHEMA.json
Edit: Fields contain '\n', '\r', ',' and '"'. They also contain NULLs, which MySql represents as [escape]N, in the example "N. Sample row:
"10.1.1.1.1483","5","9074080","Candidate high myopia loci on chromosomes 18p and 12q do not play a major role in susceptibility to common myopia","Results
There was no strong evidence of linkage of common myopia to these candidate regions: all two-point and multipoint heterogeneity LOD scores were < 1.0 and non-parametric linkage p-values were > 0.01. However, one Amish family showed slight evidence of linkage (LOD>1.0) on 12q; another 3 Amish families each gave LOD >1.0 on 18p; and 3 Jewish families each gave LOD >1.0 on 12q.
Conclusions
Significant evidence of linkage (LOD> 3) of myopia was not found on chromosome 18p or 12q loci in these families. These results suggest that these loci do not play a major role in the causation of common myopia in our families studied.","2004","BMC MEDICAL GENETICS","JOURNAL","N,"5","20","","","","0","1","USER","2007-11-19 05:00:00","rep1","PDFLib TET","0","2009-05-24 20:33:12"
I found loading through a CSV very difficult. More restrictions and complications. I have been messing around this morning with moving data from MySQL to BigQuery.
Bellow is a Python script that will build the table decorator and stream the data directly into the BigQuery table.
My db is in the Cloud so you may need to change the connection string. Fill in the missing values for your particular situation then call it by:
SQLToBQBatch(tableName, limit)
I put the limit in to test with. For my final test I sent 999999999 for the limit and everything worked fine.
I would recommend using a backend module to run this over 5g.
Use "RowToJSON" to clean up and invalid characters (ie anything non utf8).
I haven't tested on 5gb but it was able to do 50k rows in about 20 seconds. The same load in CSV was over 2 minutes.
I wrote this to test things, so please excuse the bad codding practices and mini hacks. It works so feel free to clean it up for any production level work.
import MySQLdb
import logging
from apiclient.discovery import build
from oauth2client.appengine import AppAssertionCredentials
import httplib2
OAUTH_SCOPE = 'https://www.googleapis.com/auth/bigquery'
PROJECT_ID =
DATASET_ID =
TABLE_ID =
SQL_DATABASE_NAME =
SQL_DATABASE_DB =
SQL_USER =
SQL_PASS =
def Connect():
return MySQLdb.connect(unix_socket='/cloudsql/' + SQL_DATABASE_NAME, db=SQL_DATABASE_DB, user=SQL_USER, passwd=SQL_PASS)
def RowToJSON(cursor, row, fields):
newData = {}
for i, value in enumerate(row):
try:
if fields[i]["type"] == bqTypeDict["int"]:
value = int(value)
else:
value = float(value)
except:
if value is not None:
value = value.replace("\x92", "'") \
.replace("\x96", "'") \
.replace("\x93", '"') \
.replace("\x94", '"') \
.replace("\x97", '-') \
.replace("\xe9", 'e') \
.replace("\x91", "'") \
.replace("\x85", "...") \
.replace("\xb4", "'") \
.replace('"', '""')
newData[cursor.description[i][0]] = value
return newData
def GetBuilder():
return build('bigquery', 'v2',http = AppAssertionCredentials(scope=OAUTH_SCOPE).authorize(httplib2.Http()))
bqTypeDict = { 'int' : 'INTEGER',
'varchar' : 'STRING',
'double' : 'FLOAT',
'tinyint' : 'INTEGER',
'decimal' : 'FLOAT',
'text' : 'STRING',
'smallint' : 'INTEGER',
'char' : 'STRING',
'bigint' : 'INTEGER',
'float' : 'FLOAT',
'longtext' : 'STRING'
}
def BuildFeilds(table):
conn = Connect()
cursor = conn.cursor()
cursor.execute("DESCRIBE %s;" % table)
tableDecorator = cursor.fetchall()
fields = []
for col in tableDecorator:
field = {}
field["name"] = col[0]
colType = col[1].split("(")[0]
if colType not in bqTypeDict:
logging.warning("Unknown type detected, using string: %s", str(col[1]))
field["type"] = bqTypeDict.get(colType, "STRING")
if col[2] == "YES":
field["mode"] = "NULLABLE"
fields.append(field)
return fields
def SQLToBQBatch(table, limit=3000):
logging.info("****************************************************")
logging.info("Starting SQLToBQBatch. Got: Table: %s, Limit: %i" % (table, limit))
bqDest = GetBuilder()
fields = BuildFeilds(table)
try:
responce = bqDest.datasets().insert(projectId=PROJECT_ID, body={'datasetReference' :
{'datasetId' : DATASET_ID} }).execute()
logging.info("Added Dataset")
logging.info(responce)
except Exception, e:
logging.info(e)
if ("Already Exists: " in str(e)):
logging.info("Dataset already exists")
else:
logging.error("Error creating dataset: " + str(e), "Error")
try:
responce = bqDest.tables().insert(projectId=PROJECT_ID, datasetId=DATASET_ID, body={'tableReference' : {'projectId' : PROJECT_ID,
'datasetId' : DATASET_ID,
'tableId' : TABLE_ID},
'schema' : {'fields' : fields}}
).execute()
logging.info("Added Table")
logging.info(responce)
except Exception, e:
logging.info(e)
if ("Already Exists: " in str(e)):
logging.info("Table already exists")
else:
logging.error("Error creating table: " + str(e), "Error")
conn = Connect()
cursor = conn.cursor()
logging.info("Starting load loop")
count = -1
cur_pos = 0
total = 0
batch_size = 1000
while count != 0 and cur_pos < limit:
count = 0
if batch_size + cur_pos > limit:
batch_size = limit - cur_pos
sqlCommand = "SELECT * FROM %s LIMIT %i, %i" % (table, cur_pos, batch_size)
logging.info("Running: %s", sqlCommand)
cursor.execute(sqlCommand)
data = []
for _, row in enumerate(cursor.fetchall()):
data.append({"json": RowToJSON(cursor, row, fields)})
count += 1
logging.info("Read complete")
if count != 0:
logging.info("Sending request")
insertResponse = bqDest.tabledata().insertAll(
projectId=PROJECT_ID,
datasetId=DATASET_ID,
tableId=TABLE_ID,
body={"rows":data}).execute()
cur_pos += batch_size
total += count
logging.info("Done %i, Total: %i, Response: %s", count, total, insertResponse)
if "insertErrors" in insertResponse:
logging.error("Error inserting data index: %i", insertResponse["insertErrors"]["index"])
for error in insertResponse["insertErrors"]["errors"]:
logging.error(error)
else:
logging.info("No more rows")
• Generate google service account key
o IAM & Admin > Service account > create_Service_account
o Once created then create key , download and save It to the project folder on local machine – google_key.json
• Run the code in pycharm environment after installing the packages.
NOTE : The table data in mysql remains intact. Also , if one uses preview in BQ to see that you won’t see. Go to console and fire the query.
o CODE
o import MySQLdb
from google.cloud import bigquery
import mysql.connector
import logging
import os
from MySQLdb.converters import conversions
import click
import MySQLdb.cursors
from google.cloud.exceptions import ServiceUnavailable
import sys
bqTypeDict = {'int': 'INTEGER',
'varchar': 'STRING',
'double': 'FLOAT',
'tinyint': 'INTEGER',
'decimal': 'FLOAT',
'text': 'STRING',
'smallint': 'INTEGER',
'char': 'STRING',
'bigint': 'INTEGER',
'float': 'FLOAT',
'longtext': 'STRING',
'datetime': 'TIMESTAMP'
}
def conv_date_to_timestamp(str_date):
import time
import datetime
date_time = MySQLdb.times.DateTime_or_None(str_date)
unix_timestamp = (date_time - datetime.datetime(1970, 1, 1)).total_seconds()
return unix_timestamp
def Connect(host, database, user, password):
return mysql.connector.connect(host='',
port='',
database='recommendation_spark',
user='',
password='')
def BuildSchema(host, database, user, password, table):
logging.debug('build schema for table %s in database %s' % (table, database))
conn = Connect(host, database, user, password)
cursor = conn.cursor()
cursor.execute("DESCRIBE %s;" % table)
tableDecorator = cursor.fetchall()
schema = []
for col in tableDecorator:
colType = col[1].split("(")[0]
if colType not in bqTypeDict:
logging.warning("Unknown type detected, using string: %s", str(col[1]))
field_mode = "NULLABLE" if col[2] == "YES" else "REQUIRED"
field = bigquery.SchemaField(col[0], bqTypeDict.get(colType, "STRING"), mode=field_mode)
schema.append(field)
return tuple(schema)
def bq_load(table, data, max_retries=5):
logging.info("Sending request")
uploaded_successfully = False
num_tries = 0
while not uploaded_successfully and num_tries < max_retries:
try:
insertResponse = table.insert_data(data)
for row in insertResponse:
if 'errors' in row:
logging.error('not able to upload data: %s', row['errors'])
uploaded_successfully = True
except ServiceUnavailable as e:
num_tries += 1
logging.error('insert failed with exception trying again retry %d', num_tries)
except Exception as e:
num_tries += 1
logging.error('not able to upload data: %s', str(e))
#click.command()
#click.option('-h', '--host', default='tempus-qa.hashmapinc.com', help='MySQL hostname')
#click.option('-d', '--database', required=True, help='MySQL database')
#click.option('-u', '--user', default='root', help='MySQL user')
#click.option('-p', '--password', default='docker', help='MySQL password')
#click.option('-t', '--table', required=True, help='MySQL table')
#click.option('-i', '--projectid', required=True, help='Google BigQuery Project ID')
#click.option('-n', '--dataset', required=True, help='Google BigQuery Dataset name')
#click.option('-l', '--limit', default=0, help='max num of rows to load')
#click.option('-s', '--batch_size', default=1000, help='max num of rows to load')
#click.option('-k', '--key', default='key.json',help='Location of google service account key (relative to current working dir)')
#click.option('-v', '--verbose', default=0, count=True, help='verbose')
def SQLToBQBatch(host, database, user, password, table, projectid, dataset, limit, batch_size, key, verbose):
# set to max verbose level
verbose = verbose if verbose < 3 else 3
loglevel = logging.ERROR - (10 * verbose)
logging.basicConfig(level=loglevel)
logging.info("Starting SQLToBQBatch. Got: Table: %s, Limit: %i", table, limit)
## set env key to authenticate application
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.getcwd(), key)
print('file found')
# Instantiates a client
bigquery_client = bigquery.Client()
print('Project id created')
try:
bq_dataset = bigquery_client.dataset(dataset)
bq_dataset.create()
logging.info("Added Dataset")
except Exception as e:
if ("Already Exists: " in str(e)):
logging.info("Dataset already exists")
else:
logging.error("Error creating dataset: %s Error", str(e))
bq_table = bq_dataset.table(table)
bq_table.schema = BuildSchema(host, database, user, password, table)
print('Creating schema using build schema')
bq_table.create()
logging.info("Added Table %s", table)
conn = Connect(host, database, user, password)
cursor = conn.cursor()
logging.info("Starting load loop")
cursor.execute("SELECT * FROM %s" % (table))
cur_batch = []
count = 0
for row in cursor:
count += 1
if limit != 0 and count >= limit:
logging.info("limit of %d rows reached", limit)
break
cur_batch.append(row)
if count % batch_size == 0 and count != 0:
bq_load(bq_table, cur_batch)
cur_batch = []
logging.info("processed %i rows", count)
# send last elements
bq_load(bq_table, cur_batch)
logging.info("Finished (%i total)", count)
print("table created")
if __name__ == '__main__':
# run the command
SQLToBQBatch()
o Command to run the file : python mysql_to_bq.py -d 'recommendation_spark' -t temp_market_store -i inductive-cocoa-250507 -n practice123 -k key.json