Creating sqlite db in Python 3 with constructed f string and receiving: sqlite3.OperationalError: near "(": syntax error - sql

I am trying to create a sqlite3 db table using a constructed f string in python 3, however I am receiving the below error:
sqlite3.OperationalError: near "(": syntax error
I had hoped that I wouldn't need to ask here for a syntax error but I have been searching on stackoverflow as well as generally online to identify the issue with no success.
I have compared the code to other samples and equally do not see any difference to the construction, except for that it doesn't appear to be common to use f strings.
I have read the pros/cons of passing parameters and would prefer this f string unless it is the root cause.
I expect the issue might be obvious, however any pointers would be greatly appreciated.
Below is the full code:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
    """Build the column fragment used inside the CREATE TABLE statement.

    Reads the spreadsheet at ``header_source``, swaps spaces in each header
    for underscores, glues the headers together with ``" TEXT, _"`` and then
    turns the ``Primary_ID`` entry into a ``PRIMARY KEY`` column.
    """
    frame = pd.read_excel(header_source)
    headers = [name.replace(" ", "_") for name in frame.columns.values]
    # join() only places the separator *between* names, so the first header
    # gets no "_" prefix and the last one gets no " TEXT" suffix.
    joined = " TEXT, _".join(headers)
    return joined.replace("Primary_ID TEXT", "Primary_ID PRIMARY KEY")
def CreateSQLdb():
    """Create ``db_table_name`` in the SQLite file at ``db_path`` if it is missing.

    The column list comes from ``ReadHeaders()`` and is interpolated directly
    into the CREATE TABLE statement.
    """
    column_defs = ReadHeaders()
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    statement = f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({column_defs}) """
    cursor.execute(statement)
    connection.commit()
    connection.close()
A sample of the string that is created for the table headers is:
_link TEXT, _Primary_ID PRIMARY KEY, _Status_Description TEXT, _Price_List_Status TEXT, _Brand TEXT, _36_Character_Description TEXT

Solved
After breaking everything down, the root cause was the constructed string. I was able to identify it when trying to export the constructed string to a .txt file and received a unicode error.
Code before:
return final_col_string
Code after:
return final_col_string.encode(encoding="utf-8")
I also added a simple check of the table info for confirmation
def ShowTable(c):
    """Print the ``PRAGMA table_info`` rows for ``db_table_name``.

    ``c`` is the cursor the pragma is executed on.
    """
    pragma = f"PRAGMA table_info({db_table_name})"
    c.execute(pragma)
    rows = c.fetchall()
    print(rows)
Complete code in case anyone else comes across this issue:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
    """Build the UTF-8 encoded column fragment for the CREATE TABLE statement.

    Reads the spreadsheet at ``header_source``, swaps spaces in each header
    for underscores, glues the headers together with ``" TEXT, _"``, turns
    the ``Primary_ID`` entry into a ``PRIMARY KEY`` column and returns the
    result encoded as UTF-8 ``bytes``.
    """
    frame = pd.read_excel(header_source)
    headers = [name.replace(" ", "_") for name in frame.columns.values]
    # join() only places the separator *between* names, so the first header
    # gets no "_" prefix and the last one gets no " TEXT" suffix.
    joined = " TEXT, _".join(headers)
    with_key = joined.replace("Primary_ID TEXT", "Primary_ID PRIMARY KEY")
    # NOTE(review): the result is bytes; interpolating bytes into an f-string
    # inserts its b'...' repr rather than the decoded text -- confirm the
    # schema that actually gets created (e.g. via PRAGMA table_info).
    return with_key.encode(encoding="utf-8")
def CreateSQLdb():
    """Create ``db_table_name`` in the SQLite file at ``db_path`` if it is missing.

    The column list comes from ``ReadHeaders()`` and is interpolated directly
    into the CREATE TABLE statement.
    """
    column_defs = ReadHeaders()
    connection = sqlite3.connect(db_path)
    cursor = connection.cursor()
    statement = f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({column_defs}) """
    cursor.execute(statement)
    connection.commit()
    connection.close()
def ShowTable(c):
    """Print the ``PRAGMA table_info`` rows for ``db_table_name``.

    ``c`` is the cursor the pragma is executed on.
    """
    pragma = f"PRAGMA table_info({db_table_name})"
    c.execute(pragma)
    rows = c.fetchall()
    print(rows)
if __name__ == "__main__":
CreateSQLdb()

Related

Table Does Not Exist Error When Using write_pandas

I have a very frustrating issue. At the bottom of this post is a function I created to (1) create a table in snowflake and (2) store a dataframe to that table.
Creating the table works fine. The issue happens specifically with write_pandas, in this code snippet:
write_pandas(
conn=conn,
df=df,
table_name=table_name,
database=database,
schema=schema
)
I keep getting an error that the table I created "doesn't exist" because the naming convention is off. For instance, in the database the table is created as "DATABASE"."SCHEMA"."TABLE", but the error message says 'DATABASE.SCHEMA."TABLE"' does not exist.
I know this is a simple issue, but I'm stuck for the moment. Any help would be appreciated.
from datetime import datetime, timedelta, date
from airflow import DAG
from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
from sqlalchemy import create_engine
import requests
from pandas.io.json import json_normalize
import numpy as np
from sqlalchemy.types import Integer, Text, String, DateTime
from IPython.display import display, HTML
from flatten_json import flatten
from snowflake.connector import connect
from snowflake.connector.pandas_tools import write_pandas
from airflow.operators.python_operator import PythonOperator
import os
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
def create_store_snowflake(df,table):
    """Create (or replace) a Snowflake table shaped like ``df`` and load ``df`` into it.

    Args:
        df: DataFrame to store. Its column names are upper-cased and stripped
            of ``-``, ``,`` and ``/`` before they are used as column names.
        table: Name of the target table. It is used verbatim both in the
            ``CREATE OR REPLACE TABLE`` statement and in ``write_pandas``.
            NOTE(review): the two calls may resolve identifier case
            differently (unquoted vs. quoted) -- pass the name in the case
            Snowflake stores it in.
    """
    # Quick transforms.
    df = df.rename(columns=str.upper)
    # regex=True is required: since pandas 2.0 ``str.replace`` treats the
    # pattern as a literal string by default, which would leave the
    # characters in place.
    df.columns = df.columns.str.replace('[-,/]', '', regex=True)

    # Target table, schema and database.
    # Note: the schema and database need to already exist in Snowflake.
    table_name = table
    schema = 'schema'
    database = 'database'

    # Snowflake column type for each pandas dtype name; anything not listed
    # falls back to the widest varchar.
    snowflake_types = {
        "int": "int",
        "int64": "int",
        "object": "varchar(16777216)",
        "datetime64[ns]": "datetime",
        "float64": "float8",
        "bool": "boolean",
    }
    fallback_type = "varchar(16777216)"

    # Iterating ``df.dtypes`` (rather than comparing names against the last
    # column) keeps the statement well-formed for duplicate column names and
    # always emits the closing parenthesis.
    column_defs = [
        f"{column} {snowflake_types.get(dtype.name, fallback_type)}"
        for column, dtype in df.dtypes.items()
    ]
    create_tbl_statement = (
        "CREATE OR REPLACE TABLE " + database + "." + schema + "." + table_name + " (\n"
        + ",\n".join(column_defs)
        + ")"
    )

    # Connect to Snowflake using the required user.
    conn = connect(
        user="user",
        password="password",
        account="account",
        role="role",
        database = "database",
        schema = 'schema'
    )
    try:
        # Execute the SQL statement to create the table.
        conn.cursor().execute(create_tbl_statement)
        print(f"{table_name} created!")
        # Write df to the created table.
        write_pandas(
            conn=conn,
            df=df,
            table_name=table_name,
            database=database,
            schema=schema
        )
        print(df.shape[0],f"rows written to {table_name} in Snowflake")
    finally:
        # Release the connection even when creating or loading fails.
        conn.close()
I just had to make sure the table name was CAPITALIZED, as everything stored in Snowflake is apparently capitalized ::face-palm::. Instead of create_store_snowflake(df,'mynewtable') it has to be create_store_snowflake(df,'MYNEWTABLE').
When the table identifier is wrapped in " during creation, the following rules apply:
create_tbl_statement= "CREATE OR REPLACE TABLE " + database + "." + schema + "." + table_name
Double-quoted Identifiers:
Delimited identifiers (i.e. identifiers enclosed in double quotes) are case-sensitive and can start with and contain any valid characters
Important
If an object is created using a double-quoted identifier, when referenced in a query or any other SQL statement, the identifier must be specified exactly as created, including the double quotes. Failure to include the quotes might result in an Object does not exist error (or similar type of error).

ProgrammingError when trying to skip duplicate data in postgres sql

PostgreSQL will not accept data that violates a primary key. To ignore the duplicate data, I have this code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
# SQLAlchemy URLs separate the credentials from the host with "@".
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))
def import_data(Symbol):
    """Upsert the last three days of Yahoo prices for ``Symbol`` into ``stockprices``.

    Every row returned by ``web.DataReader`` is rendered as a quoted VALUES
    tuple and the whole batch is sent through ``engine`` as a single
    ``INSERT ... ON CONFLICT ... DO UPDATE`` statement.
    """
    window_start = datetime.datetime.now() - relativedelta(days=3)
    window_end = datetime.datetime.now()
    df = web.DataReader(Symbol, 'yahoo', start=window_start, end=window_end)

    insert_init = (
        "INSERT INTO stockprices\n"
        "(Symbol, Date, Volume, Open, Close, High, Low)\n"
        "VALUES\n"
    )

    # NOTE(review): values are rendered as High, Low, Open, Close, Volume,
    # while the column list above is Volume, Open, Close, High, Low.
    row_template = """('{}','{}','{}','{}','{}','{}','{}')"""
    rendered_rows = []
    for Date, row in df.iterrows():
        rendered_rows.append(
            row_template.format(
                Symbol, Date, row.High, row.Low, row.Open, row.Close, row.Volume
            )
        )
    vals = ",".join(rendered_rows)

    insert_end = (
        "ON CONFLICT (Symbol, Date) DO UPDATE\n"
        "SET\n"
        "Volume = EXCLUDED.Volume,\n"
        "Open = EXCLUDED.Open,\n"
        "Close = EXCLUDED.Close,\n"
        "Low = EXCLUDED.Low,\n"
        "High = EXCLUDED.High\n"
    )

    engine.execute(insert_init + vals + insert_end)
import_data('aapl')
I am getting this error:
ProgrammingError: (psycopg2.errors.UndefinedColumn) column "symbol" of relation "stockprices" does not exist
LINE 2: (Symbol,Date, Volume, Open, Close, H...
^
[SQL: INSERT INTO stockprices
Could you please advise what this error means? I got rid of all the double quotes as advised in the comment.
I had used this code to create the table:
def create_price_table(symbol):
    """Append the last seven days of Yahoo prices for ``symbol`` to ``stockprices``.

    The frame is written with ``to_sql`` using its index as a column, after a
    ``Symbol`` column holding ``symbol`` has been added.
    """
    print(symbol)
    week_start = datetime.datetime.now() - relativedelta(days=7)
    week_end = datetime.datetime.now()
    prices = web.DataReader(symbol, 'yahoo', start=week_start, end=week_end)
    prices['Symbol'] = symbol
    prices.to_sql(name="stockprices", con=engine, if_exists='append', index=True)
    return 'daily prices table created'
create_price_table('amzn')
Also as was mentioned in the comment. I used this to check the table name:
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE';
Edit 1:
I changed the code as suggested in the comment, now the column name is in small case. Below is the code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
# SQLAlchemy URLs separate the credentials from the host with "@".
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))
def create_price_table(symbol):
    """Append the last seven days of Yahoo prices for ``symbol`` to ``stockprices``.

    All column names and the index name are lower-cased before the frame is
    written with ``to_sql``.
    """
    print(symbol)
    week_start = datetime.datetime.now() - relativedelta(days=7)
    week_end = datetime.datetime.now()
    prices = web.DataReader(symbol, 'yahoo', start=week_start, end=week_end)
    prices['symbol'] = symbol
    lower_case_names = {
        'Open': 'open',
        'Close': 'close',
        'High': 'high',
        'Low': 'low',
        'Volume': 'volume',
        'Adj Close': 'adj_close',
    }
    prices = prices.rename(columns=lower_case_names)
    prices.index.name = 'date'
    prices.to_sql(name="stockprices", con=engine, if_exists='append', index=True)
    return 'daily prices table created'
# create_price_table('amzn')
def import_data(Symbol):
    """Upsert the last three days of Yahoo prices for ``Symbol`` into ``stockprices``.

    Every row returned by ``web.DataReader`` is rendered as a quoted VALUES
    tuple and the whole batch is sent through ``engine`` as a single
    ``INSERT ... ON CONFLICT ... DO UPDATE`` statement.
    """
    window_start = datetime.datetime.now() - relativedelta(days=3)
    window_end = datetime.datetime.now()
    df = web.DataReader(Symbol, 'yahoo', start=window_start, end=window_end)

    insert_init = (
        "INSERT INTO stockprices\n"
        "(symbol, date, volume, open, close, high, low)\n"
        "VALUES\n"
    )

    # NOTE(review): values are rendered as High, Low, Open, Close, Volume,
    # while the column list above is volume, open, close, high, low -- so a
    # price ends up in the ``volume`` position.
    row_template = """('{}','{}','{}','{}','{}','{}','{}')"""
    rendered_rows = []
    for Date, row in df.iterrows():
        rendered_rows.append(
            row_template.format(
                Symbol, Date, row.High, row.Low, row.Open, row.Close, row.Volume
            )
        )
    vals = ",".join(rendered_rows)

    insert_end = (
        "ON CONFLICT (Symbol, Date) DO UPDATE\n"
        "SET\n"
        "Volume = EXCLUDED.Volume,\n"
        "Open = EXCLUDED.Open,\n"
        "Close = EXCLUDED.Close,\n"
        "Low = EXCLUDED.Low,\n"
        "High = EXCLUDED.High\n"
    )

    engine.execute(insert_init + vals + insert_end)
import_data('aapl')
This code however is producing a new error:
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type bigint: "166.14999389648438"
LINE 4: ('aapl','2022-02-23 00:00:00','166.14999...
^
Per my comment you have two issues:
You are trying to INSERT a float value (166.14999389648438) into an integer field. The first thing to figure out is why there is a mismatch: do you really want the database field to be an integer? The second thing is that forcing a float into an integer will work if the value is entered as a float/numeric:
select 166.14999389648438::bigint; 166
Though as you see it gets truncated.
It will not work if entered as a string:
ERROR: invalid input syntax for type bigint: "166.14999389648438"
Which is what you are doing. This leads to the second issue below.
You are not using proper Parameter passing as shown in the link. Where among other things is the warning:
Warning
Never, never, NEVER use Python string concatenation (+) or string parameters interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.
For the purposes of this question the important part is that using parameter passing will result in proper type adaptation.

How to use Python dataframe and SQLAlchemy to create MySQL tables

I have to create a few tables in a database called 'data_immo'. For that purpose, I'm using DataFrames (each DataFrame corresponding to one table), MySQL and SQLAlchemy.
Here is my code:
header_Bien_Immo = ['LotNb']
df_Bien_Immo = df1.loc[:, header_Bien_Immo]
df_Bien_Immo['IdLot'] = IdLot
df_Bien_Immo.index.names = ['IdBien']
def con_engine():
    """Create the ``data_immo`` database on the local MySQL server if it is missing.

    Returns ``None``: the engine is only used to issue the
    ``CREATE DATABASE`` statement and is not handed back to the caller.
    """
    # SQLAlchemy URLs separate the credentials from the host with "@".
    url = "mysql+pymysql://{user}:{password}@localhost"
    engine = create_engine(url.format(user='root', password='Fer458it'))
    engine.execute("CREATE DATABASE IF NOT EXISTS data_immo")
    return
def create_table(df):
    """Append ``df`` to the ``bien_immo`` table, declaring three integer columns."""
    # NOTE(review): con_engine() ends in a bare ``return``, so the value
    # passed as ``con`` below is None.
    connection = con_engine()
    column_types = {
        'LotNb': Integer,
        'IdLot': Integer,
        'IdBien': Integer,
    }
    df.to_sql(
        name='bien_immo',
        con=connection,
        if_exists='append',
        index=False,
        dtype=column_types,
    )
    return
con_engine()
create_table(df_Bien_Immo)'''
The execution of that code returns an error:
line 2214, in to_sql
raise ValueError(f"{col} ({my_type}) not a string")
ValueError: LotNb (<class 'sqlalchemy.sql.sqltypes.Integer'>) not a string.
Would you have any idea what the problem is ?
Thank you
The con_engine() function only creates the connection and the database itself. To create a table, to_sql must then be given an engine whose URL includes the database name. This is what was missing.
def con_engine():
    """Create the ``data_immo`` database on the local MySQL server if it is missing.

    Returns ``None``: the engine is only used to issue the
    ``CREATE DATABASE`` statement; tables are written through a separate
    engine that names the database.
    """
    # SQLAlchemy URLs separate the credentials from the host with "@".
    url = "mysql+pymysql://{user}:{password}@localhost"
    engine = create_engine(url.format(user='root', password='Fer458it'))
    engine.execute("CREATE DATABASE IF NOT EXISTS data_immo")
    return
def create_table(df):
    """Append ``df`` (index included) to the ``Lot`` table of ``data_immo``.

    The engine URL names the database explicitly so that ``to_sql`` has a
    schema to create the table in.
    """
    # SQLAlchemy URLs separate the credentials from the host with "@".
    url = "mysql+pymysql://{user}:{password}@localhost/{db}"
    engine = create_engine(url.format(user='root', password='Fer458it', db='data_immo'))
    df.to_sql(name='Lot', con=engine, if_exists='append', index=True,
              dtype={'LotNom': VARCHAR(10), 'LotType': Text, 'LotValeurFonciere': Integer, 'LotSurfBati': Integer,
                     'LotSurfCarrez': DECIMAL(6, 2), 'LotNbPieces': Integer, 'LotSuperJardin': Integer})
    return
con_engine()
create_table(df_Lot)

Writing Data from pandas dataframe to PostgreSQL gives error of 'DataFrame' objects are mutable, thus they cannot be hashed

I am trying to save a DataFrame that was first imported into pandas from PostgreSQL as dfraw. I then do some manipulation, create another DataFrame called df, and try to save it back to the same PostgreSQL database using SQLAlchemy. When I try to save it, I get the error: 'DataFrame' objects are mutable, thus they cannot be hashed.
PFB code below
import psycopg2
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine
# connect the database to python
# Update connection string information
host = "something.something.azure.com"
dbname = "abcd"
user = "abcd"
password = "abcd"
sslmode = "require"
schema = 'xyz'
# Construct connection string
conn_string = "host={0} user={1} dbname={2} password={3} sslmode={4}".format(host, user, dbname, password, sslmode)
conn = psycopg2.connect(conn_string)
print("Connection established")
cursor = conn.cursor()
# Fetch all rows from table
cursor.execute("SELECT * FROM xyz.abc;")
rows = cursor.fetchall()
# Convert the tuples in dataframes
dfraw = pd.DataFrame(rows, columns =["ID","Timestamp","K","S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"])
dfraw[["S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"]] = dfraw[["S","H 18","H 19","H 20","H 21","H 22","H 23","H 24","H 2zzz","H zzz4","H zzzzzz","H zzz6","H zzz7","H zzz8","H zzz9","H 60","H zzz0","H zzz2"]].apply(pd.to_numeric)
dfraw[["Timestamp","K"]]=dfraw[["Timestamp","K"]].apply(pd.to_datetime)
# Creating temp files
temp1 = dfraw
dfraw = temp1
# creating some fucntions for data manipulation and imputations
def remZero(df,dropCol):
    """Forward-fill zero readings in every column not listed in ``dropCol``.

    A zero is replaced by the most recent non-zero, non-missing value above
    it in the same column. Columns made up entirely of zeros are left
    untouched, and a zero with no earlier value to inherit stays 0. The name
    of each modified column is printed.

    Args:
        df: DataFrame to clean; it is modified in place.
        dropCol: Column labels to leave out of the clean-up. Every label must
            exist in ``df`` (``DataFrame.drop`` raises ``KeyError`` otherwise).

    Returns:
        The same ``df`` object.
    """
    for k in df.drop(dropCol, axis=1):
        column = df[k]
        zeros = column == 0
        # Nothing to do for all-zero columns or columns without any zero.
        if zeros.all() or not zeros.any():
            continue
        print(k)
        # Blank out the zeros and carry the last real value forward; this
        # avoids the deprecated ``replace(..., method='ffill')`` form.
        carried = column.mask(zeros).ffill().fillna(0)
        # Only the zero positions are overwritten; restore the original dtype
        # because masking upcasts integer columns to float.
        df[k] = column.where(~zeros, carried).astype(column.dtype)
    return df
# Drop Columns function
dropCol = ['Timestamp','K','ID','H','C','S']
dropCol2 = ['Timestamp','K','ID','Shift']
df = remZero(dfraw,dropCol)
from sqlalchemy import create_engine
# SQLAlchemy URLs separate the credentials from the host with "@".
engine = create_engine('postgresql://abcd:abcd@something.something.azure.com:5432/abcd')
df.to_sql(name = df,
con=engine,
index = False,
if_exists= 'replace'
)
Error Message
I found a basic error in the code: I forgot to put quotation marks around the table name, so the DataFrame object itself was passed as the name. The fix is to pass the name as a string:
df.to_sql(name = "df",
con=engine,
index = False,
if_exists= 'replace'
)

Python3: Can't write to text file with obtained data from SQL table

I think I am stuck on an easy task as a beginner, but I have to ask this question.
My objective is to create another list from data obtained from a SQL table.
This list will be created in accordance with the data entered by the user.
The SQL table is fine, but nothing gets written to the txt file even though I receive no error message.
Where is the problem?
import sqlite3
db = sqlite3.connect("air2.sql")
cs = db.cursor()
epcs = dict()
for i in range(19):
epcs[i] = 0
def addepc():
    """Look up the EPC chosen by the user and save its value to ``epcs.txt``.

    Prompts for an EPC number, prints the matching ``epcval5`` row, stores
    the row's ``value`` in ``epcs`` under the entered number and writes it to
    ``epcs.txt`` (the file is overwritten on every call). If no row matches,
    a message is printed and nothing is written.
    """
    epcNo = input("EPC No:")
    epc_key = "EPC{}".format(epcNo)

    # The EPC comes straight from the keyboard, so bind it as a query
    # parameter instead of formatting it into the SQL text.
    cs.execute("SELECT * FROM 'epcval5' WHERE epc=?", (epc_key,))
    data = cs.fetchone()
    if data is None:
        # fetchone() returns None when no row matches; iterating it would
        # raise TypeError.
        print("No entry found for {}".format(epc_key))
        return

    print("You have selected this EPC:")
    for i in data:
        print(i)

    cs.execute("SELECT value FROM 'epcval5' as float WHERE epc=?", (epc_key,))
    epcv = cs.fetchone()
    res = '.'.join(str(ele) for ele in epcv)
    print(type(res))

    epcs[epcNo] = res
    with open('epcs.txt', 'w') as f:
        f.write(res)
addepc()
print("Done! Press ENTER to continue")
input()
You could use pandas to write it to a csv file
from contextlib import closing

# sqlite3's own context manager only commits or rolls back the transaction;
# closing() also releases the connection when the block ends.
with closing(sqlite3.connect(DB_FILENAME)) as con:
    df = pd.read_sql_query('your sql query', con)
df.to_csv('file_name')