how to insert blob or clob column in informix - blob

I am new to the Informix database. I want to know how to insert values into BLOB and CLOB columns in Informix. I need a sample query for each of those two column types. If someone can help, I would appreciate it.

This is my Jython code that uses JDBC and prepared statements to test both INSERT and SELECT of blob from Informix database:
#!/usr/bin/env jython
# -*- coding: utf8 -*-
import time
from java.sql import DriverManager
from java.lang import Class
from java.io import FileInputStream
from java.io import ByteArrayOutputStream
Class.forName("com.informix.jdbc.IfxDriver")
"""
create table _blob_test_so (
id serial,
txt varchar(30),
column_blob blob
);
"""
def test_blob_insert(db):
    """Insert one row whose BLOB column is streamed from a local GIF file.

    db -- an open JDBC connection (as returned by DriverManager.getConnection).

    The file stream and the prepared statement are released in ``finally``
    blocks so that a failing INSERT does not leak them.
    """
    print('inserting gif picture into blob table...')
    blob = FileInputStream('snoopy_comics20121023.gif')
    try:
        insert_stmt = db.prepareStatement("INSERT INTO _blob_test_so (txt, column_blob) VALUES (?, ?)")
        try:
            insert_stmt.setString(1, 'test %s' % (time.strftime('%Y-%m-%d %H:%M:%S')))
            # The driver reads the stream while the statement executes, so
            # the stream must stay open until executeUpdate() has returned.
            insert_stmt.setBinaryStream(2, blob)
            rec_cnt = insert_stmt.executeUpdate()
        finally:
            insert_stmt.close()
    finally:
        blob.close()
    print('records changed: %d' % (rec_cnt))
def test_blob_select(db):
    """Read every row of the test table and dump each BLOB to ``test_<id>.gif``.

    db -- an open JDBC connection.

    The statement, the result set, each blob stream and each output file are
    closed in ``finally`` blocks; rows whose BLOB column is NULL are reported
    and skipped instead of crashing on a missing stream.
    """
    print('selecting data from blob table...')
    pstm = db.prepareStatement("SELECT id, txt, column_blob FROM _blob_test_so")
    try:
        rs = pstm.executeQuery()
        try:
            while rs.next():
                row_id = rs.getInt(1)  # not named "id": that would shadow the builtin
                txt = rs.getString(2)
                image_stream = rs.getBinaryStream(3)
                if image_stream is None:
                    print('%d:%s: no blob data' % (row_id, txt))
                    continue
                fout = ByteArrayOutputStream()
                try:
                    while True:
                        b = image_stream.read()
                        if b < 0:  # InputStream.read() returns -1 at end of stream
                            break
                        fout.write(b)
                finally:
                    image_stream.close()
                arr = fout.toByteArray()
                fname_out = 'test_%s.gif' % (row_id)
                print('%d:%s: fname: %s %d [b]' % (row_id, txt, fname_out, len(arr)))
                f = open(fname_out, 'wb')
                try:
                    f.write(arr)
                finally:
                    f.close()
        finally:
            rs.close()
    finally:
        pstm.close()
# Open the connection, run both tests, and always close the connection
# afterwards, even when one of the tests raises.
db = DriverManager.getConnection('jdbc:informix-sqli://test-informix:9088/test:informixserver=ol_testifx;DB_LOCALE=pl_PL.CP1250;CLIENT_LOCALE=pl_PL.CP1250;charSet=CP1250', 'user', 'passwd')
try:
    test_blob_insert(db)
    test_blob_select(db)
finally:
    db.close()

Related

Creating sqlite db in Python 3 with constructed f string and receiving: sqlite3.OperationalError: near "(": syntax error

I am trying to create a sqlite3 db table using a constructed f string in python 3, however I am receiving the below error:
sqlite3.OperationalError: near "(": syntax error
I had hoped that I wouldn't need to ask here for a syntax error but I have been searching on stackoverflow as well as generally online to identify the issue with no success.
I have compared the code to other samples and equally do not see any difference to the construction, except for that it doesn't appear to be common to use f strings.
I have read the pros/cons of passing parameters and would prefer this f string unless it is the root cause.
I expect the issue might be obvious, however any pointers would be greatly appreciated.
Below is the full code:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
    """Build the column-definition fragment for CREATE TABLE from the sheet headers.

    Reads the spreadsheet at ``header_source``, replaces spaces in each header
    with underscores and joins the names with ``" TEXT, _"``.  Because the
    separator is only placed *between* names, the first name gets no leading
    underscore and the last name gets no trailing ``TEXT``.  The text
    ``Primary_ID TEXT`` is then rewritten to ``Primary_ID PRIMARY KEY``.
    """
    headers = pd.read_excel(header_source).columns.values
    underscored = [name.replace(" ", "_") for name in list(headers)]
    joined = " TEXT, _".join(underscored)
    return joined.replace("Primary_ID TEXT", "Primary_ID PRIMARY KEY")
def CreateSQLdb():
    """Create the table ``db_table_name`` in the database at ``db_path``.

    The column definitions come from ReadHeaders().  The connection is closed
    in a ``finally`` block so that a failing CREATE TABLE (for example a
    syntax error in the generated SQL) does not leave it open.
    """
    cols = ReadHeaders()
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute(f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({cols}) """)
        conn.commit()
    finally:
        conn.close()
A sample of the string that is created for the table headers is:
_link TEXT, _Primary_ID PRIMARY KEY, _Status_Description TEXT, _Price_List_Status TEXT, _Brand TEXT, _36_Character_Description TEXT
Solved
After breaking everything down, the root cause was the constructed string. I was able to identify it when trying to export the constructed string to a .txt file and received a unicode error.
Code before:
return final_col_string
Code after:
return final_col_string.encode(encoding="utf-8")
I also added a simple check of the table info for confirmation
def ShowTable(c, table_name=None):
    """Print and return the ``PRAGMA table_info`` rows for a table.

    c          -- an open sqlite3 cursor.
    table_name -- table to inspect; defaults to the module-level
                  ``db_table_name`` when omitted.

    The name is emitted as a double-quoted identifier (embedded quotes are
    doubled) so that names containing spaces or punctuation do not break the
    statement.  Each returned row is
    ``(cid, name, type, notnull, dflt_value, pk)``.
    """
    name = db_table_name if table_name is None else table_name
    quoted = '"' + str(name).replace('"', '""') + '"'
    c.execute(f"PRAGMA table_info({quoted})")
    rows = c.fetchall()
    print(rows)
    return rows
Complete code in case anyone else comes across this issue:
import sqlite3
import pandas as pd
db_path = [PATH TO DATABASE]
db_table_name = [TABLE NAME]
header_source = [PATH TO .XLSX]
def ReadHeaders():
    """Return the column definitions for CREATE TABLE as a ``str``.

    Reads the spreadsheet at ``header_source`` and turns every header into
    one column definition:

    * spaces become underscores and an underscore is prefixed to every name,
      so that headers starting with a digit remain usable;
    * the name is written as a double-quoted identifier (embedded quotes are
      doubled), so headers containing characters such as ``(`` cannot produce
      a syntax error;
    * the column is declared ``TEXT``, except ``Primary_ID`` which becomes the
      ``PRIMARY KEY``.

    A ``str`` is returned on purpose: interpolating ``bytes`` into an f-string
    inserts the ``b'...'`` repr into the SQL, which SQLite accepts but which
    creates a table with the wrong columns.
    """
    df = pd.read_excel(header_source)
    col_defs = []
    for header in df.columns:
        name = "_" + str(header).replace(" ", "_")
        quoted = '"' + name.replace('"', '""') + '"'
        col_type = "PRIMARY KEY" if name == "_Primary_ID" else "TEXT"
        col_defs.append(f"{quoted} {col_type}")
    return ", ".join(col_defs)
def CreateSQLdb():
    """Create the table ``db_table_name`` in the database at ``db_path``.

    The column definitions come from ReadHeaders().  If they arrive as
    ``bytes`` they are decoded first, because formatting ``bytes`` into the
    f-string would embed the ``b'...'`` repr in the SQL instead of the column
    list.  The connection is always closed, even when the statement fails.
    """
    cols = ReadHeaders()
    if isinstance(cols, bytes):
        cols = cols.decode("utf-8")
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute(f""" CREATE TABLE IF NOT EXISTS {db_table_name} ({cols}) """)
        conn.commit()
    finally:
        conn.close()
def ShowTable(c, table_name=None):
    """Print and return the ``PRAGMA table_info`` rows for a table.

    c          -- an open sqlite3 cursor.
    table_name -- table to inspect; defaults to the module-level
                  ``db_table_name`` when omitted.

    The table name is quoted as an identifier (embedded double quotes are
    doubled) so names with spaces or punctuation are handled.  Each returned
    row is ``(cid, name, type, notnull, dflt_value, pk)``.
    """
    target = db_table_name if table_name is None else table_name
    identifier = '"' + str(target).replace('"', '""') + '"'
    c.execute(f"PRAGMA table_info({identifier})")
    info = c.fetchall()
    print(info)
    return info
if __name__ == "__main__":
CreateSQLdb()

Python Dbf append to memory indexed table fails

I'm using Python dbf-0.99.1 library from Ethan Furman. This approach to add record to table fails:
# Open an existing table for reading and writing.
tab = dbf.Table( "MYTABLE" )
tab.open(mode=dbf.READ_WRITE)
# In-memory index keyed on rec.id; deleted records map to DoNotIndex.
idx = tab.create_index(lambda rec: (rec.id if not is_deleted(rec) else DoNotIndex ) ) # without this, append works
# NOTE: "id" and "val2" are not defined in this fragment; they stand for the caller's values.
rec = { "id":id, "col2": val2 } # some values, id is numeric and is not None
# With the index in place this append raises the TypeError shown in the traceback below.
tab.append( rec ) # fails here
My table contains various character and numeric columns. This is just an example. The exception is:
line 5959, in append
newrecord = Record(recnum=header.record_count, layout=meta, kamikaze=kamikaze)
line 3102, in __new__
record._update_disk()
line 3438, in _update_disk
index(self)
line 7550, in __call__
vindex = bisect_right(self._values, key)
TypeError: '<' not supported between instances of 'NoneType' and 'int'
Any help appreciated. Thanks.
EDIT: Here is testing script
# Minimal script reproducing the failure: appending to a table after an
# in-memory index has been created on it.
import dbf
from dbf import is_deleted, DoNotIndex
# Table with a single numeric field, ID N(12,0).
tab = dbf.Table('temptable', "ID N(12,0)" )
tab.open(mode=dbf.READ_WRITE)
rc = { "id":1 }
tab.append( rc ) # need some data without index first
# Index keyed on rec.id; deleted records are excluded via DoNotIndex.
idx = tab.create_index(lambda rec: (rec.id if not is_deleted(rec) else DoNotIndex ) )
rc = { "id":2 }
# Second append, now with the index present.
tab.append( rc ) # fails here

Python3: Can't write to text file with obtained data from SQL table

I think I am stuck on an easy job as a beginner, but I have to ask this question.
My objective is to create another list from data obtained from a SQL table.
This list will be created in accordance with the data entered by the user.
The SQL table has no problem, but I couldn't write to a txt file despite not receiving an error message.
Where is the problem?
import sqlite3
db = sqlite3.connect("air2.sql")
cs = db.cursor()
epcs = dict()
for i in range(19):
epcs[i] = 0
def addepc():
    """Ask for an EPC number, show its row and store its value in ``epcs.txt``.

    Uses the module-level cursor ``cs`` and the module-level dict ``epcs``.

    The user's input is bound as a query parameter instead of being formatted
    into the SQL text, and a number that matches no row is reported instead
    of failing while iterating over ``None``.
    """
    epcNo = input("EPC No:")
    epc_key = "EPC{}".format(epcNo)

    cs.execute("SELECT * FROM epcval5 WHERE epc = ?", (epc_key,))
    data = cs.fetchone()
    if data is None:
        # fetchone() returns None when nothing matches.
        print("No entry found for {}".format(epc_key))
        return

    print("You have selected this EPC:")
    for i in data:
        print(i)

    cs.execute("SELECT value FROM epcval5 WHERE epc = ?", (epc_key,))
    epcv = cs.fetchone()
    res = '.'.join(str(ele) for ele in epcv)
    print(type(res))

    epcs[epcNo] = res
    # Mode 'w' replaces the file content on every call, as in the original.
    with open('epcs.txt', 'w') as f:
        f.write(res)
addepc()
print("Done! Press ENTER to continue")
input()
You could use pandas to write it to a csv file
with sqlite3.connect(DB_FILENAME) as con:
df = pd.read_sql_query('your sql query',con)
df.to_csv('file_name')

How can I populate a pandas DataFrame with the result of a Snowflake sql query?

Using the Python Connector I can query Snowflake:
import snowflake.connector
# Gets the version
ctx = snowflake.connector.connect(
user=USER,
password=PASSWORD,
account=ACCOUNT,
authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
The result is a snowflake.connector.cursor.SnowflakeCursor. How can I convert that to a pandas DataFrame?
You can use DataFrame.from_records() or pandas.read_sql() with snowflake-sqlalchemy. The snowflake-sqlalchemy option has a simpler API.
pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
will return a DataFrame with proper column names taken from the SQL result. The iter(cur) will convert the cursor into an iterator and cur.description gives the names and types of the columns.
So the complete code will be
import snowflake.connector
import pandas as pd
# Gets the version
ctx = snowflake.connector.connect(
user=USER,
password=PASSWORD,
account=ACCOUNT,
authenticator='https://XXXX.okta.com',
)
ctx.cursor().execute('USE warehouse MY_WH')
ctx.cursor().execute('USE MYDB.MYSCHEMA')
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
cur = ctx.cursor().execute(query)
df = pd.DataFrame.from_records(iter(cur), columns=[x[0] for x in cur.description])
If you prefer using pandas.read_sql then you can
import pandas as pd
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL
url = URL(
account = 'xxxx',
user = 'xxxx',
password = 'xxxx',
database = 'xxx',
schema = 'xxxx',
warehouse = 'xxx',
role='xxxxx',
authenticator='https://xxxxx.okta.com',
)
engine = create_engine(url)
connection = engine.connect()
query = '''
select * from MYDB.MYSCHEMA.MYTABLE
LIMIT 10;
'''
df = pd.read_sql(query, connection)
There is now a method .fetch_pandas_all() for this, so there is no need for SQLAlchemy anymore.
Note that you need to install snowflake.connector for pandas by doing this
pip install snowflake-connector-python[pandas]
Full documentation here
import pandas as pd
import snowflake.connector
conn = snowflake.connector.connect(
user="xxx",
password="xxx",
account="xxx",
warehouse="xxx",
database="MYDB",
schema="MYSCHEMA"
)
cur = conn.cursor()
# Execute a statement that will generate a result set.
sql = "select * from MYTABLE limit 10"
cur.execute(sql)
# Fetch the result set from the cursor and deliver it as the Pandas DataFrame.
df = cur.fetch_pandas_all()
I just want to leave here a small change made to the code to ensure that the columns have correct names (in my case the fetch call returned long column names that included information beyond the name itself). I leave it here, in case someone needs it:
import snowflake.connector
import pandas as pd
def fetch_pandas(cur, sql, batch_size=100000):
    """Run ``sql`` on ``cur`` and return the whole result as one DataFrame.

    cur        -- a DB-API cursor (``execute``, ``fetchmany``, ``description``).
    sql        -- the query text to execute.
    batch_size -- number of rows requested per ``fetchmany`` call.

    Column names are taken from the first item of each ``cur.description``
    entry.  Rows are fetched in batches and concatenated, so the result
    contains every row, not only the last batch.  An empty result yields an
    empty DataFrame that still carries the column names.
    """
    cur.execute(sql)
    # Use the cursor that was passed in, not a module-level one.
    column_names = [col[0] for col in cur.description]
    frames = []
    while True:
        dat = cur.fetchmany(batch_size)
        if not dat:
            break
        frames.append(pd.DataFrame(dat, columns=column_names))
    if not frames:
        return pd.DataFrame(columns=column_names)
    return pd.concat(frames, ignore_index=True)
n = 100000
conn = snowflake.connector.connect(
user='xxxxx',
password='yyyyyy',
account='zzzzz',
warehouse = 'wwwww',
database = 'mmmmmm',
schema = 'nnnnn'
)
cursor = conn.cursor()
fetch_pandas(cursor, 'select * from "mmmmmm"."wwwww"."table"')

I just want to load 5GB from MySql into BigQuery

Long time no see. I'd want to get 5GB of data from MySql into BigQuery. My best bet seems to be some sort of CSV export / import. Which doesn't work for various reasons, see:
agile-coral-830:splitpapers1501200518aa150120052659
agile-coral-830:splitpapers1501200545aa150120055302
agile-coral-830:splitpapers1501200556aa150120060231
This is likely because I don't have the right MySql incantation able to generate perfect CSV in accordance with RFC 4180. However, instead of arguing RFC 4180 minutia, this whole load business could be solved in five minutes by supporting customizable multi-character field separators and multi-character line separators. I'm pretty sure my data doesn't contain either ### nor ###, so the following would work like a charm:
mysql> select * from $TABLE_NAME
into outfile '$DATA.csv'
fields terminated by '###'
enclosed by ''
lines terminated by '###'
$ bq load --nosync -F '###' -E '###' $TABLE_NAME $DATA.csv $SCHEMA.json
Edit: Fields contain '\n', '\r', ',' and '"'. They also contain NULLs, which MySql represents as [escape]N, in the example "N. Sample row:
"10.1.1.1.1483","5","9074080","Candidate high myopia loci on chromosomes 18p and 12q do not play a major role in susceptibility to common myopia","Results
There was no strong evidence of linkage of common myopia to these candidate regions: all two-point and multipoint heterogeneity LOD scores were < 1.0 and non-parametric linkage p-values were > 0.01. However, one Amish family showed slight evidence of linkage (LOD>1.0) on 12q; another 3 Amish families each gave LOD >1.0 on 18p; and 3 Jewish families each gave LOD >1.0 on 12q.
Conclusions
Significant evidence of linkage (LOD> 3) of myopia was not found on chromosome 18p or 12q loci in these families. These results suggest that these loci do not play a major role in the causation of common myopia in our families studied.","2004","BMC MEDICAL GENETICS","JOURNAL","N,"5","20","","","","0","1","USER","2007-11-19 05:00:00","rep1","PDFLib TET","0","2009-05-24 20:33:12"
I found loading through a CSV very difficult, with more restrictions and complications. I have been experimenting this morning with moving data from MySQL to BigQuery.
Below is a Python script that will build the table decorator and stream the data directly into the BigQuery table.
My db is in the Cloud so you may need to change the connection string. Fill in the missing values for your particular situation then call it by:
SQLToBQBatch(tableName, limit)
I put the limit in to test with. For my final test I sent 999999999 for the limit and everything worked fine.
I would recommend using a backend module to run this on more than 5 GB.
Use "RowToJSON" to clean up any invalid characters (i.e. anything non-UTF-8).
I haven't tested on 5 GB, but it was able to do 50k rows in about 20 seconds. The same load in CSV took over 2 minutes.
I wrote this to test things, so please excuse the bad coding practices and mini hacks. It works, so feel free to clean it up for any production-level work.
import MySQLdb
import logging
from apiclient.discovery import build
from oauth2client.appengine import AppAssertionCredentials
import httplib2
OAUTH_SCOPE = 'https://www.googleapis.com/auth/bigquery'
PROJECT_ID =
DATASET_ID =
TABLE_ID =
SQL_DATABASE_NAME =
SQL_DATABASE_DB =
SQL_USER =
SQL_PASS =
def Connect():
    """Open a MySQLdb connection through the Cloud SQL unix socket.

    The socket path is '/cloudsql/' followed by SQL_DATABASE_NAME; database,
    user and password come from the module-level SQL_* settings.
    """
    socket_path = '/cloudsql/' + SQL_DATABASE_NAME
    return MySQLdb.connect(
        unix_socket=socket_path,
        db=SQL_DATABASE_DB,
        user=SQL_USER,
        passwd=SQL_PASS,
    )
# Casts applied per BigQuery field type; any other type is left untouched.
_NUMERIC_CASTS = {"INTEGER": int, "FLOAT": float}

# Text types on both Python 2 (str/unicode) and Python 3 (str).
_TEXT_TYPES = (str, type(u""))

# Ordered replacements for stray Windows-1252 characters.  The quote doubling
# must stay last so that quotes produced by earlier replacements are doubled.
_CHAR_FIXES = (
    ("\x92", "'"),
    ("\x96", "'"),
    ("\x93", '"'),
    ("\x94", '"'),
    ("\x97", '-'),
    ("\xe9", 'e'),
    ("\x91", "'"),
    ("\x85", "..."),
    ("\xb4", "'"),
    ('"', '""'),
)


def RowToJSON(cursor, row, fields):
    """Convert one result row into a dict keyed by column name.

    cursor -- cursor whose ``description`` supplies the column names.
    row    -- sequence of column values.
    fields -- per-column dicts with a "type" entry (BigQuery type name).

    INTEGER fields are cast with ``int`` and FLOAT fields with ``float``.
    Other fields are not cast, so a STRING value such as "5" is no longer
    turned into 5.0.  A value that cannot be cast is kept as it is.  Text
    values then get the character clean-up; ``None`` passes through unchanged.
    """
    newData = {}
    for i, value in enumerate(row):
        cast = _NUMERIC_CASTS.get(fields[i]["type"])
        if cast is not None and value is not None:
            try:
                value = cast(value)
            except (TypeError, ValueError):
                # Not numeric after all: keep the raw value.
                pass
        if isinstance(value, _TEXT_TYPES):
            for bad, good in _CHAR_FIXES:
                value = value.replace(bad, good)
        newData[cursor.description[i][0]] = value
    return newData
def GetBuilder():
    """Return a BigQuery v2 API client authorised with App Engine credentials.

    The credentials are created for OAUTH_SCOPE and used to authorise a fresh
    httplib2.Http object, which is handed to ``build``.
    """
    credentials = AppAssertionCredentials(scope=OAUTH_SCOPE)
    authorized_http = credentials.authorize(httplib2.Http())
    return build('bigquery', 'v2', http=authorized_http)
bqTypeDict = { 'int' : 'INTEGER',
'varchar' : 'STRING',
'double' : 'FLOAT',
'tinyint' : 'INTEGER',
'decimal' : 'FLOAT',
'text' : 'STRING',
'smallint' : 'INTEGER',
'char' : 'STRING',
'bigint' : 'INTEGER',
'float' : 'FLOAT',
'longtext' : 'STRING'
}
def BuildFeilds(table):
    """Describe a MySQL table and return the matching BigQuery field list.

    table -- name of the MySQL table to describe.

    Returns a list of dicts with "name", "type" and, for nullable columns,
    "mode" = "NULLABLE".  MySQL types missing from ``bqTypeDict`` are logged
    and mapped to STRING.  The connection opened here is always closed.
    """
    conn = Connect()
    try:
        cursor = conn.cursor()
        # DESCRIBE cannot take the table name as a bound parameter.
        cursor.execute("DESCRIBE %s;" % table)
        tableDecorator = cursor.fetchall()
    finally:
        conn.close()

    fields = []
    for col in tableDecorator:
        # "int(10) unsigned" -> "int", "double unsigned" -> "double"
        colType = col[1].split("(")[0].split(" ")[0]
        if colType not in bqTypeDict:
            logging.warning("Unknown type detected, using string: %s", str(col[1]))
        field = {
            "name": col[0],
            "type": bqTypeDict.get(colType, "STRING"),
        }
        if col[2] == "YES":
            field["mode"] = "NULLABLE"
        fields.append(field)
    return fields
def SQLToBQBatch(table, limit=3000):
    """Stream up to ``limit`` rows of a MySQL table into BigQuery.

    table -- name of the MySQL source table.
    limit -- maximum number of rows to copy.

    Creates the dataset DATASET_ID and the table TABLE_ID when they do not
    exist yet, then reads the source in pages of at most 1000 rows and sends
    each page with ``tabledata().insertAll``.  Row-level insert errors are
    logged; the MySQL connection is always closed.

    NOTE: the paging query has no ORDER BY, so the row order between pages
    depends on the server.
    """
    logging.info("****************************************************")
    logging.info("Starting SQLToBQBatch. Got: Table: %s, Limit: %i", table, limit)
    bqDest = GetBuilder()
    fields = BuildFeilds(table)

    try:
        responce = bqDest.datasets().insert(
            projectId=PROJECT_ID,
            body={'datasetReference': {'datasetId': DATASET_ID}}).execute()
        logging.info("Added Dataset")
        logging.info(responce)
    except Exception as e:
        logging.info(e)
        if "Already Exists: " in str(e):
            logging.info("Dataset already exists")
        else:
            # The message is passed as a logging argument; a trailing extra
            # argument without a placeholder makes logging fail to format.
            logging.error("Error creating dataset: %s", e)

    try:
        responce = bqDest.tables().insert(
            projectId=PROJECT_ID,
            datasetId=DATASET_ID,
            body={'tableReference': {'projectId': PROJECT_ID,
                                     'datasetId': DATASET_ID,
                                     'tableId': TABLE_ID},
                  'schema': {'fields': fields}}
        ).execute()
        logging.info("Added Table")
        logging.info(responce)
    except Exception as e:
        logging.info(e)
        if "Already Exists: " in str(e):
            logging.info("Table already exists")
        else:
            logging.error("Error creating table: %s", e)

    conn = Connect()
    try:
        cursor = conn.cursor()
        logging.info("Starting load loop")
        count = -1
        cur_pos = 0
        total = 0
        batch_size = 1000
        while count != 0 and cur_pos < limit:
            count = 0
            # Shrink the last page so that no more than ``limit`` rows are read.
            if batch_size + cur_pos > limit:
                batch_size = limit - cur_pos
            sqlCommand = "SELECT * FROM %s LIMIT %i, %i" % (table, cur_pos, batch_size)
            logging.info("Running: %s", sqlCommand)
            cursor.execute(sqlCommand)
            data = []
            for row in cursor.fetchall():
                data.append({"json": RowToJSON(cursor, row, fields)})
                count += 1
            logging.info("Read complete")
            if count != 0:
                logging.info("Sending request")
                insertResponse = bqDest.tabledata().insertAll(
                    projectId=PROJECT_ID,
                    datasetId=DATASET_ID,
                    tableId=TABLE_ID,
                    body={"rows": data}).execute()
                cur_pos += batch_size
                total += count
                logging.info("Done %i, Total: %i, Response: %s", count, total, insertResponse)
                # "insertErrors" is a list with one {index, errors} entry per
                # failed row.
                for insert_error in insertResponse.get("insertErrors", []):
                    logging.error("Error inserting data index: %i", insert_error["index"])
                    for error in insert_error["errors"]:
                        logging.error(error)
            else:
                logging.info("No more rows")
    finally:
        conn.close()
• Generate google service account key
o IAM & Admin > Service account > create_Service_account
o Once created then create key , download and save It to the project folder on local machine – google_key.json
• Run the code in pycharm environment after installing the packages.
NOTE : The table data in mysql remains intact. Also , if one uses preview in BQ to see that you won’t see. Go to console and fire the query.
o CODE
import MySQLdb
from google.cloud import bigquery
import mysql.connector
import logging
import os
from MySQLdb.converters import conversions
import click
import MySQLdb.cursors
from google.cloud.exceptions import ServiceUnavailable
import sys
bqTypeDict = {'int': 'INTEGER',
'varchar': 'STRING',
'double': 'FLOAT',
'tinyint': 'INTEGER',
'decimal': 'FLOAT',
'text': 'STRING',
'smallint': 'INTEGER',
'char': 'STRING',
'bigint': 'INTEGER',
'float': 'FLOAT',
'longtext': 'STRING',
'datetime': 'TIMESTAMP'
}
def conv_date_to_timestamp(str_date):
    """Convert a MySQL datetime string to seconds since 1970-01-01.

    str_date -- datetime text as accepted by MySQLdb.times.DateTime_or_None.

    Returns the number of seconds as a float, or None when the text cannot be
    parsed (DateTime_or_None yields None in that case, which previously led
    to a TypeError in the subtraction).
    """
    import datetime
    date_time = MySQLdb.times.DateTime_or_None(str_date)
    if date_time is None:
        return None
    return (date_time - datetime.datetime(1970, 1, 1)).total_seconds()
def Connect(host, database, user, password):
    """Open a MySQL connection with the given settings.

    host, database, user, password -- connection settings passed straight to
    ``mysql.connector.connect``.  The arguments are now actually used instead
    of being overridden by blank hard-coded values; the port is left at the
    connector's default.
    """
    return mysql.connector.connect(host=host,
                                   database=database,
                                   user=user,
                                   password=password)
def BuildSchema(host, database, user, password, table):
    """Describe a MySQL table and return its BigQuery schema.

    host, database, user, password -- MySQL connection settings.
    table -- name of the table to describe.

    Returns a tuple of ``bigquery.SchemaField`` objects, NULLABLE or REQUIRED
    according to the column's Null flag.  MySQL types missing from
    ``bqTypeDict`` are logged and mapped to STRING.  The connection opened
    here is always closed.
    """
    logging.debug('build schema for table %s in database %s', table, database)
    conn = Connect(host, database, user, password)
    try:
        cursor = conn.cursor()
        # DESCRIBE cannot take the table name as a bound parameter.
        cursor.execute("DESCRIBE %s;" % table)
        tableDecorator = cursor.fetchall()
    finally:
        conn.close()

    schema = []
    for col in tableDecorator:
        colType = col[1].split("(")[0]
        if colType not in bqTypeDict:
            logging.warning("Unknown type detected, using string: %s", str(col[1]))
        field_mode = "NULLABLE" if col[2] == "YES" else "REQUIRED"
        schema.append(
            bigquery.SchemaField(col[0], bqTypeDict.get(colType, "STRING"), mode=field_mode)
        )
    return tuple(schema)
def bq_load(table, data, max_retries=5):
    """Send one batch of rows to BigQuery, retrying failed requests.

    table       -- object providing ``insert_data(rows)``.
    data        -- list of rows to upload.
    max_retries -- maximum number of attempts.

    Returns True when the request went through (row-level errors reported in
    the response are logged, not retried) or when there was nothing to send,
    and False when every attempt raised.  An empty batch is skipped so that
    no request without rows is sent and retried.
    """
    if not data:
        logging.info("Nothing to send")
        return True

    logging.info("Sending request")
    num_tries = 0
    while num_tries < max_retries:
        try:
            insertResponse = table.insert_data(data)
            for row in insertResponse:
                if 'errors' in row:
                    logging.error('not able to upload data: %s', row['errors'])
            return True
        except ServiceUnavailable:
            num_tries += 1
            logging.error('insert failed with exception trying again retry %d', num_tries)
        except Exception as e:
            num_tries += 1
            logging.error('not able to upload data: %s', str(e))
    return False
# The click decorators are active so that the zero-argument call in the
# __main__ guard and the documented command line (-d, -t, -i, -n, -k ...)
# supply the function's parameters.
@click.command()
@click.option('-h', '--host', default='tempus-qa.hashmapinc.com', help='MySQL hostname')
@click.option('-d', '--database', required=True, help='MySQL database')
@click.option('-u', '--user', default='root', help='MySQL user')
@click.option('-p', '--password', default='docker', help='MySQL password')
@click.option('-t', '--table', required=True, help='MySQL table')
@click.option('-i', '--projectid', required=True, help='Google BigQuery Project ID')
@click.option('-n', '--dataset', required=True, help='Google BigQuery Dataset name')
@click.option('-l', '--limit', default=0, help='max num of rows to load')
@click.option('-s', '--batch_size', default=1000, help='max num of rows to load')
@click.option('-k', '--key', default='key.json', help='Location of google service account key (relative to current working dir)')
@click.option('-v', '--verbose', default=0, count=True, help='verbose')
def SQLToBQBatch(host, database, user, password, table, projectid, dataset, limit, batch_size, key, verbose):
    """Copy a MySQL table into a BigQuery table of the same name.

    host, database, user, password -- MySQL connection settings.
    table      -- source table; also used as the BigQuery table name.
    projectid  -- BigQuery project id (accepted but not used by this body).
    dataset    -- BigQuery dataset name; created when missing.
    limit      -- maximum number of rows to copy, 0 meaning no limit.
    batch_size -- number of rows per upload request.
    key        -- service-account key file, relative to the working directory.
    verbose    -- verbosity count; values above 3 are treated as 3.

    Exactly ``limit`` rows are uploaded when a limit is set, an empty final
    batch is not sent, and the MySQL connection is always closed.
    """
    # set to max verbose level
    verbose = verbose if verbose < 3 else 3
    loglevel = logging.ERROR - (10 * verbose)
    logging.basicConfig(level=loglevel)
    logging.info("Starting SQLToBQBatch. Got: Table: %s, Limit: %i", table, limit)

    # set env key to authenticate application
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(os.getcwd(), key)
    print('file found')

    # Instantiates a client
    bigquery_client = bigquery.Client()
    print('Project id created')

    bq_dataset = bigquery_client.dataset(dataset)
    try:
        bq_dataset.create()
        logging.info("Added Dataset")
    except Exception as e:
        if "Already Exists: " in str(e):
            logging.info("Dataset already exists")
        else:
            logging.error("Error creating dataset: %s Error", str(e))

    bq_table = bq_dataset.table(table)
    bq_table.schema = BuildSchema(host, database, user, password, table)
    print('Creating schema using build schema')
    try:
        bq_table.create()
        logging.info("Added Table %s", table)
    except Exception as e:
        # A re-run against an existing table should continue loading.
        if "Already Exists: " in str(e):
            logging.info("Table already exists")
        else:
            raise

    conn = Connect(host, database, user, password)
    try:
        cursor = conn.cursor()
        logging.info("Starting load loop")
        cursor.execute("SELECT * FROM %s" % (table))

        cur_batch = []
        count = 0
        for row in cursor:
            cur_batch.append(row)
            count += 1
            if count % batch_size == 0:
                bq_load(bq_table, cur_batch)
                cur_batch = []
                logging.info("processed %i rows", count)
            # Checked after appending, so the row that reaches the limit is
            # still uploaded.
            if limit != 0 and count >= limit:
                logging.info("limit of %d rows reached", limit)
                break

        # send last elements
        if cur_batch:
            bq_load(bq_table, cur_batch)
        logging.info("Finished (%i total)", count)
    finally:
        conn.close()
    print("table created")
if __name__ == '__main__':
# run the command
SQLToBQBatch()
o Command to run the file : python mysql_to_bq.py -d 'recommendation_spark' -t temp_market_store -i inductive-cocoa-250507 -n practice123 -k key.json