import and mapping csv to sqlalchemy dynamically - pandas

I am creating database using sqlalchemy in flask app and filling the database with existing CSV with selected columns from it so I use pandas here is my classes creation
I need to add company objects and commit them in dynamic way , but that way does not work , the csv file is not small about 20,000 record I can not add them manually ,so any suggestions to add them in dynamic way?
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from flask import jsonify
Base = declarative_base()
class Company(Base):
__tablename__ = 'forbesglobal2000_2016'
id = Column(Integer, primary_key=True)
name = Column(String(250), nullable=False)
profits = Column(String(250), nullable=False)
marketValue = Column(String(250), nullable=False)
revenue = Column(String(250), nullable=False)
industry = Column(String(250), nullable=False)
class SIC(Base):
__tablename__ = "SIC"
id = Column(Integer, primary_key=True)
SIC = Column(Integer, nullable=False)
Industry_name = Column(String(250),ForeignKey('forbesglobal2000_2016.industry'))
Indusrty = relationship(Company)
# configuration part
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.create_all(engine)
import sqlalchemy
from sqlalchemy.orm import sessionmaker
from database_setup import *
import pandas as pd
# opening connection with database
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.bind = engine
# Clear database
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
DBSession = sessionmaker(bind=engine)
session = DBSession()
df = pd.read_csv("forbesglobal2000-2016.csv")
df1 = pd.read_csv("SIC.csv")
# market valuation, revenue, profits and industry
profit_column = df.profits
name_column = df.name
industry_column = df.industry
revenue_column = df.revenue
marketvalue_column = df.marketValue
industry_column_f = df1.Description
SIC_column = df1.SICCode
company = []
i = 1
while i < name_column.__len__():
company[i] = Company(name = name_column[i] , industry=industry_column[i], marketValue = marketvalue_column[i] , profits = profit_column[i] ,
revenue = revenue_column[i] )
i = i +1
for i in company:
session.add(i)
session.commit()
# printing test
com = session.query(Company).all()
for f in com:
print(f.name)
print(f.industry)
print(f.profits)
print(f.revenue)
print(f.marketValue)

If you want to load data from csv files to database just use df.to_sql() function it allows you to do that. For example :
df.to_sql(con=engine, name=airlines.__tablename__, if_exists='replace',index=False)
Pay attention to index=False, it's used to ignore pandas id column.

I think the index will start at 0 and not 1:
i = 1
should be
i = 0
can you try that?

Related

delete a record in flask sql_alchemy [duplicate]

I'm trying to get a server for an app working, but I'm getting an error upon login:
[!] Object '<User at 0x7f12bc185a90>' is already attached to session '2' (this is '3')
It seems the session I'm adding is already on the database. This is the snippet of code that is causing the problem:
#app.route('/login', methods=['POST'])
def login():
u = User.query.filter(User.username == request.form["username"]).first()
if not u or u.password != request.form["password"]:
return error("E1")
s = Session.get_by_user(u)
if s is not None:
db_session.delete(s)
db_session.commit()
print db_session.execute("SELECT * FROM sessions").fetchall()
s = Session(u)
db_session.add(s)
db_session.commit()
return jsonify(s.values)
As you can see, I'm printing the content from the sessions table before trying to add anything, and it is empty! ([])
What else could be causing this?
Here is the 'Session' implementation:
class Session(Base):
__tablename__ = "sessions"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey('users.id'), unique=True)
user = relationship(User)
key = Column(String(50), unique=True)
created = Column(DateTime)
def __init__(self, user=None):
self.user = user
self.key = base64.encodestring(os.urandom(24)).strip()
self.created = datetime.now()
def __repr__(self):
return '<Session %r>' % (self.key)
#property
def values(self):
return {"username" : self.user.username,
"key" : self.key,
"created" : str(self.created),
}
#classmethod
def get_by_key(cls, key):
s = cls.query.filter(cls.key == key).first()
#print datetime.now() - s.created
if s and datetime.now() - s.created > settings.SESSION_LIFETIME:
s = None
return s
#classmethod
def get_by_user(cls, user):
s = cls.query.filter(cls.user == user).first()
if s and datetime.now() - s.created > settings.SESSION_LIFETIME:
s.query.delete()
db_session.commit()
s = None
return s
As #marcinkuzminski mentioned, you can't add an object that is already attached to another session. Just pulling in the original session from the object with object_session() is risky, though, if you aren't sure that session originated in the same thread context you're currently operating in. A thread-safe method is to use merge():
local_object = db_session.merge(original_object)
db_session.add(local_object)
db_session.commit()
Object you're trying to modify is already attached to another session.
Maybe you have wrong imports, and db_session is a new instance.
A good workaround to this is to extract the current bound session and use it:
Instead of:
db_session.add(s)
Do:
current_db_sessions = db_session.object_session(s)
current_db_sessions.add(s)
This db session issue will arise if you are having server.py and model.py importing each other
server.py
from flask import Flask
import os
import models as appmod #################### importing models here in server.py<----------
app = Flask(__name__) # L1
app.config.from_object(os.environ['APP_SETTINGS']) # L2
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False # L3
database = SQLAlchemy(app) # L4
db = database # L5
#app.route('/item_delete/<id>', methods=['DELETE'])
def remove_method(id = None):
data_rec = appmod.Employee.query.get(id)
db.session.delete(data_rec)
db.session.commit()
return "DELETE"
if __name__ == '__main__':
app.run(port=5000, host='0.0.0.0',debug=True,threaded=True)
models.py
from server import db #################### importing server in models.py here <------------
from sqlalchemy.dialects.mysql import JSON
class Employee(db.Model):
__tablename__ = 'employe_flask'
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(128))
datetime = db.Column(db.DateTime)
designation = db.Column(db.String(128))
def __init__(self, name, datetime, designation):
self.name = name
self.datetime = datetime
self.designation = designation
#staticmethod
def delete_rec(data_rec):
db.session.delete(data_rec)#.delete
db.session.commit()
def __repr__(self):
record = {"name":self.name,"date":self.datetime.ctime(),"designation":self.designation}.__str__()
return record
Remove the line L1 to L5 from server.py and place it in common file like settings.py
and import 'app' and 'db' to server.py and import db in models.py
like this files below
server.py
from flask import Flask
import os
import models as appmod
from settings import app, db
#app.route('/item_delete/<id>', methods=['DELETE'])
def remove_method(id = None):
data_rec = appmod.Employee.query.get(id)
db.session.delete(data_rec)
db.session.commit()
return "DELETE"
if __name__ == '__main__':
app.run(port=5000, host='0.0.0.0',debug=True,threaded=True)
settings.py
import os
from flask import Flask
from flask.ext.sqlalchemy import SQLAlchemy
app = Flask(__name__) # L1
app.config.from_object(os.environ['APP_SETTINGS']) # L2
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False # L3
database = SQLAlchemy(app) # L4
db = database # L5
models.py
from settings import db
from sqlalchemy.dialects.mysql import JSON
class Employee(db.Model):
__tablename__ = 'employe_flask'
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(128))
datetime = db.Column(db.DateTime)
designation = db.Column(db.String(128))
def __init__(self, name, datetime, designation):
self.name = name
self.datetime = datetime
self.designation = designation
#staticmethod
def delete_rec(data_rec):
db.session.delete(data_rec)#.delete
db.session.commit()
def __repr__(self):
record = {"name":self.name,"date":self.datetime.ctime(),"designation":self.designation}.__str__()
return record
This error means the record you are handling is attached to 2 different session(db)!
One of the reasons is that you may define your model with one db = SQLAlchemy(app) and add/insert/modify the database with another!
My solution is UNIFORMING THE DB!
try this:
u = db.session.query(User).filter(User.username == request.form["username"]).first()
Instead of this:
u = User.query.filter(User.username == request.form["username"]).first()
I had this problem too.
I created a test_file.py and added this code:
from app import app
from models import Tovar
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy(app)
tovardel = Tovar.query.filter(Tovar.region == 1 and Tovar.price == 12).first()
db.session.delete(tovardel)
tovar = Tovar.query.filter(Tovar.region == 1 and Tovar.price == 12).first()
print(tovar.description)
and when I ran the code I got this error:
Object '<Tovar at 0x7f09cbf74208>' is already attached to session '1' (this is '2')
PROBLEM SOLVING:
If you have db = SQLAlchemy(app) in, for example, text_file.py, and in app.py, you get this problem all time. You should del db = SQLAlchemy(app), and import db from app from app import db
I faced the same issue. I was defining models in a separate file and I had to call SQLAlchemy twice. That's why there were two different sessions were running.
I solved this by doing following:
In case you are trying to remove an object from db:
Just create the removeObject function inside the model

IntegrityError: datatype mismatch when trying to import a row from a dataframe into a sqlite database using CRUD

First of all, this is my first time working with sqlite and sqlalchemy, so I'm quite a noob. I've been encountering an IntegrityError: datatype mismatch when trying to import a row from a dataframe into a sqlite database using CRUD. This is the code I've been using.
import sqlite3
import csv
import pandas as pd
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Enum, Float, ForeignKey
engine = create_engine('sqlite:///records.sqlite', echo = True)
meta = MetaData()
Base = declarative_base()
Session = sessionmaker(bind=engine)
session = Session()
df = pd.read_csv('records.csv')
class Records(Base):
__tablename__ = 'Records'
id = Column(Integer, primary_key = True)
description = Column(String)
userid = Column(Integer)
money = Column(Integer)
team = Column(Integer)
points = Column(Integer)
state = Column(String)
Base.metadata.create_all(engine)
row = Records(id = df['id'].iloc[0],
description = df['description'].iloc[0],
userid = df['userid'].iloc[0],
money = df['money'].iloc[0],
team = df['team'].iloc[0],
points = df['points'].iloc[0],
state = df['state'].iloc[0])
session.add(row)
session.commit()
It always ends with an IntegrityError: datatype mismatch. I've checked the type of the data by printing them out and I'm getting <class 'str'> for both 'description' and 'state, and <class 'numpy.int64'> for the rest, just as intended, so I'm not sure what the problem is here. Please advise, thank you.

SqlAlchemy outerjoin query problem filtering [duplicate]

This question already has answers here:
how left outer join in sqlalchemy?
(1 answer)
sqlalchemy filter children in query, but not parent
(2 answers)
Closed 2 years ago.
I'm using sqlalchemy, and I have problem with this specific query.
I have data_template, devices_data, and device. Each device have value for each data in data_template. Those values are stored in devices_data. I want to list data_template for one device with values that this device has. If there is no value for some data_template, show None.
It has something to do with outerjoin. Here is my model:
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class DataTemplate(Base):
__tablename__ = 'data_template'
id = Column(Integer, primary_key=True)
name = Column(String)
def __repr__(self):
return f"<DataTemplate(name={self.name})>"
class Device(Base):
__tablename__ = 'device'
id = Column(Integer, primary_key=True)
name = Column(String)
def __repr__(self):
return f"<Device(name={self.name})>"
class DeviceData(Base):
__tablename__ = 'device_data'
id = Column(Integer, primary_key=True)
value = Column(Integer, nullable=False)
data_name_id = Column(Integer, ForeignKey(DataTemplate.id), nullable=False)
device_id = Column(Integer, ForeignKey(Device.id), nullable=False)
data_template = relationship('DataTemplate', backref='device_data')
device = relationship('Device', backref='device_data')
def __repr__(self):
return f"<DeviceData(device={self.device.name}, data_template={self.data_template.name}, value={self.value})>"
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
Session.configure(bind=engine)
session = Session()
dev1 = Device(name='Receiver')
dev2 = Device(name='TV')
dat_temp1 = DataTemplate(name="height")
dat_temp2 = DataTemplate(name="width")
dat_temp3 = DataTemplate(name="length")
session.add_all([dev1, dev2, dat_temp1, dat_temp2, dat_temp3])
dd1 = DeviceData(value=100, data_template=dat_temp1, device=dev1)
dd2 = DeviceData(value=50, data_template=dat_temp2, device=dev1)
dd3 = DeviceData(value=200, data_template=dat_temp1, device=dev2)
dd4 = DeviceData(value=40, data_template=dat_temp2, device=dev2)
dd5 = DeviceData(value=30, data_template=dat_temp3, device=dev2)
session.add_all([dd1, dd2, dd3, dd4, dd5])
s = session.query(DataTemplate, DeviceData).outerjoin(DeviceData).filter(DeviceData.device==dev1)
for x in s:
print(x)
with this outerjoin I'm getting:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
and is equal to:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id"
WHERE "val"."device_id" = 1
but I want to get:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
(<DataTemplate(name=length)>, None)
and that query should be:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id" AND "val"."device_id" = 1
how do I write this specific query?

Nested fields with mashmallow_sqlalchemy and flask_marshmallow

I am trying to get a nested serialized output, but all I get is the 'many' side of the many-to-one's table's primary key.
Expected output:
[{'part_numbers':
{'part_number': '23103048', 'description': 'blue product'},
'machines': 2,
'percent_complete': 5.0,
'serial_number': '001',
'timestamp_created': 2020-03-12T15:4:23.135098},
{'part_numbers':
{'part_number': '44444009', 'description': 'red product'},
'machines': 1,
'percent_complete': 60.0,
'serial_number': '002',
'timestamp_created': '2020-03-12T15:44:23.135098'}]
Actual output:
[{'id': 3,
'machines': 2,
'percent_complete': 5.0,
'serial_number': '001',
'timestamp_created': 2020-03-12T15:4:23.135098},
{'id': 1,
'machines': 1,
'percent_complete': 60.0,
'serial_number': '0002',
'timestamp_created': '2020-03-12T15:44:23.135098'}]
These are my files. I have tried adding part_numbers = ma.Nested(PartNumbers) to the Machine schema inside models.py, but it didn't change the result!
my_app
|--- my_app
|--- \__init\__.py
|--- models.py
|--- views.py
models.py
from flask_sqlalchemy import SQLAlchemy
from flask_marshmallow import Marshmallow
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
db = SQLAlchemy()
ma = Marshmallow()
### Models ###
class Machine(db.Model):
id = db.Column(db.Integer, primary_key=True)
serial_number = db.Column(db.Text, unique=True)
percent_complete = db.Column(db.Float)
part_number_id = db.Column(db.Integer, db.ForeignKey('PartNumbers.id'))
timestamp_created = db.Column(db.DateTime, default=datetime.utcnow)
class PartNumbers(db.Model):
__tablename__ = 'PartNumbers'
id = db.Column(db.Integer, primary_key=True)
part_number = db.Column(db.Text, unique=True)
description = db.Column(db.Text)
machines = db.relationship('Machines', backref='machines', lazy='dynamic')
### Schemas ###
class PartNumbersSchema(SQLAlchemyAutoSchema):
class Meta:
model = PartNumbers
include_fk = True
class MachineSchema(SQLAlchemyAutoSchema):
class Meta:
model = Machines
include_relationships = True
__init__.py
from flask import Flask
from .models import db, ma
from .views import my_app
app = Flask(__name__)
db.init_app(app)
ma.init_app(app)
app.register_blueprint(my_app)
views.py
from flask import Blueprint
from .models import db, Machines, MachineSchema, PartNumbers
my_app = Blueprint("my_app", __name__)
machines_schema = MachineSchema(many=True)
#my_app.route('/')
def home()
machines = db.session.query(Machines).all()
return machines_schema.dump(machines)
Define part_number in the Machine class:
class Machine(db.Model):
id = db.Column(db.Integer, primary_key=True)
serial_number = db.Column(db.Text, unique=True)
percent_complete = db.Column(db.Float)
part_number_id = db.Column(db.Integer, db.ForeignKey('PartNumbers.id'))
timestamp_created = db.Column(db.DateTime, default=datetime.utcnow)
part_numbers = db.relationship('PartNumbers')
You have to define part_numbers in the MachineSchema explicitly:
from marshmallow_sqlalchemy import fields
class MachineSchema(SQLAlchemyAutoSchema):
class Meta:
model = Machines
part_numbers = fields.Nested(PartNumbersSchema)
I assumed part_numbers is a single object (1:1 relationship). Otherwise, you have to add many=True to part_numbers.
More details: https://marshmallow.readthedocs.io/en/stable/nesting.html
[edited to incorporate correction in comment]

Sqlalchemy: Propagation of updates across multiple (linked) relationships

I show here an (artificial) example of three linked tables: ParentA, ChildA, and ChildAA. ChildA is related to the primary key (PK) of ParentA via foreign key, and ChildAA relates to the same key in ChildA. In this way ChildAA links to the primary key of the ParentA. I would expect that when I make a change to the ParentA PK this change propagates back to the corresponding ChildAA's attribute, but it doesn't.
Thanks in advance!
(I apologize if this has been answered or documented before, I really couldn't find anything.)
The Code:
from sqlalchemy import *
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class ParentA(Base):
__tablename__ = 'tbl_parentA'
pid = Column(Integer, primary_key=True)
childA = orm.relationship("ChildA", passive_updates=False, backref='parentA')
class ChildA(Base):
__tablename__ = 'tbl_childA'
attrib1 = Column(String, nullable=True)
parentA_id = Column(Integer, ForeignKey(ParentA.pid), primary_key=True)
childAA = orm.relationship("ChildAA", passive_updates=False, backref="childA")
# This class is related to Parents through ChildA
class ChildAA(Base):
__tablename__ = 'tbl_childAA'
cid = Column(Integer, primary_key=True)
attrib1 = Column(String, nullable=True)
parentA_id = Column(Integer, ForeignKey(ChildA.parentA_id))
def clear_db(db):
tmp = db.echo
db.echo = False
metadata = MetaData(bind=db)
metadata.reflect(db)
for table in reversed(metadata.sorted_tables):
table.drop(db)
metadata.clear()
db.echo = tmp
if __name__ == '__main__':
# SQLite Connection
db = create_engine('sqlite:///linked_updates.db')
# db.echo = True
# Initalize Objects
pa1 = ParentA()
ca1 = ChildA(attrib1='ca1 str')
caa1= ChildAA(attrib1='caa1 str')
# Assign a parent to ChildA
ca1.parentA = pa1
# Assign a parent to ChildAA
caa1.childA = ca1
# Initialize clean DB & session
clear_db(db)
Base.metadata.create_all(db)
session = orm.create_session(db)
# Write to DB
session.add_all([pa1, ca1, caa1])
session.flush()
print 'After flush, we have: ', caa1.parentA_id, '==', caa1.childA.parentA_id
# Induce change, check propagation
pa1.pid = 2
session.flush()
print 'I expect: ', caa1.parentA_id, '==', caa1.childA.parentA_id
print 'END'