SqlAlchemy outerjoin query problem filtering [duplicate] - sql

This question already has answers here:
how left outer join in sqlalchemy?
(1 answer)
sqlalchemy filter children in query, but not parent
(2 answers)
Closed 2 years ago.
I'm using SQLAlchemy, and I have a problem with this specific query.
I have three tables: data_template, device_data, and device. Each device has a value for each entry in data_template; those values are stored in device_data. I want to list every data_template row for one device together with the value that device has for it. If the device has no value for some data_template, show None instead.
It has something to do with outerjoin. Here is my model:
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class DataTemplate(Base):
    """A named data field that devices can hold a value for.

    Mapped to the ``data_template`` table.
    """

    __tablename__ = 'data_template'

    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __repr__(self):
        """Return a short debug representation showing the template name."""
        return f"<DataTemplate(name={self.name})>"
class Device(Base):
    """A device, identified by name, mapped to the ``device`` table."""

    __tablename__ = 'device'

    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __repr__(self):
        """Return a short debug representation showing the device name."""
        return f"<Device(name={self.name})>"
class DeviceData(Base):
    """The value one device holds for one data template.

    Mapped to the ``device_data`` table.
    """

    __tablename__ = 'device_data'

    id = Column(Integer, primary_key=True)
    value = Column(Integer, nullable=False)

    # Foreign keys to the template being described and to the owning device.
    data_name_id = Column(Integer, ForeignKey(DataTemplate.id), nullable=False)
    device_id = Column(Integer, ForeignKey(Device.id), nullable=False)

    # Each relationship also installs a ``device_data`` backref on the
    # class at the other end.
    data_template = relationship('DataTemplate', backref='device_data')
    device = relationship('Device', backref='device_data')

    def __repr__(self):
        """Return a debug representation naming the device, template and value."""
        return (
            f"<DeviceData(device={self.device.name}, "
            f"data_template={self.data_template.name}, value={self.value})>"
        )
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
Session.configure(bind=engine)
session = Session()
dev1 = Device(name='Receiver')
dev2 = Device(name='TV')
dat_temp1 = DataTemplate(name="height")
dat_temp2 = DataTemplate(name="width")
dat_temp3 = DataTemplate(name="length")
session.add_all([dev1, dev2, dat_temp1, dat_temp2, dat_temp3])
dd1 = DeviceData(value=100, data_template=dat_temp1, device=dev1)
dd2 = DeviceData(value=50, data_template=dat_temp2, device=dev1)
dd3 = DeviceData(value=200, data_template=dat_temp1, device=dev2)
dd4 = DeviceData(value=40, data_template=dat_temp2, device=dev2)
dd5 = DeviceData(value=30, data_template=dat_temp3, device=dev2)
session.add_all([dd1, dd2, dd3, dd4, dd5])
s = session.query(DataTemplate, DeviceData).outerjoin(DeviceData).filter(DeviceData.device==dev1)
for x in s:
print(x)
with this outerjoin I'm getting:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
and is equal to:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id"
WHERE "val"."device_id" = 1
but I want to get:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
(<DataTemplate(name=length)>, None)
and that query should be:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id" AND "val"."device_id" = 1
how do I write this specific query?

Related

Convert PostgreSQL COUNT … FILTER query to SQL Alchemy

I'm new to SQLAlchemy, and I would like to convert this PostgreSQL query:
SELECT product.*
, COUNT(feedback.like) FILTER (WHERE feedback.like = '1') AS like
, COUNT(feedback.like) FILTER (WHERE feedback.like = '-1') AS unlike
FROM feedback, product
WHERE product.id = feedback.product_id
GROUP BY product.id
ORDER BY product.id;
I have already tried this:
products = db.session.query(
Product,
func.count(Feedback.like > 0).label('like'),
func.count(Feedback.like < 0).label('unlike')
).filter(Product.guide_name_id==id)
.filter(Product.id == Feedback.product_id)
.group_by(Product.id)
.order_by(Product.id)
.all()
Thank you in advance for your help
Thanks to @IljaEverilä's comment, here is a more direct answer:
class Product(Base):
    """A product row in the ``product`` table."""

    __tablename__ = "product"

    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)

    def __repr__(self):
        """Return a short debug representation showing the product name."""
        return f"<Product(name='{self.name}')>"
class Feedback(Base):
    """A single piece of feedback attached to a product (table ``feedback``)."""

    __tablename__ = "feedback"

    id = Column(Integer, primary_key=True)
    product_id = Column(Integer, ForeignKey(Product.id))
    # The sample data below stores 1 for a like and -1 for a dislike.
    like = Column(Integer)

    product = relationship(Product)
Base.metadata.create_all(engine)

with Session(engine) as session:
    # set up test data
    widget = Product(name="widget")
    session.add_all(
        [
            widget,
            Feedback(product=widget, like=1),
            Feedback(product=widget, like=1),
            Feedback(product=widget, like=-1),
            Product(name="gadget"),
        ]
    )

    # Aggregate columns: COUNT(...) FILTER (WHERE ...) for each vote kind.
    likes = (
        func.count(Feedback.like).filter(Feedback.like == 1).label("likes")
    )
    dislikes = (
        func.count(Feedback.like).filter(Feedback.like == -1).label("dislikes")
    )

    # run the query; the outer join keeps products that have no feedback
    query = (
        select(Product, likes, dislikes)
        .select_from(Product)
        .outerjoin(Feedback)
        .group_by(Product)
    )
    results = session.execute(query).fetchall()
    print(results)
    # [(<Product(name='gadget')>, 0, 0), (<Product(name='widget')>, 2, 1)]
(Original answer)
I'm not sure if SQLAlchemy's postgresql dialect specifically handles COUNT … FILTER, but you can accomplish the same thing using SUM and CASE:
from sqlalchemy import __version__ as sa_version, case, Column, ForeignKey, func, Integer, String
from sqlalchemy.orm import Session
print(sa_version) # 1.4.0b2
class Product(Base):
__tablename__ = "product"
id = Column(Integer, primary_key=True)
name = Column(String(50), nullable=False)
class Feedback(Base):
__tablename__ = "feedback"
id = Column(Integer, primary_key=True)
product_id = Column(Integer, ForeignKey(Product.id))
like = Column(Integer)
product = relationship(Product)
Base.metadata.create_all(engine)
with Session(engine, future=True) as session:
widget = Product(name="widget")
session.add_all(
[
widget,
Feedback(product=widget, like=1),
Feedback(product=widget, like=1),
Feedback(product=widget, like=-1),
Product(name="gadget"),
]
)
results = (
session.query(
Product.name,
func.sum(case((Feedback.like > 0, 1), else_=0)).label(
"likes"
),
func.sum(case((Feedback.like < 0, 1), else_=0)).label(
"dislikes"
),
)
.select_from(Product)
.outerjoin(Feedback)
.group_by(Product)
.all()
)
print(results) # [('widget', 2, 1), ('gadget', 0, 0)]

import and mapping csv to sqlalchemy dynamically

I am creating a database with SQLAlchemy in a Flask app and filling it with selected columns from an existing CSV file, which I read with pandas. My class definitions are below.
I need to create the Company objects and commit them dynamically, but the approach below does not work. The CSV file is not small (about 20,000 records), so I cannot add them by hand. Any suggestions for adding them dynamically?
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from flask import jsonify
Base = declarative_base()
class Company(Base):
    """One company row in the ``forbesglobal2000_2016`` table.

    Every data column is stored as a required string of up to 250 characters.
    """

    __tablename__ = 'forbesglobal2000_2016'

    id = Column(Integer, primary_key=True)
    name = Column(String(250), nullable=False)
    profits = Column(String(250), nullable=False)
    marketValue = Column(String(250), nullable=False)
    revenue = Column(String(250), nullable=False)
    industry = Column(String(250), nullable=False)
class SIC(Base):
    """An SIC code linked to a company industry name (table ``SIC``)."""

    __tablename__ = "SIC"

    id = Column(Integer, primary_key=True)
    SIC = Column(Integer, nullable=False)
    # References the (non-key) ``industry`` column of the company table.
    Industry_name = Column(
        String(250), ForeignKey('forbesglobal2000_2016.industry')
    )
    # NOTE(review): the attribute name is spelled "Indusrty" in the original;
    # it is kept unchanged so existing references keep working.
    Indusrty = relationship(Company)
# configuration part
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.create_all(engine)
import sqlalchemy
from sqlalchemy.orm import sessionmaker
from database_setup import *
import pandas as pd
# opening connection with database
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.bind = engine
# Clear database
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
DBSession = sessionmaker(bind=engine)
session = DBSession()
df = pd.read_csv("forbesglobal2000-2016.csv")
df1 = pd.read_csv("SIC.csv")
# market valuation, revenue, profits and industry
profit_column = df.profits
name_column = df.name
industry_column = df.industry
revenue_column = df.revenue
marketvalue_column = df.marketValue
industry_column_f = df1.Description
SIC_column = df1.SICCode
# Build one Company per CSV row.  The list is constructed directly instead of
# assigning to company[i] on an empty list (which raises IndexError), and the
# iteration starts at the first row so that no record is skipped.
company = [
    Company(
        name=name,
        industry=industry,
        marketValue=market_value,
        profits=profit,
        revenue=revenue,
    )
    for name, industry, market_value, profit, revenue in zip(
        name_column,
        industry_column,
        marketvalue_column,
        profit_column,
        revenue_column,
    )
]

# Stage every object, then write them all in a single transaction.
session.add_all(company)
session.commit()

# printing test
com = session.query(Company).all()
for f in com:
    print(f.name)
    print(f.industry)
    print(f.profits)
    print(f.revenue)
    print(f.marketValue)
If you want to load data from CSV files into a database, just use the df.to_sql() function, which does exactly that. For example:
df.to_sql(con=engine, name=airlines.__tablename__, if_exists='replace',index=False)
Pay attention to index=False: it stops pandas from writing the DataFrame index as an extra column.
I think the index will start at 0 and not 1:
i = 1
should be
i = 0
can you try that?

Join after outerjoin in SQLAlchemy

Suppose I have a one-to-many relationship, where the parents and children are grouped by some group_id.
Note: this example is a stripped down version of my code, which is actually a many-to-many relationship. There may be some errors unrelated to the question.
class Node(Base):
__tablename__ = 'node'
id = Column(GUID, default=uuid.uuid4, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID)
title = Column(Text, nullable=False)
class Leaf(Base):
__tablename__ = 'leaf'
id = Column(GUID, nullable=False, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID, nullable=False)
The group_id is used as a way to create new versions - so nodes and leaves with the same id can exist in multiple groups.
What I want to do is compare two groups, and find all the leaves whose parents have changed. I am trying to use an outer join to do the comparison, and then two joins to filter the parent nodes:
def find_changed_leaves(group_id_a, group_id_b, session):
    """Return (LeafA, LeafB) pairs whose parent node titles differ.

    LeafA is taken from group ``group_id_a`` and LeafB, matched on leaf id,
    from group ``group_id_b``.  Each leaf is joined to the parent node of its
    own group, and only pairs whose parents have different titles are kept.
    """
    NodeA = model.Node
    NodeB = aliased(model.Node, name='node_b')
    LeafA = model.Leaf
    LeafB = aliased(model.Leaf, name='leaf_b')

    # A leaf's parent is the node with the same group and the matching id.
    parent_of_a = (
        (LeafA.group_id == NodeA.group_id) & (LeafA.parent_id == NodeA.id)
    )
    parent_of_b = (
        (LeafB.group_id == NodeB.group_id) & (LeafB.parent_id == NodeB.id)
    )

    query = session.query(LeafA, LeafB).outerjoin(LeafB, LeafA.id == LeafB.id)
    query = query.join(NodeA, parent_of_a).join(NodeB, parent_of_b)
    # Group membership
    query = query.filter(
        LeafA.group_id == group_id_a,
        LeafB.group_id == group_id_b,
    )
    # Filter for modified parents
    query = query.filter(NodeA.title != NodeB.title)
    return query.all()
This works, but it doesn't show leaves that are only in one of the groups (e.g. if a leaf was added to a node in the new group). How can I show all the leaves, returning None for a leaf that is missing from one of the groups?
Edit: I see there are perils mixing join with outer join. I tried naively changing it to .outerjoin(NodeA, ..., but it didn't help.
As mentioned in the comment, it is not entirely clear what needs to be achieved. Nonetheless, the code below should at least give you some directions.
First of all, I would not try to combine it all in one query (potentially using full joins and subqueries), but split it into 3 separate queries:
get LeafA, LeafB whose parents have changed
get LeafA that do not have a corresponding LeafB
get LeafB that do not have a corresponding LeafA
Below is the code which should run as it is in both sqlite and postgresql. Note that I have added relationships and use them in the queries. But you could do the same with explicit join conditions as in your code snippet.
import uuid
from sqlalchemy import (
create_engine, Column, Integer, String, ForeignKey, Text, and_,
ForeignKeyConstraint, UniqueConstraint, exists
)
from sqlalchemy.orm import sessionmaker, relationship, eagerload, aliased
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.dialects.postgresql import UUID as GUID
# With SQLite, ids are stored as plain strings, so GUID is rebound to String.
# To run against PostgreSQL instead, drop these two assignments and enable
# the URI below; GUID then stays the PostgreSQL UUID type imported above.
_db_uri = 'sqlite:///:memory:'
GUID = String
# _db_uri = "postgresql://aaa:bbb@localhost/mytestdb"
engine = create_engine(_db_uri, echo=True)
Session = sessionmaker(bind=engine)
# The engine is passed explicitly wherever it is needed (sessionmaker,
# drop_all/create_all), so the declarative base is not bound to it.
Base = declarative_base()


def newid():
    """Return a freshly generated random UUID as a string."""
    return str(uuid.uuid4())
# define object model
class Node(Base):
    """A node, keyed by ``(id, group_id)``, in the ``node`` table."""

    __tablename__ = 'node'

    # Composite primary key: the same id may appear in several groups.
    id = Column(GUID, default=newid, primary_key=True)
    group_id = Column(GUID, nullable=False, primary_key=True)
    # parent_id = Column(GUID)
    title = Column(Text, nullable=False)
class Leaf(Base):
    """A leaf, keyed by ``(id, group_id)``, hanging off a node of its group."""

    __tablename__ = 'leaf'
    # The parent node is referenced by the pair (parent_id, group_id).
    __table_args__ = (
        ForeignKeyConstraint(
            ['parent_id', 'group_id'],
            ['node.id', 'node.group_id'],
        ),
    )

    id = Column(GUID, nullable=False, primary_key=True)
    group_id = Column(GUID, nullable=False, primary_key=True)
    parent_id = Column(GUID, nullable=False)
    title = Column(Text, nullable=False)

    # define relationships - easier test data creation and querying
    parent = relationship(
        Node,
        primaryjoin=and_(Node.id == parent_id, Node.group_id == group_id),
        backref="children",
    )
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
session = Session()
g1, g2, l1, l2, l3 = [newid() for _ in range(5)]
# Create test data
def _add_test_data():
    """Insert one node per group, each with two leaves, and commit.

    Leaf ``l1`` exists only under the g1 node, ``l2`` only under the g2 node,
    and ``l3`` under both; the two nodes carry different titles.
    """
    g1_leaves = [
        Leaf(id=l1, title="g1 only"),
        Leaf(id=l3, title="both groups"),
    ]
    g2_leaves = [
        Leaf(id=l2, title="g2 only"),
        Leaf(id=l3, title="both groups"),
    ]
    n11 = Node(title="node1", group_id=g1, children=g1_leaves)
    n21 = Node(title="node1 changed", group_id=g2, children=g2_leaves)
    session.add_all([n11, n21])
    session.commit()
def find_changed_leaves(group_id_a, group_id_b):
    """
    Leaves which are in both versions, but a `title` for their parents is changed.

    Returns a list of ``(LeafA, LeafB)`` pairs, LeafA from group A and the
    leaf with the same id from group B.
    """
    NodeA = aliased(Node, name='node_a')
    NodeB = aliased(Node, name='node_b')
    LeafA = aliased(Leaf, name='leaf_a')
    LeafB = aliased(Leaf, name='leaf_b')

    # @note: group membership for LeafB is part of the join condition
    same_leaf_in_b = and_(LeafA.id == LeafB.id, LeafB.group_id == group_id_b)

    query = session.query(LeafA, LeafB).filter(LeafA.group_id == group_id_a)
    query = query.join(LeafB, same_leaf_in_b)
    query = query.join(NodeA, LeafA.parent).join(NodeB, LeafB.parent)
    # Filter for modified parents
    query = query.filter(NodeA.title != NodeB.title)
    return query.all()
def find_orphaned_leaves(group_id_a, group_id_b):
    """
    Leaves found in group A, but not in group B.

    Returns a list of leaves of group A for which no leaf with the same id
    exists in group B.
    """
    LeafA = aliased(Leaf, name='leaf_a')
    LeafB = aliased(Leaf, name='leaf_b')

    # EXISTS subquery: a leaf with the same id in group B.
    counterpart_in_b = (
        session.query(LeafB)
        .filter(LeafA.id == LeafB.id)
        .filter(group_id_b == LeafB.group_id)
        .exists()
    )

    orphans = session.query(LeafA).filter(~counterpart_in_b)
    # Group membership
    orphans = orphans.filter(LeafA.group_id == group_id_a)
    return orphans.all()
def find_deleted_leaves(group_id_a, group_id_b):
    """Return a tuple of ``(leaf, None)`` for leaves in group A but not in B."""
    only_in_a = find_orphaned_leaves(group_id_a, group_id_b)
    return tuple([(leaf, None) for leaf in only_in_a])
def find_added_leaves(group_id_a, group_id_b):
    """Return a tuple of ``(None, leaf)`` for leaves in group B but not in A."""
    # Swap the groups: "orphaned" in B relative to A means added in B.
    only_in_b = find_orphaned_leaves(group_id_b, group_id_a)
    return tuple([(None, leaf) for leaf in only_in_b])
# add test data
_add_test_data()
# check the results
changed = find_changed_leaves(g1, g2)
assert 1 == len(changed)
le, ri = changed[0]
assert le.id == ri.id == l3
added = find_added_leaves(g1, g2)
assert 1 == len(added)
le, ri = added[0]
assert le is None
assert ri.id == l2
deleted = find_deleted_leaves(g1, g2)
assert 1 == len(deleted)
le, ri = deleted[0]
assert le.id == l1
assert ri is None

SQLAlchemy many to many relationships confusion

Consider the following model(from pythoncentral tutorials):
class Department(Base):
__tablename__ = 'department'
id = Column(Integer, primary_key=True)
name = Column(String)
employees = relationship(
'Employee',
secondary='department_employee_link'
)
class Employee(Base):
__tablename__ = 'employee'
id = Column(Integer, primary_key=True)
name = Column(String)
hired_on = Column(DateTime, default=func.now())
departments = relationship(
Department,
secondary='department_employee_link'
)
class DepartmentEmployeeLink(Base):
__tablename__ = 'department_employee_link'
department_id = Column(Integer, ForeignKey('department.id'), primary_key=True)
employee_id = Column(Integer, ForeignKey('employee.id'), primary_key=True)
extra_data = Column(String(256))
department = relationship(Department, backref=backref("employee_assoc"))
employee = relationship(Employee, backref=backref("department_assoc"))
I understand that this piece of code establishes a many-to-many relationship between employees and departments. Suppose I have to insert the department_id and employee_id into the DepartmentEmployeeLink table; how do I do it? The tutorial says:
>>> IT = Department(name="IT")
>>> John = Employee(name="John")
>>> John_working_part_time_at_IT = DepartmentEmployeeLink(department=IT, employee=John, extra_data='part-time')
>>> s = session()
>>> s.add(John_working_part_time_at_IT)
>>> s.commit()
But I want to do it separately: first add rows to the department table, then to the employee table. Finally, I need to populate the department-employee link table, where the user enters only the extra_data column. How do I do this?
I tried doing something like this:
def mapper:
que=DBSession.query(Department)
que2=DBSession.query(Strings)
rel=DepartmentEmployeeLink(extra_data=str(x))//__init__ed this
rel.department=que
rel.employee=que.employees[0].id
DBSession.add(rel)
This is how I want the insertion to happen, since I already have data in department and employee. Can anyone tell me how to accomplish this, i.e., insert into the link table when the data already exists in the other two tables?
I learnt there's a way to do this with something like "employees.append.xxx", but I don't understand it. Can someone point me in the right direction, please? Thanks in advance.
Here is a better way to define Many to Many relationship in SQLAlchemy using association_table.
# Plain association table for the Department <-> Employee many-to-many link.
# The two foreign keys together form the primary key, so the same
# (department, employee) pair cannot be stored twice.
association_table = Table(
    'department_employee_link',
    Base.metadata,
    Column(
        'department_id', Integer, ForeignKey('department.id'),
        primary_key=True,
    ),
    Column(
        'employee_id', Integer, ForeignKey('employee.id'),
        primary_key=True,
    ),
)


class Department(Base):
    """A department; ``employees`` lists the employees linked to it."""

    __tablename__ = 'department'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    # back_populates pairs this with Employee.departments so that appending
    # on one side is reflected on the other without a round trip.
    employees = relationship(
        'Employee',
        secondary=association_table,
        back_populates='departments',
    )


class Employee(Base):
    """An employee; ``departments`` lists the departments linked to it."""

    __tablename__ = 'employee'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    hired_on = Column(DateTime, default=func.now())
    # Mirror of Department.employees over the same association table.
    departments = relationship(
        Department,
        secondary=association_table,
        back_populates='employees',
    )
IT = Department(name="IT")
John = Employee(name="John")
IT.employees.append(John)
s = session()
s.add(IT)
s.commit()

Sqlalchemy: Propagation of updates across multiple (linked) relationships

I show here an (artificial) example of three linked tables: ParentA, ChildA, and ChildAA. ChildA is related to the primary key (PK) of ParentA via foreign key, and ChildAA relates to the same key in ChildA. In this way ChildAA links to the primary key of the ParentA. I would expect that when I make a change to the ParentA PK this change propagates back to the corresponding ChildAA's attribute, but it doesn't.
Thanks in advance!
(I apologize if this has been answered or documented before, I really couldn't find anything.)
The Code:
from sqlalchemy import *
from sqlalchemy import orm
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class ParentA(Base):
    """Top-level parent whose primary key ``pid`` is referenced by ChildA."""

    __tablename__ = 'tbl_parentA'

    pid = Column(Integer, primary_key=True)
    # passive_updates=False asks the ORM itself to push primary-key changes
    # to the related ChildA rows instead of relying on the database.
    childA = orm.relationship(
        "ChildA", passive_updates=False, backref='parentA'
    )
class ChildA(Base):
    """Child of ParentA; its primary key is the foreign key to the parent."""

    __tablename__ = 'tbl_childA'

    attrib1 = Column(String, nullable=True)
    # Serves as both primary key and foreign key to ParentA.pid.
    parentA_id = Column(Integer, ForeignKey(ParentA.pid), primary_key=True)
    childAA = orm.relationship(
        "ChildAA", passive_updates=False, backref="childA"
    )
# This class is related to Parents through ChildA
class ChildAA(Base):
    """Grandchild row that points at ChildA's key (and so at ParentA's)."""

    __tablename__ = 'tbl_childAA'

    cid = Column(Integer, primary_key=True)
    attrib1 = Column(String, nullable=True)
    # References ChildA.parentA_id, which itself references ParentA.pid.
    parentA_id = Column(Integer, ForeignKey(ChildA.parentA_id))
def clear_db(db):
    """Drop every table that currently exists in the database behind *db*.

    Args:
        db: The engine to reflect the existing tables from and to drop
            them on.

    SQL echoing is switched off while the tables are dropped and the
    previous ``db.echo`` setting is restored afterwards, even if reflecting
    or dropping fails.
    """
    tmp = db.echo
    db.echo = False
    try:
        # The engine is passed explicitly to reflect() and drop(), so the
        # MetaData object itself does not need to be bound.
        metadata = MetaData()
        metadata.reflect(db)
        # sorted_tables is in dependency order; drop in reverse so tables
        # holding foreign keys go before the tables they reference.
        for table in reversed(metadata.sorted_tables):
            table.drop(db)
        metadata.clear()
    finally:
        db.echo = tmp
if __name__ == '__main__':
    # SQLite Connection
    db = create_engine('sqlite:///linked_updates.db')
    # db.echo = True

    # Initialize objects
    pa1 = ParentA()
    ca1 = ChildA(attrib1='ca1 str')
    caa1 = ChildAA(attrib1='caa1 str')

    # Assign a parent to ChildA
    ca1.parentA = pa1
    # Assign a parent to ChildAA
    caa1.childA = ca1

    # Initialize clean DB & session
    clear_db(db)
    Base.metadata.create_all(db)
    session = orm.create_session(db)

    # Write to DB
    session.add_all([pa1, ca1, caa1])
    session.flush()
    # print() calls (rather than Python 2 print statements) so the script
    # also runs on Python 3; the printed text is the same.
    print('After flush, we have: ', caa1.parentA_id, '==', caa1.childA.parentA_id)

    # Induce change, check propagation
    pa1.pid = 2
    session.flush()
    print('I expect: ', caa1.parentA_id, '==', caa1.childA.parentA_id)
    print('END')