I'm working on FCM notifications and I need to select all push tokens from the user_notification_tokens table, filtered by a datetime column of another table. There is no direct relation between these two tables, but both of them are related to the users table.
How can I do it with sqlalchemy?
simplified schema of sqlalchemy models relations:
I've tried to do it like this:
def get_tokens():
    """Return the push tokens of users whose prescription visit is coming up.

    A visit counts as upcoming when its ``visiting_at`` is later than the
    current UTC time and no later than ``config.MINUTES_TO_PRESCRIPTION``
    minutes from now.
    """
    db: Session = next(db_service.get_session())
    # Take one timestamp so both bounds of the window use the same instant.
    now = datetime.now(pytz.utc)
    x_minutes_to_event = now + timedelta(minutes=config.MINUTES_TO_PRESCRIPTION)
    tokens = [
        item[0]
        for item in db.query(models.UserNotificationsToken)
        .join(
            models.User,
            models.User.id == models.UserNotificationsToken.user_id,
        )
        .join(
            models.UserPrescription,
            models.UserPrescription.user_id == models.User.id,
        )
        .filter(
            models.UserPrescription.visiting_at <= x_minutes_to_event,
            models.UserPrescription.visiting_at > now,
        )
        .values(column('token'))
    ]
    return tokens
# or
tokens = [
item[0]
for item in db.query(models.UserNotificationsToken)\
.join(
models.UserPrescription,
models.UserNotificationsToken.user_id == models.UserPrescription.user_id,
)\
.filter(
models.UserPrescription.visiting_at <= x_minutes_to_event,
models.UserPrescription.visiting_at > datetime.now(pytz.utc),
)\
.values(column('token'))
]
but I got these errors:
sqlalchemy.exc.InvalidRequestError: Don't know how to join to <Mapper at 0x7f0921681fd0; User>. Please use the .select_from() method to establish an explicit left side, as well as providing an explicit ON clause if not present already to help resolve the ambiguity.
# or
sqlalchemy.exc.InvalidRequestError: Don't know how to join to <Mapper at 0x7f54970e4970; UserPrescription>. Please use the .select_from() method to establish an explicit left side, as well as providing an explicit ON clause if not present already to help resolve the ambiguity.
class UserNotificationToken(Base):
    """Push-notification token row; each token references one user."""

    __tablename__ = 'user_notification_tokens'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    token = Column(String)

    # Counterpart of User.tokens.
    user = relationship('User', back_populates='tokens')
class User(Base):
    """User row that both the tokens and the prescriptions point to."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)

    # Counterpart of UserNotificationToken.user.
    tokens = relationship(UserNotificationToken, back_populates='user')
    # Counterpart of UserPrescription.user.
    prescriptions = relationship('UserPrescription', back_populates='user')
class UserPrescription(Base):
    """Prescription row carrying the visit datetime used for filtering."""

    __tablename__ = 'user_prescriptions'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('users.id'), nullable=False)
    visiting_at = Column(DateTime)

    # Counterpart of User.prescriptions.
    user = relationship(User, back_populates='prescriptions')
metadata.create_all(engine)
class Config:
    """Settings used by the example queries below."""

    # Size, in minutes, of the look-ahead window for upcoming visits.
    MINUTES_TO_PRESCRIPTION = 60


with Session(engine) as session, session.begin():
    now = datetime.now(timezone.utc)
    # Derive the window from Config instead of repeating the literal 60.
    x_minutes_to_event = now + timedelta(minutes=Config.MINUTES_TO_PRESCRIPTION)

    # Every example uses distinct(): a user with several prescriptions inside
    # the window would otherwise yield the same token once per prescription.

    # Example 1 with explicit joins.
    query = (
        session.query(UserNotificationToken.token)
        .join(User, UserNotificationToken.user_id == User.id)
        .join(UserPrescription, User.id == UserPrescription.user_id)
        .where(
            UserPrescription.visiting_at <= x_minutes_to_event,
            UserPrescription.visiting_at > now,
        )
        .distinct()
    )
    for (token,) in query.all():
        print(token)

    # Example 2 with joins inferred from relationships.
    query = (
        session.query(UserNotificationToken.token)
        .join(UserNotificationToken.user)
        .join(User.prescriptions)
        .where(
            UserPrescription.visiting_at <= x_minutes_to_event,
            UserPrescription.visiting_at > now,
        )
        .distinct()
    )
    for (token,) in query.all():
        print(token)

    # Example 3 join directly between tables with user_id FKs.
    query = (
        session.query(UserNotificationToken.token)
        .join(
            UserPrescription,
            UserNotificationToken.user_id == UserPrescription.user_id,
        )
        .where(
            UserPrescription.visiting_at <= x_minutes_to_event,
            UserPrescription.visiting_at > now,
        )
        .distinct()
    )
    for (token,) in query.all():
        print(token)
Related
I'm trying to use an SQLite database via SQLAlchemy 1.4 ORM to filter data based on relationships.
Data
My example data consists of groups and members in a simple many-to-many schema:
[...]
member_f = Member(name="F")
group_1 = Group(name="1", members=[member_a, member_b, member_c]) <-- find this group via member names
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])
[...]
Full running example code (schema, objects, queries):
from sqlalchemy import Table, Column, Integer, String, ForeignKey
from sqlalchemy import create_engine, select, func, text, and_
from sqlalchemy.orm import relationship, declarative_base, sessionmaker
engine = create_engine("sqlite+pysqlite:///:memory:", future=True, echo=False)
Session = sessionmaker(bind=engine, future=True)
# Schema
Base = declarative_base()
groups_members = Table("groups_members", Base.metadata,
Column("group_id", ForeignKey("groups.id")),
Column("member_name", ForeignKey("members.name")),
)
class Group(Base):
__tablename__ = "groups"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
members = relationship("Member", secondary=groups_members, backref="groups", lazy="subquery")
def __repr__(self):
return f"<Group: {self.name}>"
class Member(Base):
__tablename__ = "members"
name = Column(String, primary_key=True)
def __repr__(self):
return f"<Member: {self.name}>"
Base.metadata.create_all(engine)
# Objects
member_a = Member(name="A")
member_b = Member(name="B")
member_c = Member(name="C")
member_d = Member(name="D")
member_e = Member(name="E")
member_f = Member(name="F")
group_1 = Group(name="1", members=[member_a, member_b, member_c])
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])
print(f"{member_a}: {member_a.groups}") # OK
with Session() as session:
session.add(group_1)
session.add(group_2)
session.add(group_3)
session.add(group_4)
session.commit()
print(session.query(Group).all()) # OK
# Query users example
def get_members_in_more_than_2_groups():
with Session() as session:
return session.execute(
select(Member, func.count(groups_members.columns.group_id).label('group_members_count'))
.join(groups_members)
.group_by(Member.name)
.having(text('group_members_count > 2'))
).all()
for m in get_members_in_more_than_2_groups():
print(m) # OK
# Query groups problem: associated with A and B but not with E or F
def get_groups_by_member_names(member_names_included, member_names_excluded):
with Session() as session:
included = session.execute(select(Member).where(Member.name.in_(member_names_included))).all()
excluded = session.execute(select(Member).where(Member.name.in_(member_names_excluded))).all()
return session.execute(
select(Group)
.join(Group.members)
.where(
and_(
Group.members.contains(included),
~Group.members.contains(excluded),
)
)
.group_by(Group.id)
).scalars().all()
for g in get_groups_by_member_names(member_names_included=["A", "B"], member_names_excluded=["E", "F"]):
print(g) # Expected output: <Group: 1>
Goal
Now I'm trying to find all groups that
have both members with the names A and B (that's groups 1 and 2)
and don't have any member named E or F (removing group 2)
resulting in just group 1.
Problem
The relevant (and failing) function in the example code is get_groups_by_member_names and with my lack of database knowledge, I'm quite stuck.
Most existing questions that I could find on SO only need to filter by one relationship value. But I need them to consider the lists of included and excluded member names.
I have tried to get the members as SQLAlchemy objects first and inserting those into the query but without any luck. I may have done that completely wrong, though.
I also tried joining the tables, filtering with the names list and counting the grouped results... It's hard for me to tell whether I'm on the right track or not at all.
Running over all groups in Python and applying the filtering there would be my fallback workaround. But with many items, the database can probably handle it more efficiently.
Any help greatly appreciated, I am happy with anything that works. I could probably also work my way up from a functioning SQL statement.
Thanks for your time!
Edit 1:
I found this answer https://stackoverflow.com/a/21104689/5123171 and while it works on small data sets, it's terribly slow on larger ones (about 60 seconds for 500 members and 10k groups):
def get_group_by_members(member_names_included, member_names_excluded):
    """Return groups holding every included member and no excluded member.

    The join keeps only association rows for included names; a group
    qualifies when the count of those rows equals the number of included
    names and none of its members carries an excluded name.
    """
    member_name = groups_members.columns.member_name
    has_excluded_member = Group.members.any(
        Member.name.in_(member_names_excluded)
    )
    with Session() as session:
        matching = (
            session.query(Group)
            .join(groups_members)
            .filter(member_name.in_(member_names_included))
            .group_by(Group.id)
            .having(func.count(member_name) == len(member_names_included))
            .filter(~has_excluded_member)
        )
        return matching.all()
Ok, here's what I ended up with via trial & error in an SQL editor. This is faster than the previous attempt (50 milliseconds on the same data).
The comments in the code correspond to the following steps:
Find groups containing excluded members
Filter those out of the assignment table
Filter remaining assignment table by included members
Group by group IDs
Return all remaining groups matching the number of included members
SQL
-- Groups that contain both members A and B and neither E nor F.
-- String literals use single quotes; double quotes denote identifiers in SQL.
SELECT * FROM groups
WHERE id IN (
    SELECT group_id FROM groups_members
    WHERE member_name IN ('A', 'B')         -- 3.
    AND group_id NOT IN (                   -- 2.
        SELECT group_id FROM groups_members
        WHERE member_name IN ('E', 'F')     -- 1.
    )
    GROUP BY group_id                       -- 4.
    HAVING count(member_name) = 2           -- 5.
)
SQLAlchemy
# Groups that contain every included member and none of the excluded ones.
# The chain is wrapped in parentheses so the leading-dot continuation lines
# form a single valid expression, and the result is kept in a variable.
groups = (
    session.query(Group)
    .where(
        Group.id.in_(
            session.query(groups_members.c.group_id)
            .where(
                groups_members.c.member_name.in_(member_names_included),  # 3.
                groups_members.c.group_id.not_in(  # 2.
                    session.query(groups_members.c.group_id).where(
                        groups_members.c.member_name.in_(member_names_excluded)  # 1.
                    )
                ),
            )
            .group_by(groups_members.c.group_id)  # 4.
            .having(
                func.count(groups_members.c.member_name)
                == len(member_names_included)  # 5.
            )
        )
    )
    .all()
)
And the full running example in one piece:
from sqlalchemy import Table, Column, Integer, String, ForeignKey
from sqlalchemy import create_engine, select, func, text, and_, not_
from sqlalchemy.orm import relationship, declarative_base, sessionmaker
engine = create_engine("sqlite+pysqlite:///:memory:", future=True, echo=False)
Session = sessionmaker(bind=engine, future=True)
# Schema
Base = declarative_base()
groups_members = Table(
"groups_members",
Base.metadata,
Column("group_id", ForeignKey("groups.id")),
Column("member_name", ForeignKey("members.name")),
)
class Group(Base):
__tablename__ = "groups"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
members = relationship(
"Member", secondary=groups_members, backref="groups", lazy="subquery"
)
def __repr__(self):
return f"<Group {self.id}: {self.name}>"
class Member(Base):
__tablename__ = "members"
name = Column(String, primary_key=True)
def __repr__(self):
return f"<Member: {self.name}>"
Base.metadata.create_all(engine)
# Objects
member_a = Member(name="A")
member_b = Member(name="B")
member_c = Member(name="C")
member_d = Member(name="D")
member_e = Member(name="E")
member_f = Member(name="F")
group_1 = Group(name="1", members=[member_a, member_b, member_c])
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])
with Session() as session:
session.add(group_1)
session.add(group_2)
session.add(group_3)
session.add(group_4)
session.commit()
# Query
member_names_included = ["A", "B"]
member_names_excluded = ["E", "F"]
# Raw SQL variant
with Session() as session:
    # Textual SQL is wrapped in text(): passing a plain string to execute()
    # is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
    result = session.execute(
        text(
            """
            SELECT * FROM groups
            WHERE id IN (
                SELECT group_id FROM groups_members
                WHERE member_name IN ('A', 'B')
                AND group_id NOT IN (
                    SELECT group_id FROM groups_members
                    WHERE member_name IN ('E', 'F')
                )
                GROUP BY group_id
                HAVING count(member_name) = 2
            )
            """
        )
    ).all()
    # Rows are tuple-like; the column-name -> value view needed for keyword
    # expansion is exposed as Row._mapping.
    groups = [Group(**r._mapping) for r in result]
for r in groups:
    print(f'SQL {r}')
# ORM Variant
with Session() as session:
    member_name = groups_members.c.member_name
    group_id = groups_members.c.group_id

    # Groups that contain at least one excluded member.
    excluded_group_ids = session.query(group_id).where(
        member_name.in_(member_names_excluded)
    )

    # Of the remaining association rows, keep those for included members and
    # require one row per included name, so every included member is present
    # in the group, not just a few.
    matching_group_ids = (
        session.query(group_id)
        .where(
            member_name.in_(member_names_included),
            group_id.not_in(excluded_group_ids),
        )
        .group_by(group_id)
        .having(func.count(member_name) == len(member_names_included))
    )

    result = session.query(Group).where(Group.id.in_(matching_group_ids)).all()
for r in result:
    print(f'ORM {r}')
I hope this is helpful to anyone and if you have suggestions for improvement, please let me know.
I'm new to SQLAlchemy, and I would like to convert this PostgreSQL query:
SELECT product.*
, COUNT(feedback.like) FILTER (WHERE feedback.like = '1') AS like
, COUNT(feedback.like) FILTER (WHERE feedback.like = '-1') AS unlike
FROM feedback, product
WHERE product.id = feedback.product_id
GROUP BY product.id
ORDER BY product.id;
I have already tried this:
products = db.session.query(
Product,
func.count(Feedback.like > 0).label('like'),
func.count(Feedback.like < 0).label('unlike')
).filter(Product.guide_name_id==id)
.filter(Product.id == Feedback.product_id)
.group_by(Product.id)
.order_by(Product.id)
.all()
Thank you in advance for your help
Thanks to @IljaEverilä's comment, here is a more direct answer:
class Product(Base):
__tablename__ = "product"
id = Column(Integer, primary_key=True)
name = Column(String(50), nullable=False)
def __repr__(self):
return f"<Product(name='{self.name}')>"
class Feedback(Base):
__tablename__ = "feedback"
id = Column(Integer, primary_key=True)
product_id = Column(Integer, ForeignKey(Product.id))
like = Column(Integer)
product = relationship(Product)
Base.metadata.create_all(engine)
with Session(engine) as session:
# set up test data
widget = Product(name="widget")
session.add_all(
[
widget,
Feedback(product=widget, like=1),
Feedback(product=widget, like=1),
Feedback(product=widget, like=-1),
Product(name="gadget"),
]
)
# run the query
query = (
select(
Product,
func.count(Feedback.like)
.filter(Feedback.like == 1)
.label("likes"),
func.count(Feedback.like)
.filter(Feedback.like == -1)
.label("dislikes"),
)
.select_from(Product)
.outerjoin(Feedback)
.group_by(Product)
)
results = session.execute(query).fetchall()
print(results)
# [(<Product(name='gadget')>, 0, 0), (<Product(name='widget')>, 2, 1)]
(Original answer)
I'm not sure if SQLAlchemy's postgresql dialect specifically handles COUNT … FILTER, but you can accomplish the same thing using SUM and CASE:
from sqlalchemy import __version__ as sa_version, case, Column, ForeignKey, func, Integer, String
from sqlalchemy.orm import Session
print(sa_version) # 1.4.0b2
class Product(Base):
__tablename__ = "product"
id = Column(Integer, primary_key=True)
name = Column(String(50), nullable=False)
class Feedback(Base):
__tablename__ = "feedback"
id = Column(Integer, primary_key=True)
product_id = Column(Integer, ForeignKey(Product.id))
like = Column(Integer)
product = relationship(Product)
Base.metadata.create_all(engine)
with Session(engine, future=True) as session:
widget = Product(name="widget")
session.add_all(
[
widget,
Feedback(product=widget, like=1),
Feedback(product=widget, like=1),
Feedback(product=widget, like=-1),
Product(name="gadget"),
]
)
results = (
session.query(
Product.name,
func.sum(case((Feedback.like > 0, 1), else_=0)).label(
"likes"
),
func.sum(case((Feedback.like < 0, 1), else_=0)).label(
"dislikes"
),
)
.select_from(Product)
.outerjoin(Feedback)
.group_by(Product)
.all()
)
print(results) # [('widget', 2, 1), ('gadget', 0, 0)]
This question already has answers here:
how left outer join in sqlalchemy?
(1 answer)
sqlalchemy filter children in query, but not parent
(2 answers)
Closed 2 years ago.
I'm using sqlalchemy, and I have problem with this specific query.
I have data_template, device_data, and device. Each device has a value for each entry in data_template. Those values are stored in device_data. I want to list the data_template entries for one device together with the values that this device has. If there is no value for some data_template entry, show None.
It has something to do with outerjoin. Here is my model:
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class DataTemplate(Base):
__tablename__ = 'data_template'
id = Column(Integer, primary_key=True)
name = Column(String)
def __repr__(self):
return f"<DataTemplate(name={self.name})>"
class Device(Base):
__tablename__ = 'device'
id = Column(Integer, primary_key=True)
name = Column(String)
def __repr__(self):
return f"<Device(name={self.name})>"
class DeviceData(Base):
__tablename__ = 'device_data'
id = Column(Integer, primary_key=True)
value = Column(Integer, nullable=False)
data_name_id = Column(Integer, ForeignKey(DataTemplate.id), nullable=False)
device_id = Column(Integer, ForeignKey(Device.id), nullable=False)
data_template = relationship('DataTemplate', backref='device_data')
device = relationship('Device', backref='device_data')
def __repr__(self):
return f"<DeviceData(device={self.device.name}, data_template={self.data_template.name}, value={self.value})>"
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
Session.configure(bind=engine)
session = Session()
dev1 = Device(name='Receiver')
dev2 = Device(name='TV')
dat_temp1 = DataTemplate(name="height")
dat_temp2 = DataTemplate(name="width")
dat_temp3 = DataTemplate(name="length")
session.add_all([dev1, dev2, dat_temp1, dat_temp2, dat_temp3])
dd1 = DeviceData(value=100, data_template=dat_temp1, device=dev1)
dd2 = DeviceData(value=50, data_template=dat_temp2, device=dev1)
dd3 = DeviceData(value=200, data_template=dat_temp1, device=dev2)
dd4 = DeviceData(value=40, data_template=dat_temp2, device=dev2)
dd5 = DeviceData(value=30, data_template=dat_temp3, device=dev2)
session.add_all([dd1, dd2, dd3, dd4, dd5])
s = session.query(DataTemplate, DeviceData).outerjoin(DeviceData).filter(DeviceData.device==dev1)
for x in s:
print(x)
with this outerjoin I'm getting:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
and is equal to:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id"
WHERE "val"."device_id" = 1
but I want to get:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
(<DataTemplate(name=length)>, None)
and that query should be:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id" AND "val"."device_id" = 1
how do I write this specific query?
I'm currently struggling creating web tables using bootstrap datatables, sqlalchemy and sqlalchemy-datatables.
Sqlalchemy seems generating correct sql query, datatables is populated with correct information.
However when I'm trying to search for record in datatable search field I'm getting an error:
DataTables warning: table id=main_table - Neither 'InstrumentedAttribute' object nor 'Comparator' object associated with VrfMain.scope has an attribute 'cast'
I tried to remove any relationships from query, and it works.
So problem is with relationship somewhere. Can anyone help me please ?
Here is my sql models:
class VrfMain(db.Model):
__tablename__ = 'vrf_main'
id = Column(Integer, primary_key=True, autoincrement=True)
vrf_name = Column(String, unique=True)
rd = Column(String, unique=True)
primary_rt = Column(String, unique=True)
additional_rt = Column(String, unique=True)
description = Column(String)
scope_id = Column(Integer, ForeignKey('subnet_scopes.id'))
scope = relationship('SubnetScopes')
def __init__(self, vrf_name, rd, primary_rt, description, scope_id):
self.vrf_name = vrf_name
self.rd = rd
self.primary_rt = primary_rt
self.description = description
self.scope_id = scope_id
class SubnetScopes(db.Model):
__tablename__ = 'subnet_scopes'
id = Column(Integer, primary_key=True, autoincrement=True)
scope_name = Column(String, unique=True)
def __init__(self, scope_name):
self.scope_name = scope_name
def __repr__(self):
return str(self.scope_name)
Here is part of flask code:
# defining datatable columns
columns = [
ColumnDT(VrfMain.id),
ColumnDT(VrfMain.vrf_name),
ColumnDT(VrfMain.rd),
ColumnDT(VrfMain.primary_rt),
ColumnDT(VrfMain.additional_rt),
ColumnDT(VrfMain.description),
ColumnDT(VrfMain.scope)
]
query = VrfMain.query.\
join(SubnetScopes).\
filter(VrfMain.scope_id == SubnetScopes.id).\
with_entities(VrfMain.id, VrfMain.vrf_name, VrfMain.rd, VrfMain.primary_rt, VrfMain.additional_rt, VrfMain.description, SubnetScopes.scope_name)
print(query)
params = request.args.to_dict()
rowTable = DataTables(params, query, columns)
return jsonify(rowTable.output_result())
Here is sql query that is generated
SELECT vrf_main.id AS vrf_main_id, vrf_main.vrf_name AS vrf_main_vrf_name, vrf_main.rd AS vrf_main_rd, vrf_main.primary_rt AS vrf_main_primary_rt, vrf_main.additional_rt AS vrf_main_additional_rt, vrf_main.description AS vrf_main_description, subnet_scopes.scope_name AS subnet_scopes_scope_name
FROM vrf_main INNER JOIN subnet_scopes ON subnet_scopes.id = vrf_main.scope_id
WHERE vrf_main.scope_id = subnet_scopes.id
Here is javascript code:
$(document).ready(function() {
var table = $('#main_table').DataTable({
"processing": true,
"serverSide": true,
"ajax": {
"url": "{{ url_for('home_blueprint.get_vrf_data') }}"
},
"lengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
I decided not to use sqlalchemy-datatables. Instead, I will use this code:
all_vrf_data = []
for row in VrfMain.query.all():
row_proccessed = dict(row.__dict__); row_proccessed.pop('_sa_instance_state', None)
all_vrf_data.append(row_proccessed)
return_data = {"data": all_vrf_data}
return json.dumps(return_data, indent=4, sort_keys=True, default=str)
Suppose I have a one-to-many relationship, where the parents and children are grouped by some group_id.
Note: this example is a stripped down version of my code, which is actually a many-to-many relationship. There may be some errors unrelated to the question.
class Node(Base):
__tablename__ = 'node'
id = Column(GUID, default=uuid.uuid4, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID)
title = Column(Text, nullable=False)
class Leaf(Base):
__tablename__ = 'leaf'
id = Column(GUID, nullable=False, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID, nullable=False)
The group_id is used as a way to create new versions - so nodes and leaves with the same id can exist in multiple groups.
What I want to do is compare two groups, and find all the leaves whose parents have changed. I am trying to use an outer join to do the comparison, and then two joins to filter the parent nodes:
def find_changed_leaves(group_id_a, group_id_b, session):
NodeA = model.Node
NodeB = aliased(model.Node, name='node_b')
LeafA = model.Leaf
LeafB = aliased(model.Leaf, name='leaf_b')
query = (session.query(LeafA, LeafB)
.outerjoin(LeafB, LeafA.id == LeafB.id)
.join(NodeA, (LeafA.group_id == NodeA.group_id) &
(LeafA.parent_id == NodeA.id))
.join(NodeB, (LeafB.group_id == NodeB.group_id) &
(LeafB.parent_id == NodeB.id))
# Group membership
.filter(LeafA.group_id == group_id_a,
LeafB.group_id == group_id_b)
# Filter for modified parents
.filter(NodeA.title != NodeB.title)
)
return query.all()
This works, but it doesn't show leaves that are only in one of the groups (e.g. if a leaf was added to a node in the new group). How can I show all the leaves, returning None for a leaf that is missing from one of the groups?
Edit: I see there are perils mixing join with outer join. I tried naively changing it to .outerjoin(NodeA, ..., but it didn't help.
As mentioned in the comment, it is not entirely clear what needs to be achieved. Nonetheless, the code below should at least give you some directions.
First of all, I would not try to combine it all in one query (potentially using full joins and subqueries), but split it into 3 separate queries:
get LeafA, LeafB pairs whose parents have changed
get LeafA entries that do not have a corresponding LeafB
get LeafB entries that do not have a corresponding LeafA
Below is the code which should run as it is in both sqlite and postgresql. Note that I have added relationships and use them in the queries. But you could do the same with explicit join conditions as in your code snippet.
import uuid
from sqlalchemy import (
create_engine, Column, Integer, String, ForeignKey, Text, and_,
ForeignKeyConstraint, UniqueConstraint, exists
)
from sqlalchemy.orm import sessionmaker, relationship, eagerload, aliased
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.dialects.postgresql import UUID as GUID
_db_uri = 'sqlite:///:memory:'; GUID = String
# _db_uri = "postgresql://aaa:bbb@localhost/mytestdb"
engine = create_engine(_db_uri, echo=True)
Session = sessionmaker(bind=engine)
Base = declarative_base(engine)
newid = lambda: str(uuid.uuid4())
# define object model
class Node(Base):
__tablename__ = 'node'
id = Column(GUID, default=newid, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
# parent_id = Column(GUID)
title = Column(Text, nullable=False)
class Leaf(Base):
__tablename__ = 'leaf'
id = Column(GUID, nullable=False, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID, nullable=False)
title = Column(Text, nullable=False)
# define relationships - easier test data creation and querying
parent = relationship(
Node,
primaryjoin=and_(Node.id == parent_id, Node.group_id == group_id),
backref="children",
)
__table_args__ = (
ForeignKeyConstraint(
['parent_id', 'group_id'], ['node.id', 'node.group_id']
),
)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
session = Session()
g1, g2, l1, l2, l3 = [newid() for _ in range(5)]
# Create test data
def _add_test_data():
n11 = Node(
title="node1", group_id=g1,
children=[
Leaf(id=l1, title="g1 only"),
Leaf(id=l3, title="both groups"),
]
)
n21 = Node(
title="node1 changed", group_id=g2,
children=[
Leaf(id=l2, title="g2 only"),
Leaf(id=l3, title="both groups"),
]
)
session.add_all([n11, n21])
session.commit()
def find_changed_leaves(group_id_a, group_id_b):
    """
    Return (leaf_a, leaf_b) pairs for leaves that exist in both groups and
    whose parent nodes carry different titles.
    """
    leaf_a = aliased(Leaf, name='leaf_a')
    leaf_b = aliased(Leaf, name='leaf_b')
    node_a = aliased(Node, name='node_a')
    node_b = aliased(Node, name='node_b')

    # Group membership of the B side is part of the join condition.
    same_leaf_in_b = and_(leaf_a.id == leaf_b.id, leaf_b.group_id == group_id_b)

    pairs = (
        session.query(leaf_a, leaf_b)
        .filter(leaf_a.group_id == group_id_a)
        .join(leaf_b, same_leaf_in_b)
        .join(node_a, leaf_a.parent)
        .join(node_b, leaf_b.parent)
        # Keep only pairs whose parents differ in title.
        .filter(node_a.title != node_b.title)
    )
    return pairs.all()
def find_orphaned_leaves(group_id_a, group_id_b):
    """
    Return the leaves of group A for which group B holds no leaf with the
    same id.
    """
    leaf_a = aliased(Leaf, name='leaf_a')
    leaf_b = aliased(Leaf, name='leaf_b')

    # EXISTS subquery: a leaf with the same id that belongs to group B.
    counterpart_in_b = (
        session.query(leaf_b)
        .filter(leaf_a.id == leaf_b.id)
        .filter(group_id_b == leaf_b.group_id)
        .exists()
    )

    orphans = (
        session.query(leaf_a)
        .filter(~counterpart_in_b)
        # Group membership
        .filter(leaf_a.group_id == group_id_a)
    )
    return orphans.all()
def find_deleted_leaves(group_id_a, group_id_b):
    """Return ``(leaf, None)`` for every leaf found in group A but not in B."""
    return tuple(
        (leaf, None)
        for leaf in find_orphaned_leaves(group_id_a, group_id_b)
    )
def find_added_leaves(group_id_a, group_id_b):
    """Return ``(None, leaf)`` for every leaf found in group B but not in A."""
    return tuple(
        (None, leaf)
        for leaf in find_orphaned_leaves(group_id_b, group_id_a)
    )
# add test data
_add_test_data()
# check the results
changed = find_changed_leaves(g1, g2)
assert 1 == len(changed)
le, ri = changed[0]
assert le.id == ri.id == l3
added = find_added_leaves(g1, g2)
assert 1 == len(added)
le, ri = added[0]
assert le is None
assert ri.id == l2
deleted = find_deleted_leaves(g1, g2)
assert 1 == len(deleted)
le, ri = deleted[0]
assert le.id == l1
assert ri is None