Query that fetches messages for a specific user - sql

I have a messages table like the following. How can I fetch messages for a specific user, that is, the single most recent message exchanged with each other user? For example, if user1 has messages with user2 and user3, how do I fetch the most recent message between user1 and user2, and the most recent message between user1 and user3?
class Message(Base):
    """One row of the ``messages`` table, identified by a sender and a recipient id."""

    __tablename__ = 'messages'
    id = Column(Integer(), primary_key=True)
    # Sender id; references users.id.
    sender_id = Column(Integer(), ForeignKey('users.id'))
    body = Column(Text())
    # Recipient id; declared here without a ForeignKey constraint.
    recipient_id = Column(Integer())
    created_at = Column(DateTime())
    updated_at = Column(DateTime())
The query I could come up with is the following, but it is not enough. Could you help me? Thanks a lot :).
# Every message the user either sent or received (no ordering, no grouping per partner).
# The model's sender column is `sender_id`; it has no `user_id` attribute.
session.query(Message).\
    filter(or_(Message.sender_id == user.id, Message.recipient_id == user.id))

Try
from sqlalchemy import desc, or_

# Newest single message the user sent or received, across all conversation partners.
# The model's sender column is `sender_id`; it defines no `user_id` attribute.
(
    session.query(Message)
    .filter(or_(Message.sender_id == user.id, Message.recipient_id == user.id))
    .order_by(desc(Message.created_at))
    .first()  # only the first row of the descending order is returned
)

Code below should give you an example using created_at as the decision column for the latest. You can easily change it to be updated_at.
Note that for two users (A and B) it will return both latest messages: latest from A->B and latest B->A.
from datetime import date
from sqlalchemy import create_engine, Column, Text, Integer, DateTime, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy.sql import func, and_
from sqlalchemy.ext.declarative import declarative_base
# In-memory SQLite engine created with echo=True, plus one session bound to it.
engine = create_engine('sqlite:///:memory:', echo=True)
session_factory = sessionmaker(bind=engine)
session = session_factory()
# The engine is handed to declarative_base positionally.
Base = declarative_base(engine)
class User(Base):
    """A row of the ``users`` table; Message rows reference it as sender and recipient."""

    __tablename__ = 'users'
    id = Column(Integer(), primary_key=True)
    name = Column(Text())
class Message(Base):
    """A row of the ``messages`` table linking a sending user and a receiving user."""

    __tablename__ = 'messages'
    id = Column(Integer(), primary_key=True)
    sender_id = Column(Integer(), ForeignKey('users.id'))
    body = Column(Text())
    recipient_id = Column(Integer(), ForeignKey('users.id'))
    created_at = Column(DateTime())
    updated_at = Column(DateTime())
    # Both relationships target User, so each one names the foreign key column it uses.
    sender = relationship(User, backref="messages_sent", foreign_keys=sender_id)
    recipient = relationship(User, backref="messages_recv", foreign_keys=recipient_id)
# Create the tables, then stage three users and four sample messages.
Base.metadata.create_all()

u1 = User(name='user1')
u2 = User(name='user2')
u3 = User(name='user3')
users = [u1, u2, u3]
session.add_all(users)

# Two messages u1 -> u2 and two messages u2 -> u3, each pair with different dates.
sample_messages = [
    Message(sender=u1, recipient=u2, body="bod12-a", created_at=date(2014, 1, 13)),
    Message(sender=u1, recipient=u2, body="bod12-X", created_at=date(2014, 1, 3)),
    Message(sender=u2, recipient=u3, body="bod23-X", created_at=date(2014, 1, 13)),
    Message(sender=u2, recipient=u3, body="bod23-a", created_at=date(2013, 1, 13)),
]
session.add_all(sample_messages)
session.flush()
# Newest created_at for every (sender_id, recipient_id) pair.
newest_per_pair = (
    session.query(
        Message.sender_id,
        Message.recipient_id,
        func.max(Message.created_at).label("max_created_at"),
    )
    .group_by(Message.sender_id, Message.recipient_id)
    .subquery("subq")
)

# Join back to messages to pick the full row that carries that newest timestamp.
same_pair_and_time = and_(
    Message.sender_id == newest_per_pair.c.sender_id,
    Message.recipient_id == newest_per_pair.c.recipient_id,
    Message.created_at == newest_per_pair.c.max_created_at,
)
latest_messages = session.query(Message).join(newest_per_pair, same_pair_and_time).all()

for message in latest_messages:
    print(message.sender_id, message.recipient_id, message.body)

Related

SQLAlchemy ORM: Filter by multiple include/exclude matches on many-to-many relationship

I'm trying to use an SQLite database via SQLAlchemy 1.4 ORM to filter data based on relationships.
Data
My example data consists of groups and members in a simple many-to-many schema:
[...]
member_f = Member(name="F")
group_1 = Group(name="1", members=[member_a, member_b, member_c]) <-- find this group via member names
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])
[...]
Full running example code (schema, objects, queries):
from sqlalchemy import Table, Column, Integer, String, ForeignKey
from sqlalchemy import create_engine, select, func, text, and_
from sqlalchemy.orm import relationship, declarative_base, sessionmaker
# In-memory SQLite engine and a session factory bound to it.
engine = create_engine("sqlite+pysqlite:///:memory:", future=True, echo=False)
Session = sessionmaker(bind=engine, future=True)
# Schema
Base = declarative_base()
# Association table linking groups.id to members.name (many-to-many).
groups_members = Table("groups_members", Base.metadata,
    Column("group_id", ForeignKey("groups.id")),
    Column("member_name", ForeignKey("members.name")),
)
class Group(Base):
    """A named group linked to its members through ``groups_members``."""

    __tablename__ = "groups"
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False)
    # Many-to-many to Member; the backref adds a `groups` collection on Member.
    members = relationship("Member", secondary=groups_members, backref="groups", lazy="subquery")

    def __repr__(self):
        return f"<Group: {self.name}>"
class Member(Base):
    """A member identified by its name, which is also the primary key."""

    __tablename__ = "members"
    name = Column(String, primary_key=True)

    def __repr__(self):
        return f"<Member: {self.name}>"
Base.metadata.create_all(engine)

# Objects: six members named A..F and four groups of three members each.
member_a, member_b, member_c, member_d, member_e, member_f = (
    Member(name=letter) for letter in "ABCDEF"
)
group_1 = Group(name="1", members=[member_a, member_b, member_c])
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])
print(f"{member_a}: {member_a.groups}") # OK

# Persist all four groups in one call, then list what was stored.
with Session() as session:
    session.add_all([group_1, group_2, group_3, group_4])
    session.commit()
    print(session.query(Group).all()) # OK
# Query users example
def get_members_in_more_than_2_groups():
    """Return (Member, group_members_count) rows whose count is greater than 2."""
    membership_count = func.count(groups_members.columns.group_id).label('group_members_count')
    stmt = (
        select(Member, membership_count)
        .join(groups_members)
        .group_by(Member.name)
        # HAVING refers to the label declared above.
        .having(text('group_members_count > 2'))
    )
    with Session() as session:
        return session.execute(stmt).all()


for row in get_members_in_more_than_2_groups():
    print(row) # OK
# Query groups problem: associated with A and B but not with E or F
def get_groups_by_member_names(member_names_included, member_names_excluded):
    """Attempt to select groups that have all included and none of the excluded members.

    NOTE: this is the version the surrounding text reports as failing.
    """
    with Session() as session:
        # Both lookups call .all() on an executed select, so each yields a list of result rows.
        included = session.execute(select(Member).where(Member.name.in_(member_names_included))).all()
        excluded = session.execute(select(Member).where(Member.name.in_(member_names_excluded))).all()
        return session.execute(
            select(Group)
            .join(Group.members)
            .where(
                and_(
                    # contains() receives the whole list here.
                    # NOTE(review): presumably it expects a single Member instance; confirm.
                    Group.members.contains(included),
                    ~Group.members.contains(excluded),
                )
            )
            .group_by(Group.id)
        ).scalars().all()
for g in get_groups_by_member_names(member_names_included=["A", "B"], member_names_excluded=["E", "F"]):
    print(g) # Expected output: <Group: 1>
Goal
Now I'm trying to find all groups that
have both members with the names A and B (that's groups 1 and 2)
and don't have any member named E or F (removing group 2)
resulting in just group 1.
Problem
The relevant (and failing) function in the example code is get_groups_by_member_names and with my lack of database knowledge, I'm quite stuck.
Most existing questions that I could find on SO only need to filter by one relationship value. But I need them to consider the lists of included and excluded member names.
I have tried to get the members as SQLAlchemy objects first and inserting those into the query but without any luck. I may have done that completely wrong, though.
I also tried joining the tables, filtering with the names list and counting the grouped results... It's hard for me to tell whether I'm on the right track or not at all.
Running over all groups in Python and applying the filtering there would be my fallback workaround. But with many items, the database can probably handle it more efficiently.
Any help greatly appreciated, I am happy with anything that works. I could probably also work my way up from a functioning SQL statement.
Thanks for your time!
Edit 1:
I found this answer https://stackoverflow.com/a/21104689/5123171 and while it works on small data sets, it's terribly slow on larger ones (about 60 seconds for 500 members and 10k groups):
def get_group_by_members(member_names_included, member_names_excluded):
    """Return groups matching the included names and having no excluded member.

    A group is kept when the number of its association rows naming an included
    member equals ``len(member_names_included)`` and none of its members has a
    name in ``member_names_excluded``.
    """
    member_name = groups_members.columns.member_name
    with Session() as session:
        query = (
            session.query(Group)
            .join(groups_members)
            .filter(member_name.in_(member_names_included))
            .group_by(Group.id)
            .having(func.count(member_name) == len(member_names_included))
            .filter(~Group.members.any(Member.name.in_(member_names_excluded)))
        )
        return query.all()
Ok, here's what I ended up with via trial & error in an SQL editor. This is faster than the previous attempt (50 milliseconds on the same data).
The comments in the code correspond to the following steps:
Find groups containing excluded members
Filter those out of the assignment table
Filter remaining assignment table by included members
Group by group IDs
Return all remaining groups matching the number of included members
SQL
-- Groups that contain both 'A' and 'B' and neither 'E' nor 'F'.
-- The numbered comments match the steps listed above.
SELECT * FROM groups
WHERE id IN (
    SELECT group_id FROM groups_members
    WHERE member_name IN ('A', 'B')          -- 3.
    AND group_id NOT IN (                    -- 2.
        SELECT group_id FROM groups_members
        WHERE member_name IN ('E', 'F')      -- 1.
    )
    GROUP BY group_id                        -- 4.
    HAVING count(member_name) = 2            -- 5.
)
SQLAlchemy
# The whole chain is wrapped in parentheses so the leading-dot continuation lines are
# valid Python. The numbered comments match the steps listed above.
result = (
    session.query(Group)
    .where(
        Group.id.in_(
            session.query(groups_members.c.group_id)
            .where(
                groups_members.c.member_name.in_(member_names_included),  # 3.
                groups_members.c.group_id.not_in(  # 2.
                    session.query(groups_members.c.group_id).where(
                        groups_members.c.member_name.in_(member_names_excluded)  # 1.
                    )
                ),
            )
            .group_by(groups_members.c.group_id)  # 4.
            .having(
                func.count(groups_members.c.member_name)
                == len(member_names_included)  # 5.
            )
        )
    )
    .all()
)
And the full running example in one piece:
from sqlalchemy import Table, Column, Integer, String, ForeignKey
from sqlalchemy import create_engine, select, func, text, and_, not_
from sqlalchemy.orm import relationship, declarative_base, sessionmaker
# In-memory SQLite engine and a session factory bound to it.
engine = create_engine("sqlite+pysqlite:///:memory:", future=True, echo=False)
Session = sessionmaker(bind=engine, future=True)
# Schema
Base = declarative_base()
# Association table linking groups.id to members.name (many-to-many).
groups_members = Table(
    "groups_members",
    Base.metadata,
    Column("group_id", ForeignKey("groups.id")),
    Column("member_name", ForeignKey("members.name")),
)
class Group(Base):
    """A named group linked to its members through ``groups_members``."""

    __tablename__ = "groups"
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False)
    # Many-to-many to Member; the backref adds a `groups` collection on Member.
    members = relationship(
        "Member", secondary=groups_members, backref="groups", lazy="subquery"
    )

    def __repr__(self):
        return f"<Group {self.id}: {self.name}>"
class Member(Base):
    """A member identified by its name, which is also the primary key."""

    __tablename__ = "members"
    name = Column(String, primary_key=True)

    def __repr__(self):
        return f"<Member: {self.name}>"
Base.metadata.create_all(engine)

# Objects: six members named A..F and four groups of three members each.
member_a, member_b, member_c, member_d, member_e, member_f = (
    Member(name=letter) for letter in "ABCDEF"
)
group_1 = Group(name="1", members=[member_a, member_b, member_c])
group_2 = Group(name="2", members=[member_a, member_b, member_f])
group_3 = Group(name="3", members=[member_a, member_c, member_d])
group_4 = Group(name="4", members=[member_d, member_e, member_f])

# Store all four groups in a single add_all call.
with Session() as session:
    session.add_all([group_1, group_2, group_3, group_4])
    session.commit()

# Query parameters shared by the variants below.
member_names_included = ["A", "B"]
member_names_excluded = ["E", "F"]
# Raw SQL variant
with Session() as session:
    # The statement is wrapped in text() so the session receives an executable
    # construct instead of a bare string. SQL string literals use single quotes.
    result = session.execute(
        text(
            """
            SELECT * FROM groups
            WHERE id IN (
                SELECT group_id FROM groups_members
                WHERE member_name IN ('A', 'B')
                AND group_id NOT IN (
                    SELECT group_id FROM groups_members
                    WHERE member_name IN ('E', 'F')
                )
                GROUP BY group_id
                HAVING count(member_name) = 2
            )
            """
        )
    ).all()
# Result rows are tuple-like; their column-name view is `_mapping`, which supplies
# the keyword arguments used to build Group objects for printing.
groups = [Group(**r._mapping) for r in result]
for r in groups:
    print(f'SQL {r}')
# ORM Variant
with Session() as session:
    # Ids of groups containing any excluded member.
    groups_with_excluded = session.query(groups_members.c.group_id).where(
        groups_members.c.member_name.in_(member_names_excluded)
    )
    # Among the remaining groups, keep those whose count of included-member rows
    # equals the number of requested names (all of them present, not just a few).
    groups_with_all_included = (
        session.query(groups_members.c.group_id)
        .where(
            groups_members.c.member_name.in_(member_names_included),
            groups_members.c.group_id.not_in(groups_with_excluded),
        )
        .group_by(groups_members.c.group_id)
        .having(
            func.count(groups_members.c.member_name) == len(member_names_included)
        )
    )
    result = session.query(Group).where(Group.id.in_(groups_with_all_included)).all()
for r in result:
    print(f'ORM {r}')
I hope this is helpful to anyone and if you have suggestions for improvement, please let me know.

Issue when creating one-to-many relationships in SQLAlchemy database

I created a Flask app with a database, with following classes:
db = SQLAlchemy(app)
class Category(db.Model):
    """Top-level category holding a name, an icon and a list of subcategories."""

    __tablename__ = 'Category'
    # NOTE(review): no "Child" model is visible next to this class, and `relationship`
    # is used unqualified while the rest of the model uses `db.relationship`;
    # confirm this line is intended.
    children = relationship("Child")
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.Text())
    icon = db.Column(db.Text())
    # One-to-many to Subcategory; the backref is named "category".
    subcategories = db.relationship('Subcategory', backref="category")

    def __init__(self, name, subcategories, icon):
        """Create a category from its name, its subcategories and its icon."""
        self.name = name
        # Store the value in the mapped `icon` column. It was previously assigned
        # to `self.color`, which is not a column of this model.
        self.icon = icon
        self.subcategories = subcategories
class Subcategory(db.Model):
    """Subcategory row that references a category and owns a list of tasks."""

    __tablename__ = 'Subcategory'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    name = db.Column(db.Text())
    color = db.Column(db.Text())
    reward_points = db.Column(db.Integer())
    # NOTE(review): Category.subcategories declares backref="category", the same
    # attribute name as this column; confirm the two do not clash.
    category = db.Column(db.Text())
    tasks = db.relationship('Task', backref="subcategory")
    # The foreign key targets 'category.id' while Category sets __tablename__ = 'Category';
    # the reported NoReferencedTableError names exactly this lookup.
    category_id = db.Column(db.Integer, db.ForeignKey('category.id'))

    def __init__(self, name, color, reward_points, category, tasks, category_id):
        """Assign every constructor argument to the attribute of the same name."""
        self.name = name
        self.color = color
        self.reward_points = reward_points
        self.category = category
        self.tasks = tasks
        self.category_id = category_id
class Task(db.Model):
    """Task row with timing, status and tag fields that references a subcategory."""

    __tablename__ = 'Task'
    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    task = db.Column(db.Text())
    start = db.Column(db.DateTime())
    end = db.Column(db.DateTime())
    duration = db.Column(db.Integer)
    scheduled = db.Column(db.DateTime())
    created = db.Column(db.DateTime())
    status = db.Column(db.Text())
    category = db.Column(db.Text())
    # NOTE(review): Subcategory.tasks declares backref="subcategory", the same
    # attribute name as this column; confirm the two do not clash.
    subcategory = db.Column(db.Text())
    tags = db.Column(db.Text())
    # NOTE(review): targets 'subcategory.id' while Subcategory sets
    # __tablename__ = 'Subcategory'; confirm the table name casing matches.
    subcategory_id = db.Column(db.Integer, db.ForeignKey('subcategory.id'))

    def __init__(self, task, start, end, duration, category, subcategory, tags, created, status, scheduled, subcategory_id):
        """Assign every constructor argument to the attribute of the same name."""
        self.task = task
        self.start = start
        self.end = end
        self.duration = duration
        self.category = category
        self.subcategory = subcategory
        self.tags = tags
        self.created = created
        self.status = status
        self.scheduled = scheduled
        self.subcategory_id = subcategory_id
My goal is to create one to many relationships between the classes. I am trying to run the db.create_all() command, but am getting following error:
sqlalchemy.exc.NoReferencedTableError: Foreign key associated with column 'Subcategory.category_id' could not find table 'category' with which to generate a foreign key to target column 'id'
What am I doing wrong? Other questions on Stackoverflow with similar issues did not resolve my error.
Changing
category_id = db.Column(db.Integer, db.ForeignKey('category.id'))
to
category_id = db.Column(db.Integer, db.ForeignKey('Category.id'))
solved the issue.
Changing "category.id" to "category.c.id" solved the issue as well. If anyone knows why, pls let me know ;-)

Convert PostgreSQL COUNT … FILTER query to SQL Alchemy

I'm new to SQLAlchemy, and I would like to convert this PostgreSQL query:
SELECT product.*
, COUNT(feedback.like) FILTER (WHERE feedback.like = '1') AS like
, COUNT(feedback.like) FILTER (WHERE feedback.like = '-1') AS unlike
FROM feedback, product
WHERE product.id = feedback.product_id
GROUP BY product.id
ORDER BY product.id;
I have already tried this:
# Attempted translation of the SQL query above.
# NOTE(review): func.count(<comparison>) presumably counts every non-NULL comparison
# result rather than only the rows where the comparison is true; confirm.
products = db.session.query(
    Product,
    func.count(Feedback.like > 0).label('like'),
    func.count(Feedback.like < 0).label('unlike')
).filter(Product.guide_name_id==id)
    .filter(Product.id == Feedback.product_id)
    .group_by(Product.id)
    .order_by(Product.id)
    .all()
Thank you in advance for your help
Thanks to @IljaEverilä's comment, here is a more direct answer:
class Product(Base):
    """A product row with a required name of up to 50 characters."""

    __tablename__ = "product"
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)

    def __repr__(self):
        return f"<Product(name='{self.name}')>"
class Feedback(Base):
    """A feedback row attached to one product."""

    __tablename__ = "feedback"
    id = Column(Integer, primary_key=True)
    product_id = Column(Integer, ForeignKey(Product.id))
    # The sample data and query below use 1 for a like and -1 for a dislike.
    like = Column(Integer)
    product = relationship(Product)
Base.metadata.create_all(engine)

with Session(engine) as session:
    # set up test data: "widget" gets two likes and one dislike, "gadget" gets none
    widget = Product(name="widget")
    sample_rows = [
        widget,
        Feedback(product=widget, like=1),
        Feedback(product=widget, like=1),
        Feedback(product=widget, like=-1),
        Product(name="gadget"),
    ]
    session.add_all(sample_rows)

    # run the query: one filtered COUNT per feedback value
    likes = func.count(Feedback.like).filter(Feedback.like == 1).label("likes")
    dislikes = func.count(Feedback.like).filter(Feedback.like == -1).label("dislikes")
    query = (
        select(Product, likes, dislikes)
        .select_from(Product)
        .outerjoin(Feedback)
        .group_by(Product)
    )
    results = session.execute(query).fetchall()
    print(results)
    # [(<Product(name='gadget')>, 0, 0), (<Product(name='widget')>, 2, 1)]
(Original answer)
I'm not sure if SQLAlchemy's postgresql dialect specifically handles COUNT … FILTER, but you can accomplish the same thing using SUM and CASE:
from sqlalchemy import __version__ as sa_version, case, Column, ForeignKey, func, Integer, String
from sqlalchemy.orm import Session
print(sa_version) # 1.4.0b2
class Product(Base):
    """A product row with a required name of up to 50 characters."""

    __tablename__ = "product"
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False)
class Feedback(Base):
    """A feedback row attached to one product."""

    __tablename__ = "feedback"
    id = Column(Integer, primary_key=True)
    product_id = Column(Integer, ForeignKey(Product.id))
    # The query below treats positive values as likes and negative values as dislikes.
    like = Column(Integer)
    product = relationship(Product)
Base.metadata.create_all(engine)

with Session(engine, future=True) as session:
    # Test data: "widget" gets two likes and one dislike, "gadget" gets no feedback.
    widget = Product(name="widget")
    sample_rows = [
        widget,
        Feedback(product=widget, like=1),
        Feedback(product=widget, like=1),
        Feedback(product=widget, like=-1),
        Product(name="gadget"),
    ]
    session.add_all(sample_rows)

    # SUM over a CASE that yields 1 for matching rows and 0 otherwise.
    likes = func.sum(case((Feedback.like > 0, 1), else_=0)).label("likes")
    dislikes = func.sum(case((Feedback.like < 0, 1), else_=0)).label("dislikes")
    results = (
        session.query(Product.name, likes, dislikes)
        .select_from(Product)
        .outerjoin(Feedback)
        .group_by(Product)
        .all()
    )
    print(results) # [('widget', 2, 1), ('gadget', 0, 0)]

SqlAlchemy outerjoin query problem filtering [duplicate]

This question already has answers here:
how left outer join in sqlalchemy?
(1 answer)
sqlalchemy filter children in query, but not parent
(2 answers)
Closed 2 years ago.
I'm using sqlalchemy, and I have problem with this specific query.
I have data_template, device_data, and device. Each device has a value for each entry in data_template; those values are stored in device_data. I want to list the data_template entries for one device together with the values that device has. If there is no value for some data_template entry, it should show None.
It has something to do with outerjoin. Here is my model:
from sqlalchemy.orm import sessionmaker, relationship
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class DataTemplate(Base):
    """A named kind of data; DeviceData rows reference it."""

    __tablename__ = 'data_template'
    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __repr__(self):
        return f"<DataTemplate(name={self.name})>"
class Device(Base):
    """A named device; DeviceData rows reference it."""

    __tablename__ = 'device'
    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __repr__(self):
        return f"<Device(name={self.name})>"
class DeviceData(Base):
    """The integer value one device has for one data template."""

    __tablename__ = 'device_data'
    id = Column(Integer, primary_key=True)
    value = Column(Integer, nullable=False)
    # Required references to the template and the device this value belongs to.
    data_name_id = Column(Integer, ForeignKey(DataTemplate.id), nullable=False)
    device_id = Column(Integer, ForeignKey(Device.id), nullable=False)
    # Each backref adds a `device_data` collection on the referenced class.
    data_template = relationship('DataTemplate', backref='device_data')
    device = relationship('Device', backref='device_data')

    def __repr__(self):
        return f"<DeviceData(device={self.device.name}, data_template={self.data_template.name}, value={self.value})>"
# In-memory SQLite database with every mapped table created.
engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
Session.configure(bind=engine)
session = Session()

# Two devices and three data templates.
dev1, dev2 = Device(name='Receiver'), Device(name='TV')
dat_temp1, dat_temp2, dat_temp3 = (
    DataTemplate(name=template_name) for template_name in ("height", "width", "length")
)
session.add_all([dev1, dev2, dat_temp1, dat_temp2, dat_temp3])

# dev1 gets values for the first two templates only; dev2 gets a value for all three.
dd1, dd2, dd3, dd4, dd5 = device_values = [
    DeviceData(value=100, data_template=dat_temp1, device=dev1),
    DeviceData(value=50, data_template=dat_temp2, device=dev1),
    DeviceData(value=200, data_template=dat_temp1, device=dev2),
    DeviceData(value=40, data_template=dat_temp2, device=dev2),
    DeviceData(value=30, data_template=dat_temp3, device=dev2),
]
session.add_all(device_values)
# Outer join from DataTemplate to DeviceData, then restricted to dev1's rows.
# The device condition is applied as a filter after the join, so templates without
# a DeviceData row for dev1 do not appear in the printed output.
s = session.query(DataTemplate, DeviceData).outerjoin(DeviceData).filter(DeviceData.device==dev1)
for x in s:
    print(x)
with this outerjoin I'm getting:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
and is equal to:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id"
WHERE "val"."device_id" = 1
but I want to get:
(<DataTemplate(name=height)>, <DeviceData(device=Receiver, data_template=height, value=100)>)
(<DataTemplate(name=width)>, <DeviceData(device=Receiver, data_template=width, value=50)>)
(<DataTemplate(name=length)>, None)
and that query should be:
SELECT "d"."id", "val"."id"
FROM "DataTemplate" "d"
LEFT JOIN "DeviceData" "val"
ON "d"."id" = "val"."data_name_id" AND "val"."device_id" = 1
how do I write this specific query?

import and mapping csv to sqlalchemy dynamically

I am creating a database using SQLAlchemy in a Flask app and filling it with selected columns from an existing CSV file, so I use pandas. Here is my class creation.
I need to create the Company objects and commit them dynamically, but my approach does not work. The CSV file is not small (about 20,000 records), so I cannot add them manually. Any suggestions for adding them dynamically?
from flask import jsonify
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
Base = declarative_base()
class Company(Base):
    """One company row; every field other than the id is stored as a required string."""

    __tablename__ = 'forbesglobal2000_2016'
    id = Column(Integer, primary_key=True)
    name = Column(String(250), nullable=False)
    profits = Column(String(250), nullable=False)
    marketValue = Column(String(250), nullable=False)
    revenue = Column(String(250), nullable=False)
    industry = Column(String(250), nullable=False)
class SIC(Base):
    """SIC code row linked to Company through the industry name."""

    __tablename__ = "SIC"
    id = Column(Integer, primary_key=True)
    SIC = Column(Integer, nullable=False)
    # Foreign key to Company.industry.
    # NOTE(review): that column is neither the primary key nor declared unique;
    # confirm the target database accepts it as a reference target.
    Industry_name = Column(String(250),ForeignKey('forbesglobal2000_2016.industry'))
    # The attribute is spelled "Indusrty" in the mapping, so callers must use that spelling.
    Indusrty = relationship(Company)
# configuration part
# SQLite database file in the working directory; create every mapped table in it.
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.create_all(engine)
import sqlalchemy
from sqlalchemy.orm import sessionmaker
from database_setup import *
import pandas as pd
# opening connection with database
engine = create_engine('sqlite:///CompainesData.db')
Base.metadata.bind = engine
# Clear database
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
DBSession = sessionmaker(bind=engine)
session = DBSession()
# Source CSV files, read relative to the working directory.
df = pd.read_csv("forbesglobal2000-2016.csv")
df1 = pd.read_csv("SIC.csv")
# market valuation, revenue, profits and industry
profit_column = df.profits
name_column = df.name
industry_column = df.industry
revenue_column = df.revenue
marketvalue_column = df.marketValue
# Columns taken from SIC.csv.
industry_column_f = df1.Description
SIC_column = df1.SICCode
# NOTE(review): `company` starts as an empty list, so the indexed assignment
# `company[i] = ...` below raises IndexError on the first pass; appending
# would be needed to grow the list.
company = []
# Starts at 1, so the row at position 0 is never converted.
i = 1
while i < name_column.__len__():
    company[i] = Company(name = name_column[i] , industry=industry_column[i], marketValue = marketvalue_column[i] , profits = profit_column[i] ,
        revenue = revenue_column[i] )
    i = i +1
# Stage every built object, then commit once.
for i in company:
    session.add(i)
session.commit()
# printing test
# Print each stored company, one field per line, in the fixed field order below.
printed_fields = ("name", "industry", "profits", "revenue", "marketValue")
for stored_company in session.query(Company).all():
    for field_name in printed_fields:
        print(getattr(stored_company, field_name))
If you want to load data from CSV files into a database, just use the df.to_sql() function, which does exactly that. For example:
df.to_sql(con=engine, name=airlines.__tablename__, if_exists='replace',index=False)
Pay attention to index=False, it's used to ignore pandas id column.
I think the index will start at 0 and not 1:
i = 1
should be
i = 0
can you try that?