Conditional bulk update in Django using grouping - sql

Suppose I have a list of transactions with the following model definition:
class Transaction(models.Model):
amount = models.FloatField()
client = models.ForeignKey(Client)
date = models.DateField()
description = models.CharField()
invoice = models.ForeignKey(Invoice, null=True)
Now I want to create invoices for each client at the end of the month. The invoice model looks like this:
class Invoice(models.Model):
client = models.ForeignKey(Client)
invoice_date = models.DateField()
invoice_number = models.CharField(unique=True)
def amount_due(self):
return self.transaction_set.aggregate(Sum('amount'))
def create_invoices(invoice_date):
for client in Client.objects.all():
transactions = Transaction.objects.filter(client=client)
if transactions.exists():
invoice = Invoice(client=client, number=get_invoice_number(), date=invoice_date)
invoice.save()
transactions.update(invoice=invoice)
I know I can create all the invoices with a bulk create in 1 query with bulk create, but I would still have to the set the invoice field in the transaction model individually.
Is it possible to set the invoice field of all the Transaction models with a single query after I've created all the invoices? Preferably in using the ORM but happy to use raw SQL if required.
I know I can also use group by client on the transaction list to get the total per client, but then the individual entries are not linked to the invoice.

You could try constructing a conditional update query if you are able to generate a mapping from clients to invoices before:
from django.db.models import Case, Value, When
# generate this after creating the invoices
client_invoice_mapping = {
# client: invoice
}
cases = [When(client_id=client.pk, then=Value(invoice.pk))
for client, invoice in client_invoice_mapping.items()]
Transaction.objects.update(invoice_id=Case(*cases))
Note that Conditional Queries are available since Django 1.8. Otherwise you may look into constructing something similar using raw SQL.

To complement #Bernhard Vallant answer. You can use only 3 queries.
def create_invoices(invoice_date):
# Maybe use Exists clause here instead of subquery,
# do some tests for your case if the query is slow
clients_with_transactions = Client.objects.filter(
id__in=Transaction.objects.values('client')
)
invoices = [
Invoice(client=client, number=get_invoice_number(), date=invoice_date)
for client in clients_with_transactions
]
# With PostgreSQL Django can populate id's here
invoices = Invoice.objects.bulk_create(invoices)
# And now use a conditional update
cases = [
When(client_id=invoice.client_id, then=Value(invoice.pk))
for invoice in invoices
]
Transaction.objects.update(invoice_id=Case(*cases))

Related

Django complex filter and order

I have 4 model like this
class Site(models.Model):
name = models.CharField(max_length=200)
def get_lowest_price(self, mm_date):
'''This method returns lowest product price on a site at a particular date'''
class Category(models.Model):
name = models.CharField(max_length=200)
site = models.ForeignKey(Site)
class Product(models.Model):
name = models.CharField(max_length=200)
category = models.ForeignKey(Category)
class Price(models.Model):
date = models.DateField()
price = models.IntegerField()
product = models.ForeignKey(Product)
Here every have many category, every category have many product. Now product price can change every day so price model will hold the product price and date.
My problem is I want list of site filter by price range. This price range will depends on the get_lowest_price method and can be sort Min to Max and Max to Min. Already I've used lambda expression to do that but I think it's not appropriate
sorted(Site.objects.all(), key=lambda x: x.get_lowest_price(the_date))
Also I can get all site within a price range by running a loop but this is also not a good idea. Please help my someone to do the query in right manner.
If you still need more clear view of the question please see the first comment from "Ishtiaque Khan", his assumption is 100% right.
*In these models writing frequency is low and reading frequency is high.
1. Using query
If you just wanna query using a specific date. Here is how:
q = Site.objects.filter(category__product__price__date=mm_date) \
.annotate(min_price=Min('category__product__price__price')) \
.filter(min_price__gte=min_price, min_price__lte=max_price)
It will return a list of Site with lowest price on mm_date fall within range of min_price - max_price. You can also query for multiple date using query like so:
q = Site.objects.values('name', 'category__product__price__date') \
.annotate(min_price=Min('category__product__price__price')) \
.filter(min_price__gte=min_price, min_price__lte=max_price)
2. Eager/pre-calculation, you can use post_save signal. Since the write frequency is low this will not be expensive
Create another Table to hold lowest prices per date. Like this:
class LowestPrice(models.Model):
date = models.DateField()
site = models.ForeignKey(Site)
lowest_price = models.IntegerField(default=0)
Use post_save signal to calculate and update this every time there. Sample code (not tested)
from django.db.models.signals import post_save
from django.dispatch import receiver
#receiver(post_save, sender=Price)
def update_price(sender, instance, **kwargs):
cur_price = LowestPrice.objects.filter(site=instance.product.category.site, date=instance.date).first()
if not cur_price:
new_price = LowestPrice()
new_price.site = instance.product.category.site
new_price.date = instance.date
else:
new_price = cur_price
# update price only if needed
if instance.price<new_price.lowest_price:
new_price.lowest_price = instance.price
new_price.save()
Then just query directly from this table when needed:
LowestPrice.objects.filter(date=mm_date, lowest_price__gte=min_price, lowest_price__lte=max_price)
Solution:
from django.db.models import Min
Site.objects.annotate(
price_min=Min('categories__products__prices__price')
).filter(
categories__products__prices__date=the_date,
).distinct().order_by('price_min') # prefix '-' for descending order
For this to work, you need to modify the models by adding a related_name attribute to the ForeignKey fields.
Like this -
class Category(models.Model):
# rest of the fields
site = models.ForeignKey(Site, related_name='categories')
Similary, for Product and Price models, add related_name as products and prices in the ForeignKey fields.
Explanation:
Starting with related_name, it describes the reverse relation from one model to another.
After the reverse relationship is setup, you can use them to inner join the tables.
You can use the reverse relationships to get the price of a product of a category on a site and annotate the min price, filtered by the_date. I have used the annotated value to order by min price of the product, in ascending order. You can use '-' as a prefix character to do in descending order.
Do it with django queryset operations
Price.objects.all().order_by('price') #add [0] for only the first object
or
Price.objects.all().order_by('-price') #add [0] for only the first object
or
Price.objects.filter(date= ... ).order_by('price') #add [0] for only the first object
or
Price.objects.filter(date= ... ).order_by('-price') #add [0] for only the first object
or
Price.objects.filter(date= ... , price__gte=lower_limit, price__lte=upper_limit ).order_by('price') #add [0] for only the first object
or
Price.objects.filter(date= ... , price__gte=lower_limit, price__lte=upper_limit ).order_by('-price') #add [0] for only the first object
I think this ORM query could do the job ...
from django.db.models import Min
sites = Site.objects.annotate(price_min= Min('category__product__price'))
.filter(category__product__price=mm_date).unique().order_by('price_min')
or /and for reversing the order :
sites = Site.objects.annotate(price_min= Min('category__product__price'))
.filter(category__product__price=mm_date).unique().order_by('-price_min')

Django retrieve results by date operation query

I have one model to make my Catalog like this
class Product(models.Model):
item = models.CharField(max_length=10)
description = models.CharField(max_length=140)
unit = models.CharField(max_length=5)
expiration= models.IntegerField(default=365) # Days after admission
expiration_warning= models.IntegerField(default=30) # Days before expiration
...
and I have my Inventory like this:
class Inventory(models.Model):
product = models.ForeignKey(Product)
quantity= models.DecimalField(default=0, decimal_places=2, max_digits=10)
admission = models.DateTimeField(auto_now_add=True)
...
Now I want to retrieve all Inventory objects that its expiration date is upcoming or has been reached.
With a RAW SQL query, it will be something like:
SELECT product,quantity,admission,expiration,expiration_warning
FROM Inventory as I JOIN Product as P on I.product=P.id
WHERE DATEADD(day,(expiration_warning*-1),DATEADD(day,expiration,admission))<=GETDATE()
I haven't tried this query yet, it is just an example
I want to achieve this using Django query syntax, can you help me please?
Maybe something like this can achieve what you need:
from datetime import datetime, timedelta
expiring_inventory = []
expired_inventory = []
for inventory in Inventory.objects.select_related('product').all(): # 1 query
p = inventory.product
expire_datetime = inventory.admission + timedelta(days=p.expiration)
expiring_datetime = expire_datetime - timedelta(days=p.expiration_warning)
if expiring_datetime <= datetime.now() < expire_datetime: # not quite expired
expiring_inventory.append(inventory)
if expire_datetime <= datetime.now(): # expired
expired_inventory.append(inventory)
You can read more about Query Sets in the django docs.

Django - Making a SQL query on a many to many relationship with PostgreSQL Inner Join

I am looking for a perticular raw SQL query using Inner Join.
I have those models:
class EzMap(models.Model):
layers = models.ManyToManyField(Shapefile, verbose_name='Layers to display', null=True, blank=True)
class Shapefile(models.Model):
filename = models.CharField(max_length=255)
class Feature(models.Model):
shapefile = models.ForeignKey(Shapefile)
I would like to make a SQL Query valid with PostgreSQL that would be like this one:
select id from "table_feature" where' shapefile_ezmap_id = 1 ;
but I dont know how to use the INNER JOIN to filter features where the shapefile they belongs to are related to a particular ezmap object
Something like this:
try:
id = Feature.objects.get(shapefile__ezmap__id=1).id
except Feature.DoesNotExist:
id = 0 # or some other action when no result is found
You will need to use filter (instead of get) if you want to deal with multiple Feature results.

Django sql order by

I'm really struggling on this one.
I need to be able to sort my user by the number of positive vote received on their comment.
I have a table userprofile, a table comment and a table likeComment.
The table comment has a foreign key to its user creator and the table likeComment has a foreign key to the comment liked.
To get the number of positive vote a user received I do :
LikeComment.objects.filter(Q(type = 1), Q(comment__user=user)).count()
Now I want to be able to get all the users sorted by the ones that have the most positive votes. How do I do that ? I tried to use extra and JOIN but this didn't go anywhere.
Thank you
It sounds like you want to perform a filter on an annotation:
class User(models.Model):
pass
class Comment(models.Model):
user = models.ForeignKey(User, related_name="comments")
class Like(models.Model):
comment = models.ForeignKey(Comment, related_name="likes")
type = models.IntegerField()
users = User \
.objects \
.all()
.extra(select = {
"positive_likes" : """
SELECT COUNT(*) FROM app_like
JOIN app_comment on app_like.comment_id = app_comment.id
WHERE app_comment.user_id = app_user.id AND app_like.type = 1 """})
.order_by("positive_likes")
models.py
class UserProfile(models.Model):
.........
def like_count(self):
LikeComment.objects.filter(comment__user=self.user, type=1).count()
views.py
def getRanking( anObject ):
return anObject.like_count()
def myview(request):
users = list(UserProfile.objects.filter())
users.sort(key=getRanking, reverse=True)
return render(request,'page.html',{'users': users})
Timmy's suggestion to use a subquery is probably the simplest way to solve this kind of problem, but subqueries almost never perform as well as joins, so if you have a lot of users you may find that you need better performance.
So, re-using Timmy's models:
class User(models.Model):
pass
class Comment(models.Model):
user = models.ForeignKey(User, related_name="comments")
class Like(models.Model):
comment = models.ForeignKey(Comment, related_name="likes")
type = models.IntegerField()
the query you want looks like this in SQL:
SELECT app_user.id, COUNT(app_like.id) AS total_likes
FROM app_user
LEFT OUTER JOIN app_comment
ON app_user.id = app_comment.user_id
LEFT OUTER JOIN app_like
ON app_comment.id = app_like.comment_id AND app_like.type = 1
GROUP BY app_user.id
ORDER BY total_likes DESCENDING
(If your actual User model has more fields than just id, then you'll need to include them all in the SELECT and GROUP BY clauses.)
Django's object-relational mapping system doesn't provide a way to express this query. (As far as I know—and I'd be very happy to be told otherwise!—it only supports aggregation across one join, not across two joins as here.) But when the ORM isn't quite up to the job, you can always run a raw SQL query, like this:
sql = '''
SELECT app_user.id, COUNT(app_like.id) AS total_likes
# etc (as above)
'''
for user in User.objects.raw(sql):
print user.id, user.total_likes
I believe this can be achieved with Django's queryset:
User.objects.filter(comments__likes__type=1)\
.annotate(lks=Count('comments__likes'))\
.order_by('-lks')
The only problem here is that this query will miss users with 0 likes. Code from #gareth-rees, #timmy-omahony and #Catherine will include also 0-ranked users.

Query that joins just a single row from a ForeignKey relationship

I have the following models (simplified):
class Category(models.model):
# ...
class Product(models.model):
# ...
class ProductCategory(models.Model):
product = models.ForeignKey(Product)
category = models.ForeignKey(Category)
# ...
class ProductImage(models.Model):
product = models.ForeignKey(Product)
image = models.ImageField(upload_to=product_image_path)
sort_order = models.PositiveIntegerField(default=100)
# ...
I want to construct a query that will get all the products associated with a particular category. I want to include just one of the many associated images--the image with the lowest sort_order--in the queryset so that a single query gets all of the data needed to show all products within a category.
In raw SQL I would might use a GROUP BY something like this:
SELECT * FROM catalog_product p
LEFT JOIN catalog_productcategory c ON (p.id = c.product_id)
LEFT JOIN catalog_productimage i ON (p.id = i.product_id)
WHERE c.category_id=2
GROUP BY p.id HAVING i.sort_order = MIN(sort_order)
Can this be done without using a raw query?
Edit - I should have noted what I've tried...
# inside Category model...
products = Product.objects.filter(productcategory__category=self) \
.annotate(Min('productimage__sort_order'))
While this query does GROUP BY, I do not see any way to (a) get the right ProductImage.image into the QuerySet eg. HAVING clause. I'm effectively trying to dynamically add a field to the Product instance (or the QuerySet) from a specific ProductImage instance. This may not be the way to do it with Django.
It isn't quite a raw query, but it isn't quite public api either.
You can add a group by clause to the queryset before it is evaluated:
qs = Product.objects.filter(some__foreign__key__join=something)
qs.group_by = 'some_field'
results = list(qs)
Word of caution, though: this behaves differently depending on the db backend.
catagory = Catagory.objects.get(get_your_catagory)
qs = Product.objects.annotate(Min('productimage__sortorder').filter(productcategory__category = catagory)
This should hit the DB only once, because querysets are lazy.