Efficiently group by column aggregate - sql

SELECT date, id, sum(revenue)
FROM table
WHERE date between '2013-01-01' and '2013-01-08'
GROUP BY date, id
HAVING sum(revenue)>1000
Returns rows that have revenue>1000.
SELECT date, id, sum(revenue)
FROM table
WHERE date between '2013-01-01' and '2013-01-08'
AND id IN (SELECT id FROM table where date between '2013-01-01' and '2013-01-08' GROUP BY id HAVING sum(revenue)>1000)
GROUP BY date, id
Returns rows for id's whose total revenue over the date period is >1000 as desired. But this query is much slower. Any quicker way to do this?

Make sure you have indexes on the date and id columns, and try this variation:
select t.date, t.id, sum(t.revenue)
from table t
inner join (
select id
from table
where date between '2013-01-01' and '2013-01-08'
group by id
having sum(revenue) > 1000
) ts on t.id = ts.id
where t.date between '2013-01-01' and '2013-01-08'
group by t.date, t.id

it's not MySQL, it's Vertica ;)
Cris, what projection and order by you using in CREATE TABLE ???
Do you try using database designer
see http://my.vertica.com/docs/6.1.x/HTML/index.htm#14415.htm

Related

Filter SQL Server Records by Latest Date on Every Year

How would I filter this SQL server database so only the green records are left aka the last recorded date every year for each Customer ID field.
If you want to get the rows, not only the date values, using ROW_NUMBER() is an option (you only need to use the appropriate PARTITON BY and ORDER BY clauses):
SELECT *
FROM (
SELECT
CustomerId,
[Date],
ROW_NUMBER() OVER (PARTITION BY CustomerId, YEAR[Date] ORDER BY [Date] DESC) AS Rn
FROM YourTable
) t
WHERE Rn = 1
To check the maximum date in the year, you can write a query to get for each year the date where not exists another (in the same year), as follow:
SELECT *
FROM yourtable t1
WHERE NOT EXISTS
(SELECT 1
FROM yourtable t2
WHERE t1.customerID = t2.customerID
AND t1.date > t2.date
AND DATEPART(YEAR, t1) = DATEPART(YEAR, t2))
If you have only two columns, then you can just use aggregation:
select customer_id, max(date)
from t
group by customer_id, year(date);

Find the max date to last one year transaction for each group

I have to query in sql server where I have to find for each id it's volume such that we have last 1 year date for each id with it's volume.
for example below is my data ,
for each id I need to query the last 1 year transaction from when we have the entry for that id as you can see from the snippet for id 1 we have the latest date as 7/31/2020 so I need the last 1 year entry from that date for that id, The highlighted one is exclude because that date is more than 1 year from the latest date for that id
Similarly for Id 3 we have all the date range in one year from the latest date for that particular id
I tried using the below query and I can get the latest date for each id but I am not sure how to extract all the dates for each id from the latest date to one year, I would appreciate if some one could help me.
I am using Microsoft sql server would need the query which executes in sql server, Table name is emp and have millions of id
Select *
From emp as t
inner join (
Select tm.id, max(tm.date_tran) as MaxDate
From emp tm
Group by tm.id
) tm on t.id = tm.id and t.date_tran = tm.MaxDate
To exclude transactions where the date difference between the tran_date and the maximum tran_date for each id is greater than 1 year, something like this:
;with max_cte(id, max_date) as (
Select id, max(date_tran)
From emp tm
Group by id )
Select *
From emp e
join max_cte mc on e.id=mc.id
and datediff(d, e.date_tran, mc.max_date)<=365;
Update: per comments, added volume. Thnx GMB :)
;with max_cte(id, date_tran, volume, max_date) as (
Select *, dateadd(year, -1, max(date_tran) over(partition by id)) max_date
From #emp tm)
Select id, sum(volume) sum_volume
From max_cte mc
where mc.date_tran>max_date
group by id;
You can do this with window functions:
select id, sum(volume) total_volume
from (
select t.*, max(date_tran) over(partition by id) max_date_tran
from mytable t
) t
where date_tran > dateadd(year, -1, max_date_tran)
group by id
Alternatively, you can use a correlated subquery for filtering:
select id, sum(volume) total_volume
from mytable t
where t.date_tran > (
select dateadd(year, -1, max(t1.date_tran))
from mytable t1
where t1.id = t.id
)
The second query would take advantage of an index on (id, date_tran).
this should do the trick for you:
SELECT
*
FROM
emp
JOIN
(
SELECT
MAX(date_tran) max_date_tran
, Id
FROM
emp
GROUP BY
id
) emp2
ON emp2.Id = emp.Id
AND DATEADD(YEAR, -1, emp2.max_date_tran) <= emp.date_tran;
Your code is good. Just add the date difference function to get the particular time in between the transaction, like the following:
Select *
From emp as t
inner join ( Select id as id, max(date_tran) as maxdate
From emp tm
Group by id
) tm on t.id = tm.id and datediff(d, e.date_tran, mc.maxdate)<=365;

Select latest 30 dates for each unique ID

This is a sample data file
Data Contains unique IDs with different latitudes and longitudes on multiple timestamps.I would like to select the rows of latest 30 days of coordinates for each unique ID.Please help me on how to run the query .This date is in Hive table
Regards,
Akshay
According to your example above (where no current year dates for id=2,3), you can numbering date for each id (order by date descending) using window function ROW_NUMBER(). Then just get latest 30 values:
--get all values for each id where num<=30 (get last 30 days for each day)
select * from
(
--numbering each date for each id order by descending
select *, row_number()over(partition by ID order by DATE desc)num from Table
)X
where num<=30
If you need to get only unique dates (without consider time) for each id, then can try this query:
select * from
(
--numbering date for each id
select *, row_number()over(partition by ID order by new_date desc)num
from
(
-- move duplicate using distinct
select distinct ID,cast(DATE as date)new_date from Table
)X
)Y
where num<=30
In Oracle this will be:
SELECT * FROM TEST_DATE1
WHERE DATEUPDT > SYSDATE - 30;
select * from MyTable
where
[Date]>=dateadd(d, -30, getdate());
To group by ID and perform aggregation, something like this
select ID,
count(*) row_count,
max(Latitude) max_lat,
max(Longitude) max_long
from MyTable
where
[Date]>=dateadd(d, -30, getdate())
group by ID;

SQL Server: Attempting to output a count with a date

I am trying to write a statement and just a bit puzzled what is the best way to put it together. So I am doing a UNION on a number of tables and then from there I want to produce as the output a count for the UserID within that day.
So I will have numerous tables union such as:
Order ID, USERID, DATE, Task Completed.
UNION
Order ID, USERID, DATE, Task Completed
etc
Above is layout of the table which will have 4 tables union together with same names.
Then statement output I want is for a count of USERID that occurred within the last 24 hours.
So output should be:
USERID--- COUNT OUTPUT-- DATE
I was attempting a WHERE statement but think the output is not what I am after exactly, just thinking if anyone can point me in the right direction and if there is alternative way compared to the union? Maybe a joint could be a better alternative, any help be appreciated.
I will eventually then put this into a SSRS report, so it gets updated daily.
You can try this:
select USERID, count(*) as [COUNT], cast(DATE as date) as [DATE]
from
(select USERID, DATE From SomeTable1
union all
select USERID, DATE From SomeTable2
....
) t
where DATE <= GETDATE() AND DATE >= DATEADD(hh, -24, GETDATE())
group by USERID, cast(DATE as date)
First, you should use union all rather than union. Second, you need to aggregate and use count distinct to get what you want:
So, the query you want is something like:
select count(distinct userid)
from ((select date, userid
from table1
where date >= '2015-05-26'
) union all
(select date, userid
from table2
where date >= '2015-05-26'
) union all
(select date, userid
from table3
where date >= '2015-05-26'
)
) du
Note that this hardcodes the date. In SQL Server, you would do something like:
date >= cast(getdate() - 1 as date)
And in MySQL
date >= date_sub(curdate(), interval 1 day)
EDIT:
I read the question as wanting a single day. It is easy enough to extend to all days:
select cast(date as date) as dte, count(distinct userid)
from ((select date, userid
from table1
) union all
(select date, userid
from table2
) union all
(select date, userid
from table3
)
) du
group by cast(date as date)
order by dte;
For even more readability, you could use a CTE:
;WITH cte_CTEName AS(
SELECT UserID, Date, [Task Completed] FROM Table1
UNION
SELECT UserID, Date, [Task Completed] FROM Table2
etc
)
SELECT COUNT(UserID) AS [Count] FROM cte_CTEName
WHERE Date <= GETDATE() AND Date >= DATEADD(hh, -24, GETDATE())
I think this is what you are trying to achieve...
Select
UserID,
Date,
Count(1)
from
(Select *
from table1
Union All
Select *
from table2
Union All
Select *
from table3
Union All
Select *
from table4
) a
Group by
Userid,
Date

Valid price at given date

got a table with dates and prices.
Date Price
2012-01-01 25
2012-01-05 12
2012-01-10 10
Is there some kind of function that lets me find what the current price where at '2012-01-07'? Without me knowing of the other dates.
Pseudoquery: select price where currentprice('2012-01-07')
Thanks!
MySQL:
select price from your_table
where date <= '2012-01-07'
order by date desc
limit 1
SQL Server:
select top 1 price from your_table
where date <= '2012-01-07'
order by date desc
If you don't have use of ROW_NUMBER(), and want a generic solution, you need to join on a sub-query.
Get the date you want, then get the data for that date.
SELECT
*
FROM
yourTable
INNER JOIN
(
SELECT MAX(yourDate) AS maxDate FROM yourTable WHERE yourDate <= #dateParameter
)
AS lookup
ON yourTable.yourDate = lookup.maxDate
select price
from table1 t
where t.date = ( select max(t2.date)
from table1 t2
where t2.date <= '2012-01-07' )
Note this is not the copy&paste answer, as we're not not knowing what is the datatype for your date column.