sql to find row for min date in each month - sql

I have a table, let's say "Records", with this structure:
id date
-- ----
1 2012-08-30
2 2012-08-29
3 2012-07-25
I need to write an SQL query in PostgreSQL to get record_id for MIN date in each month.
month record_id
----- ---------
8 2
7 3
As we can see, 2012-08-29 < 2012-08-30 and both dates fall in month 8, so we should show record_id = 2 for that month.
I tried something like this,
-- Asker's attempt. GROUP BY 1,2 groups by both the month and record_id, so
-- each distinct record_id forms its own group and MIN(date) is computed per
-- record instead of per month.
SELECT
EXTRACT(MONTH FROM date) as month,
record_id,
MIN(date)
FROM Records
GROUP BY 1,2
but it shows 3 records.
Can anybody help?

-- One row per month number: the row with the earliest date.
-- DISTINCT ON keeps the first row of every group in ORDER BY order, so the
-- ORDER BY must lead with the same expression as DISTINCT ON.
-- Reads from Records, the table named in the question.
SELECT DISTINCT ON (EXTRACT(MONTH FROM date))
    id,
    date
FROM Records
ORDER BY
    EXTRACT(MONTH FROM date),
    date,
    id  -- deterministic pick when several rows share the minimum date
SQLFiddle http://sqlfiddle.com/#!12/76ca2/3
UPD: This query:
1) Orders the records by month and date
2) For every month picks the first record (the first record has MIN(date) because of ordering)
Details here http://www.postgresql.org/docs/current/static/sql-select.html#SQL-DISTINCT

This will return multiples if you have duplicate minimum dates:
-- The derived table finds the earliest date for each month number; joining
-- it back to records on that date returns the matching record id(s).
-- Every row stored on the minimum date comes through, so ties yield
-- multiple rows per month.
SELECT
    minbymonth.Month,
    r.record_id
FROM records AS r
INNER JOIN (
    SELECT
        EXTRACT(MONTH FROM date) AS Month,
        MIN(date) AS Date
    FROM records
    GROUP BY EXTRACT(MONTH FROM date)
) AS minbymonth
    ON r.date = minbymonth.date
ORDER BY minbymonth.Month;
Or if you have CTEs
-- MinByMonth: earliest date for each month number.
WITH MinByMonth AS (
    SELECT
        EXTRACT(MONTH FROM date) AS Month,
        MIN(date) AS Date
    FROM records
    GROUP BY EXTRACT(MONTH FROM date)
)
-- Join back on the date to pick up the record id(s) stored on that date.
SELECT
    m.Month,
    r.record_id
FROM Records AS r
INNER JOIN MinByMonth AS m
    ON r.date = m.date
ORDER BY m.Month;
http://sqlfiddle.com/#!1/2a054/3

-- Rank rows inside each month number by ascending date. rank() assigns 1 to
-- every row tied on the earliest date, so ties are all returned.
with ranked as (
    select
        record_id,
        date,
        rank() over (
            partition by extract(month from date)
            order by date
        ) as r
    from records
)
select
    extract(month from date),
    record_id,
    date
from ranked
where r = 1
order by date

SQL Fiddle
-- One row per calendar month (date_trunc keeps the year, so 2011-08 and
-- 2012-08 stay separate). DISTINCT ON keeps the first row per month in
-- ORDER BY order, so date must sort ASCENDING to return the MIN date;
-- sorting it descending would return the latest date of each month.
select distinct on (date_trunc('month', date))
    date_trunc('month', date) as month,
    id,
    date
from records
order by
    date_trunc('month', date),
    date

I think you need use sub-query, something like this:
-- sub: earliest date per month number.
WITH sub AS (
    SELECT
        EXTRACT(MONTH FROM date) AS month,
        MIN(date) AS mindate
    FROM Records
    GROUP BY EXTRACT(MONTH FROM date)
)
-- Keep only the records whose date is the minimum of their own month.
SELECT
    EXTRACT(MONTH FROM r.date) AS month,
    r.record_id
FROM Records AS r
INNER JOIN sub
    ON r.date = sub.mindate
   AND EXTRACT(MONTH FROM r.date) = sub.month

Related

How to get number of IDs in the current month that also appears in the previous three months in Snowflake - SQL

I have a table in the snowflake with a time range from for example 2019.01 to 2020.01. An ID can appear multiple times (match with) on any of the dates.
For example:
my_table: two columns dddate and id
dddate
id
2019-02-03
607
2019-01-07
356
2019-08-06
491
2019-01-01
607
2019-12-17
529
2019-04-15
356
......
Is there a way I can find the total number of IDs that appeared at least one time in the current month that also appeared at least one time in the previous three months, and group by month to show each month's number count starting from 2019-04 (The first month that has previous three months data available in the table) until 2020-01.
I am thinking of some code like this:
-- Asker's attempt (the question reports that it raises an error).
-- Idea: pair each row of my_table with rows shifted back 1, 2 and 3 months.
WITH PREV_THREE AS (
SELECT
DATE_TRUNC('MONTH', dddate) AS MONTH,
ID AS CURR_ID
FROM my_table mt
INNER JOIN
(
(
SELECT
MONTH(DATEADD(DATE_TRUNC('MONTH', dddate), -1, GETDATE())) AS PREV_MONTH,
ID AS PREV_3_MON_ID
FROM my_table
)
UNION ALL
(
SELECT
MONTH(DATEADD(DATE_TRUNC('MONTH', dddate), -2, GETDATE())) AS PREV_MONTH,
ID AS PREV_3_MON_ID
FROM my_table
)
UNION ALL
(
SELECT
MONTH(DATEADD(DATE_TRUNC('MONTH', dddate), -3, GETDATE())) AS PREV_MONTH,
ID AS PREV_3_MON_ID
FROM my_table
)
) AS PREV_3_MON
-- NOTE(review): CURR_ID is only a SELECT-list alias above, yet it is
-- referenced here as a column of mt. The join also compares ids only;
-- PREV_MONTH is never compared with the current month.
ON mt.CURR_ID = PREV_3_MON.PREV_3_MON_ID
)
-- NOTE(review): the CTE's select list exposes MONTH and CURR_ID; ID is not
-- one of its output columns.
SELECT MONTH, COUNT(DISTINCT ID) AS COUNTER
FROM PREV_THREE
GROUP BY 1
ORDER BY 1
However, it returns an error and doesn't seem to work. Could anyone please help me with this? Thank you in advance!
You can use lag():
-- sightings: every row together with the same id's previous dddate.
with sightings as (
    select
        t.*,
        lag(dddate) over (partition by id order by dddate) as prev_dddate
    from my_table t
)
-- ids seen in the current month whose previous sighting falls inside the
-- three calendar months before it.
select distinct id
from sightings
where dddate >= date_trunc('MONTH', current_date)
  and prev_dddate >= date_trunc('MONTH', current_date) - interval '3 month'
  and prev_dddate < date_trunc('MONTH', current_date);
You can do this for multiple months as:
-- sightings: every row together with the same id's previous dddate.
with sightings as (
    select
        t.*,
        lag(dddate) over (partition by id order by dddate) as prev_dddate
    from my_table t
)
-- Per month: ids whose previous sighting lies before the start of the row's
-- own month but no more than three months before that start.
select
    date_trunc('MONTH', dddate),
    count(distinct id)
from sightings
where prev_dddate >= date_trunc('MONTH', dddate) - interval '3 month'
  and prev_dddate < date_trunc('MONTH', dddate)
group by date_trunc('MONTH', dddate);
Even if an id appears multiple times in one month, one of those will be first and the lag() will identify the most recent previous month.

Count records for first day of every month in a year

I have a table with 4 columns and a huge number of records. It has the following structure:
DATE_ENTERED EMP_NAME DATA ORIGINATED
01-JAN-20 A 545454 APPLE
I want to count the number of records entered on the first day of each month in a year.
Is there any way to fetch the data for the first day of every month?
In oracle you can use TRUNC function on the date as follows:
-- Count the rows entered on the first day of each month.
-- The WHERE clause keeps rows whose day equals the first day of their month.
-- The select list uses the same expression as GROUP BY; selecting
-- TRUNC(DATE_ENTERED) while grouping by TRUNC(DATE_ENTERED, 'MON') is
-- rejected by Oracle as "not a GROUP BY expression".
SELECT TRUNC(DATE_ENTERED, 'MON') AS FIRST_OF_MONTH, COUNT(1) AS CNT
FROM YOUR_TABLE
WHERE TRUNC(DATE_ENTERED) = TRUNC(DATE_ENTERED, 'MON')
GROUP BY TRUNC(DATE_ENTERED, 'MON')
Please note that the TRUNC(DATE_ENTERED, 'MON') returns the first day of the month for DATE_ENTERED.
Cheers!!
-- Count the rows entered on day 1 of a month, per year and month.
-- The inner select list needs commas between its three items; without them
-- the statement does not parse.
SELECT Year, Month, COUNT(*)
FROM
(
    SELECT
        YEAR(DATE_ENTERED) Year,
        MONTH(DATE_ENTERED) Month,
        DAY(DATE_ENTERED) Day
    FROM your_table
    WHERE DAY(DATE_ENTERED) = 1
) A
GROUP BY Year, Month
WHERE DAY(DATE_ENTERED) = 1 keeps only the records dated on the first day of a month. The YEAR and MONTH functions then let you group those records to get a count for each year and month.
You mean something like
-- One count per first-of-month date in the chosen year.
-- DATE_ENTERED is selected so that each count is labeled with its date;
-- grouping by it without selecting it returns anonymous counts.
-- Table and Some_Year are placeholders for the real table name and year.
SELECT DATE_ENTERED, COUNT(*)
FROM Table
WHERE DAY(DATE_ENTERED) = 1 AND
YEAR(DATE_ENTERED) = Some_Year
GROUP BY DATE_ENTERED
You can also use DATE_ENTERED BETWEEN 'YYYY0101' and 'YYYY1231' (replace the YYYY with the year you want to retrieve data for) instead of YEAR(DATE_ENTERED) = Some_Year, if performance is an issue.
You can use something like this:
-- All rows dated on day 1 of a month between 2020-01-01 and 2020-12-31.
SELECT *
FROM your_table
WHERE DATE_ENTERED BETWEEN '2020-01-01' AND '2020-12-31'
  AND DAY(DATE_ENTERED) = 1
for number of count use this:
-- Total number of rows dated on day 1 of a month between 2020-01-01 and 2020-12-31.
SELECT COUNT(*)
FROM your_table
WHERE DATE_ENTERED BETWEEN '2020-01-01' AND '2020-12-31'
  AND DAY(DATE_ENTERED) = 1
UPDATE
-- Rows dated on day 1 of a month from January to December 2020.
-- ANSI DATE literals replace the 'DD-MON-YY ' strings, which carried a
-- trailing space and depended on the session's implicit date conversion.
select *
from your_table
where Extract(day FROM DATE_ENTERED) = 1
  and DATE_ENTERED between DATE '2020-01-01' and DATE '2020-12-01';
this is how the data looks like:
For the list of records
-- Number of rows dated on day 1 of a month from January to December 2020.
-- ANSI DATE literals replace the 'DD-MON-YY ' strings, which carried a
-- trailing space and depended on the session's implicit date conversion.
select count(*)
from your_table
where Extract(day FROM DATE_ENTERED) = 1
  and DATE_ENTERED between DATE '2020-01-01' and DATE '2020-12-01';
UPDATE-2
-- Per month: how many rows were entered on day 1 of that month in 2020.
-- Changes from the original:
--   * adds the missing count(*); the column aliased "Count" held the month number
--   * formats DATE_ENTERED directly instead of round-tripping it through to_date
--   * uses DATE literals instead of a string literal split across two lines
select EXTRACT(month from DATE_ENTERED) as month_no,
       to_char(DATE_ENTERED, 'Month') as month_name,
       count(*) as cnt
from your_table
where Extract(day FROM DATE_ENTERED) = 1
  and DATE_ENTERED between DATE '2020-01-01' and DATE '2020-12-01'
group by EXTRACT(month from DATE_ENTERED),
         to_char(DATE_ENTERED, 'Month');
Here is the output:

How to replace the loop in MsSQL?

For example
If I want to check in every day last week
select count(ID) from DB where date < "2019/07/01"
select count(ID) from DB where date < "2019/07/02"
select count(ID) from DB where date < "2019/07/03"
...
select count(ID) from DB where date < "2019/07/08"
like
0701 10
0702 15
0703 23
...
0707 45
How to do this without loop and one query?
You can generate the dates using a recursive CTE (or other method) and then run the query:
-- dates: recursive CTE producing one row per day, 2019-07-01 through 2019-07-08.
WITH dates AS (
    SELECT CONVERT(date, '2019-07-01') AS dte
    UNION ALL
    SELECT DATEADD(day, 1, dte)
    FROM dates
    WHERE dte < '2019-07-08'
)
-- For every generated day, count the DB rows dated strictly before it.
SELECT
    d.dte,
    (SELECT COUNT(*) FROM DB WHERE DB.date < d.dte)
FROM dates AS d;
More efficient, though, is a cumulative sum:
-- Daily row count plus a running total over all dates, then restricted to
-- the requested week. The outer select must reference the derived table by
-- its alias d; db is not visible at that level.
-- Note: running_cnt includes the rows of the current date, and dates with
-- no rows do not appear in the output.
select d.*
from (
    select
        date,
        count(*) as cnt,
        sum(count(*)) over (order by date) as running_cnt
    from db
    group by date
) d
where d.date >= '2019-07-01' and d.date < '2019-07-09';
Are you just counting the number by day?
Something like
-- Number of non-null IDs per (month, day) pair.
select
    month(date),
    day(date),
    count(ID)
from DB
group by
    month(date),
    day(date);
(assuming date is a DATE or DATETIME)
Do it with a window COUNT. The frame "range between current row and current row" selects exactly the rows of the current day.
-- For each date: number of rows dated strictly before it.
--   count over (order by date)                 = rows up to and including this date
--   count over (... current row..current row)  = rows on this date only
-- The window counts are computed over the whole table in the inner query
-- and the date range is applied afterwards. Filtering in the same query
-- level would run before the window functions and drop every row dated
-- before 2019-07-01 from the running count.
select d.date, d.cnt_before
from (
    select distinct
        date,
        count(1) over (order by date)
          - count(1) over (order by date range between current row and current row) as cnt_before
    from DB
) d
where d.date between '2019-07-01' and '2019-07-08';
I assume date column is exactly DATE.

Get last data recorded of the date and group it by month

tbl_totalMonth has id,time, date and kwh column.
I want to get the last recorded data of the months and group it per month so the result would be the name of the month and kwh.
the result should be something like this:
month | kwh
------------
January | 150
February | 400
the query I tried: (but it returns the max kwh not the last kwh recorded)
-- Asker's attempt: max(a.kwh) returns the largest kwh value within each
-- month-name group, not the kwh of the row with the latest date.
SELECT DATENAME(MONTH, a.date) as monthly, max(a.kwh) as kwh
from tbl_totalMonth a
-- Lower bound: DATEADD/DATEDIFF in years from date 0 gives the first day of
-- the year that contains GETDATE() - 1.
WHERE date > = DATEADD(yy,DATEDIFF(yy,0, GETDATE() -1 ),0)
group by DATENAME(MONTH, a.date)
I suspect you need something quite different:
-- Latest recorded row per calendar month, starting from the first day of
-- the year that contains yesterday.
-- row_number() numbers each month's rows from newest to oldest, so rn = 1 is
-- the most recent one. The table stores date and time in separate columns,
-- so [time] is included in the ordering; ordering by date alone would leave
-- the choice among rows of the same day arbitrary.
select *
from (
    select
        a.*,
        row_number() over (
            partition by year(a.date), month(a.date)
            order by a.date desc, a.[time] desc
        ) as rn
    from tbl_totalMonth a
    where a.date >= DATEADD(yy, DATEDIFF(yy, 0, GETDATE() - 1), 0)
) d
where rn = 1
To get "the last kwh recorded (per month)" you need to use row_number() which - per month - will order the rows (descending) and give each one a row number. When that number is 1 you have "the most recent" row for that month, and you won't need group by at all.
You could use group by and month
-- Sum of kwh over the rows whose date equals the table's maximum date,
-- labeled with that date's month name.
SELECT
    DATENAME(month, date),
    SUM(kwh)
FROM tbl_totalMonth
WHERE date = (SELECT MAX(date) FROM tbl_totalMonth)
GROUP BY DATENAME(month, date)
If you need only the last row for each month, then you should use
-- kwh of the row(s) stored on the latest date of every calendar month.
-- Fixes: the column is kwh (not khw), and the latest date is taken per
-- year AND month so that the same month of different years is not merged.
select datename(month, a.date), a.kwh
from tbl_totalMonth a
inner join (
    select max(date) as max_date
    from tbl_totalMonth
    group by year(date), month(date)
) t on t.max_date = a.date

How can I count users in a month that were not present in the month before?

I am trying to count unique users on a monthly basis that were not present in the previous month. So if a user has a record for January and then another one for February, then I would only count January for that user.
user_id time
a1 1/2/17
a1 2/10/17
a2 2/18/17
a4 2/5/17
a5 3/25/17
My results should look like this
Month User Count
January 1
February 2
March 1
I'm not really familiar with BigQuery, but here's how I would solve the problem using TSQL. I imagine that you'd be able to use similar logic in BigQuery.
1). Order the data by user_id first, and then time. In TSQL, you can accomplish this with the following and store it in a common table expression, which you will query in the step after this.
-- Number each user's rows in ascending [time] order; rn = 1 marks the
-- earliest row of every user_id. This is only the CTE header: it must be
-- followed directly by the statement that queries cte.
;WITH cte AS
(
select ROW_NUMBER() OVER (PARTITION BY [user_id] ORDER BY [time]) AS rn,*
from dbo.employees
)
2). Next query for only the rows with rn = 1 (the first occurrence for a particular user) and group by the month.
-- Keep each user's first row (rn = 1) and count those rows per month name.
SELECT
    DATENAME(month, [time]) AS [Month],
    COUNT(*) AS user_count
FROM cte
WHERE rn = 1
GROUP BY DATENAME(month, [time])
This is assuming that 2017 is the only year you're dealing with. If you're dealing with more than one year, you probably want step #2 to look something like this:
-- Keep each user's first row (rn = 1) and count those rows per year and month name.
SELECT
    YEAR([time]) AS [year],
    DATENAME(month, [time]) AS [month],
    COUNT(*) AS user_count
FROM cte
WHERE rn = 1
GROUP BY
    YEAR([time]),
    DATENAME(month, [time])
First aggregate by the user id and the month. Then use lag() to see if the user was present in the previous month:
-- du: one row per (month, user). user_id must be part of the GROUP BY,
-- otherwise the select list is invalid.
with du as (
    select date_trunc(time, month) as yyyymm, user_id
    from t
    group by date_trunc(time, month), user_id
)
-- Count a user in a month when they have no earlier month at all
-- (prev_yyyymm is null) or when their previous active month is older than
-- the month immediately before this one.
select yyyymm, count(*)
from (
    select
        du.*,
        lag(yyyymm) over (partition by user_id order by yyyymm) as prev_yyyymm
    from du
) du
where prev_yyyymm is null
   or prev_yyyymm < date_sub(yyyymm, interval 1 month)
group by yyyymm;
Note: This uses the date functions, but similar functions exist for timestamp.
The way I understand the question, a user should be excluded from a given month's count only if the same user was present in the immediately preceding month. If the same user was present a few months earlier, but not in the preceding month, the user should still be counted.
If this is correct - Try below for BigQuery Standard SQL
#standardSQL
-- Reading from the inside out:
--   innermost: parses the text column time with format '%x' and emits one
--              row per user and month (GROUP BY 1, 2, 3, 4 removes duplicates)
--   middle:    flag = DATE_DIFF in months between this month and the same
--              user's previous month (NULL when there is no previous row)
--   outer:     keeps rows whose flag is not 1 and counts distinct users per month
SELECT Year, Month, COUNT(DISTINCT user_id) AS User_Count
FROM (
SELECT *,
DATE_DIFF(time, LAG(time) OVER(PARTITION BY user_id ORDER BY time), MONTH) AS flag
FROM (
SELECT
user_id,
DATE_TRUNC(PARSE_DATE('%x', time), MONTH) AS time,
EXTRACT(YEAR FROM PARSE_DATE('%x', time)) AS Year,
FORMAT_DATE('%B', PARSE_DATE('%x', time)) AS Month
FROM yourTable
GROUP BY 1, 2, 3, 4
)
)
-- IFNULL maps a missing previous month to 0, so first-time users pass the filter.
WHERE IFNULL(flag, 0) <> 1
-- time (the truncated month) is grouped too so that ORDER BY time is chronological.
GROUP BY Year, Month, time
ORDER BY time
you can test / play with above using below example with dummy data from your question
#standardSQL
-- yourTable: inline dummy data (the five sample rows from the question),
-- with time kept as text.
WITH yourTable AS (
SELECT 'a1' AS user_id, '1/2/17' AS time UNION ALL
SELECT 'a1', '2/10/17' UNION ALL
SELECT 'a2', '2/18/17' UNION ALL
SELECT 'a4', '2/5/17' UNION ALL
SELECT 'a5', '3/25/17'
)
-- Outer query: distinct users per month whose flag is not 1.
SELECT Year, Month, COUNT(DISTINCT user_id) AS User_Count
FROM (
SELECT *,
-- flag: months between this row's month and the same user's previous month.
DATE_DIFF(time, LAG(time) OVER(PARTITION BY user_id ORDER BY time), MONTH) AS flag
FROM (
-- One row per user and month; GROUP BY 1, 2, 3, 4 removes duplicates.
SELECT
user_id,
DATE_TRUNC(PARSE_DATE('%x', time), MONTH) AS time,
EXTRACT(YEAR FROM PARSE_DATE('%x', time)) AS Year,
FORMAT_DATE('%B', PARSE_DATE('%x', time)) AS Month
FROM yourTable
GROUP BY 1, 2, 3, 4
)
)
-- IFNULL maps a missing previous month to 0, so first-time users pass the filter.
WHERE IFNULL(flag, 0) <> 1
GROUP BY Year, Month, time
ORDER BY time
The output is
Year Month User_Count
2017 January 1
2017 February 2
2017 March 1
Try this query:
-- Users per month who were NOT present in the previous month (anti-join).
-- t2 is matched on the SAME user in the month just before t1's month; when
-- no such row exists the LEFT JOIN leaves t2.user_id NULL and the user is
-- counted. Joining on the month alone and filtering with
-- t1.user_id <> t2.user_id would count a user whenever any OTHER user
-- existed in the previous month.
-- Note: d is a bare month number, so December -> January is not treated as
-- consecutive and the same month of different years is merged.
SELECT
    t1.d,
    count(DISTINCT t1.user_id)
FROM
(
    SELECT
        EXTRACT(MONTH FROM time) AS d,
        user_id
    FROM nbitra.tmp
) t1
LEFT JOIN
(
    SELECT
        EXTRACT(MONTH FROM time) AS d,
        user_id
    FROM nbitra.tmp
) t2
    ON t2.user_id = t1.user_id
   AND t2.d + 1 = t1.d
WHERE t2.user_id IS NULL  -- no row for this user in the previous month
GROUP BY t1.d;