Let's say I have data like this
CustomerID
Trans_date
C001
01-sep-22
C001
04-sep-22
C001
14-sep-22
C002
03-sep-22
C002
01-sep-22
C002
18-sep-22
C002
20-sep-22
C003
02-sep-22
C003
28-sep-22
C004
08-sep-22
C004
18-sep-22
But I'm unable to find the first and second transaction based on Trans_date.
I wish for the result to look like this:
CustomerID
Trans_week
first
second
C001
35
35
37
C001
35
35
37
C001
37
35
37
C002
35
35
37
C002
35
35
37
C002
37
35
37
C002
38
35
37
C003
35
35
39
C003
39
35
39
C004
36
36
37
C004
37
36
37
And the final result should look like this:
CustomerID
first
second
C001
35
37
C002
35
37
C003
35
39
C004 is not included because I only need the customers whose first transaction falls in the first week (week 35).
You may use ROW_NUMBER() function -inside a subquery- to get the first and second transaction dates for a customer, then use conditional MAX window function on the results of that subquery.
-- One output row per transaction: the transaction's week plus the weeks of the
-- customer's first and second transactions (by CustTrans order).
WITH numbered AS (
    -- rn = 1 is the customer's earliest transaction, rn = 2 the next one.
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY CustTrans) AS rn
    FROM trydata
)
SELECT
    CustomerID,
    DATEPART(week, CustTrans) AS Trans_week,
    -- The conditional MAX spreads the single rn = 1 / rn = 2 date over every
    -- row of the same customer.
    DATEPART(week, MAX(CASE WHEN rn = 1 THEN CustTrans END)
                       OVER (PARTITION BY CustomerID)) AS first,
    DATEPART(week, MAX(CASE WHEN rn = 2 THEN CustTrans END)
                       OVER (PARTITION BY CustomerID)) AS second
FROM numbered
ORDER BY
    CustomerID,
    Trans_week
See a demo on SQL Server.
As you requested in the comments, if you want to select only one row per customer showing the first and second weeks, use the following query:
-- One row per customer: the weeks of the customer's first and second
-- transactions (by CustTrans order).
WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY CustTrans) AS rn
    FROM trydata
)
SELECT
    CustomerID,
    DATEPART(week, MAX(CASE WHEN rn = 1 THEN CustTrans END)) AS first,
    DATEPART(week, MAX(CASE WHEN rn = 2 THEN CustTrans END)) AS second
FROM numbered
WHERE rn <= 2              -- only the two earliest transactions are needed
GROUP BY CustomerID
ORDER BY CustomerID
See a demo.
-- One row per customer with the first transaction week and the next later
-- week (0 when the customer has no later week).
WITH cte (RN, CustomerID, FirstWeek, SecondWeek) AS
(
    SELECT
        -- Every row of a customer carries the same FirstWeek/SecondWeek, so
        -- any row can be picked as RN = 1.
        ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY CustomerID) AS RN,
        CustomerID,
        FirstWeek,
        ISNULL(
            (
                SELECT TOP 1 (DATEPART(week, nxt.CustTrans))
                FROM trydata nxt
                WHERE nxt.CustomerID = per_row.CustomerID
                  AND DATEPART(week, nxt.CustTrans) > per_row.FirstWeek
                ORDER BY DATEPART(week, nxt.CustTrans)
            ),
            '0'
        ) AS SecondWeek
    FROM
    (
        SELECT
            CustomerID,
            DATEPART(week, CustTrans) AS TransWeek,
            (
                SELECT MIN(DATEPART(week, CustTrans))
                FROM trydata c
                WHERE c.CustomerID = trydata.CustomerID
            ) AS FirstWeek
        FROM trydata
    ) per_row
)
SELECT
    CustomerID,
    FirstWeek,
    SecondWeek
FROM cte
WHERE RN = 1
Output:
Example 2 :
-- One row per customer: the earliest transaction week and the next distinct
-- later week (NULL when the customer has no later week).
WITH first_weeks AS (
    -- Earliest week per customer; a plain MIN replaces the former
    -- GROUP BY week + ROW_NUMBER() = 1 construction.
    SELECT
        CustomerID,
        MIN(DATEPART(week, CustTrans)) AS FirstWeek
    FROM TryData
    GROUP BY CustomerID
)
SELECT
    fw.CustomerID,
    fw.FirstWeek,
    (
        -- MIN makes the choice deterministic: TOP 1 without ORDER BY may
        -- return any later week, not necessarily the next one.
        SELECT MIN(DATEPART(week, c.CustTrans))
        FROM TryData AS c
        WHERE c.CustomerID = fw.CustomerID
          AND DATEPART(week, c.CustTrans) > fw.FirstWeek
    ) AS SecondWeek
FROM first_weeks AS fw
FIddle Demo
Edit: This can be done in an easier and less complex way.
Related
I'm trying to join multiple result sets, each obtained through a query like this:
-- Number of rows per calendar month (formatted yyyy-MM) whose FIELD equals
-- 'DESIRED_VALUE'.
SELECT
    TO_VARCHAR(CREATE_TIME, 'yyyy-MM') AS YEAR_MONTH,
    COUNT(*)                           AS DESIRED_VALUE
FROM MY_TABLE
WHERE FIELD = 'DESIRED_VALUE'
GROUP BY TO_VARCHAR(CREATE_TIME, 'yyyy-MM');
That results in data such as:
YEAR DESIRED_VALUE1
2022-09 52
2022-10 117
2022-11 95
2023-01 73
YEAR_MONTH DESIRED_VALUE2
2022-11 35
2022-12 30
2023-01 29
I want to end up with a table such as:
YEAR_MONTH DESIRED_VALUE1 DESIRED_VALUE2
2022-09 52 NULL
2022-10 117 NULL
2022-11 95 35
2022-12 53 30
2023-01 73 29
I don't know in advance which dates will be returned by each query, so I can't tell whether a left join, for instance, would be enough. So I'm doing a full outer join:
-- Template: query1 and query2 are placeholders for the two monthly-count
-- queries; each is wrapped in its own CTE.
with result_1 as
(
query1
),
result_2 as
(
query2
)
-- select * returns YEAR_MONTH once per CTE, so the result has two
-- YEAR_MONTH columns.
select *
from
result_1
-- A full outer join keeps months that appear in only one of the two CTEs;
-- the columns of the missing side are NULL for those rows.
full outer join result_2
on
result_1.YEAR_MONTH = result_2.YEAR_MONTH
Which gives me this:
YEAR_MONTH DESIRED_VALUE2 YEAR_MONTH_2 DESIRED_VALUE1
2023-01 29 2023-01 73
NULL NULL 2022-10 117
2022-11 35 2022-11 95
NULL NULL 2022-09 52
2022-12 30 NULL NULL
But I want to display a single YEAR_MONTH column that shows all existing values:
YEAR_MONTH DESIRED_VALUE2 DESIRED_VALUE1
2023-01 29 73
2022-10 NULL 117
2022-11 35 95
2022-09 NULL 52
2022-12 30 NULL
To resolve that, I use:
-- Take the month from whichever side of the full outer join has it.
-- YEAR_MONTH must be qualified with the CTE names (result_1 / result_2).
COALESCE(result_1.YEAR_MONTH, result_2.YEAR_MONTH) as YEAR_MONTH
However, if I add more data:
-- Monthly row counts for three STATUS values, combined side by side.
-- Each CTE yields one row per month (yyyy-MM) for one status.
with result_1 as
(
    select
        TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
        COUNT(1) as DESIRED_VALUE1
    from
        MY_TABLE
    where
        STATUS = 'DESIRED_VALUE1'
    group by 1
),
result_2 as
(
    select
        TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
        COUNT(1) as DESIRED_VALUE2
    from
        MY_TABLE
    where
        STATUS = 'DESIRED_VALUE2'
    group by 1
),
-- Named result_3 so that it matches the references in the final select.
result_3 as
(
    select
        TO_VARCHAR(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
        COUNT(1) as DESIRED_VALUE3
    from
        MY_TABLE
    where
        STATUS = 'DESIRED_VALUE3'
        and CONDITION = 'CONDITION'
    group by 1
)
select
    -- YEAR_MONTH is qualified with the CTE names, not the count column names.
    COALESCE(result_1.YEAR_MONTH, result_2.YEAR_MONTH, result_3.YEAR_MONTH) as YEAR_MONTH,
    DESIRED_VALUE1,
    DESIRED_VALUE2,
    DESIRED_VALUE3
from
    result_1
full outer join
    result_2
on
    result_1.YEAR_MONTH = result_2.YEAR_MONTH
-- NOTE: result_3 is matched against result_2.YEAR_MONTH only. For months that
-- exist in result_1 but not in result_2 that column is NULL, so the result_3
-- row for the same month cannot match and comes out as a separate row.
-- Matching on COALESCE(result_1.YEAR_MONTH, result_2.YEAR_MONTH) instead
-- would give one row per month.
full outer join
    result_3
on
    result_2.YEAR_MONTH = result_3.YEAR_MONTH
order by YEAR_MONTH desc;
I start getting repeated YEAR_MONTH
YEAR_MONTH DESIRED_VALUE1 DESIRED_VALUE2 DESIRED_VALUE3
2023-01 73 29 83
2022-12 53 30 57
2022-11 95 35 71
2022-10 NULL 39 NULL
2022-10 117 NULL NULL
2022-09 18 NULL NULL
2022-09 52 NULL NULL
I'm not sure what's the best way to approach this problem.
If I correctly understand what you're trying to accomplish, wouldn't it be a whole lot simpler (and a lot more performant) to just say
-- One pass over MY_TABLE: one row per month (yyyy-MM) with a count per status.
-- A month with no rows for a given status shows 0 in that column.
select to_varchar(CREATE_TIME, 'yyyy-MM') as YEAR_MONTH,
       sum( case when STATUS = 'DESIRED_VALUE1' then 1 else 0 end ) as DESIRED_VALUE1,
       sum( case when STATUS = 'DESIRED_VALUE2' then 1 else 0 end ) as DESIRED_VALUE2,
       -- The third count also requires CONDITION = 'CONDITION', matching the
       -- filter of the third CTE in the question.
       sum( case when STATUS = 'DESIRED_VALUE3' and CONDITION = 'CONDITION'
                 then 1 else 0 end ) as DESIRED_VALUE3
from MY_TABLE
-- Each status is its own quoted literal.
where STATUS in ('DESIRED_VALUE1', 'DESIRED_VALUE2', 'DESIRED_VALUE3')
group by to_varchar(CREATE_TIME, 'yyyy-MM')
order by YEAR_MONTH
try this
-- Stack both result sets into one (date, value1, value2) shape, then collapse
-- to one row per date. query1 / query2 stand for the two source queries.
;with cte(date, value1, value2) as (
    select date, value1, null from query1
    -- UNION ALL keeps every input row; plain UNION would silently drop
    -- identical rows before they are summed.
    union all
    select date, null, value2 from query2
)
select date
     , sum(value1) as value1   -- NULL when the date occurs only in query2
     , sum(value2) as value2   -- NULL when the date occurs only in query1
from cte
group by date
For this query :
-- One row per customer: the earliest tp_date week and the next distinct later
-- week (NULL when the customer has no later week).
WITH first_weeks AS
(
    -- Earliest week per customer; replaces GROUP BY week + ROW_NUMBER() = 1.
    SELECT
        customerID,
        MIN(DATEPART(week, tp_date)) AS FirstWeek
    FROM
        all_table
    GROUP BY
        customerID
)
SELECT
    fw.customerID,
    fw.FirstWeek,
    (
        -- MIN makes the choice deterministic: TOP 1 without ORDER BY may
        -- return any later week, not necessarily the next one.
        SELECT MIN(DATEPART(week, c.tp_date))
        FROM all_table AS c
        WHERE c.customerID = fw.customerID
          AND DATEPART(week, c.tp_date) > fw.FirstWeek
    ) AS SecondWeek
FROM
    first_weeks AS fw
I get this result :
CustomerID
firstweek
secondweek
C001
35
37
C002
35
37
C003
35
39
C003
36
37
But what to do if I want the result to show only the first week in 35 and second week 37? The result should look like this:
CustomerID
firstweek
secondweek
C001
35
37
C002
35
37
Simply, run a second CTE to apply your filter in final query. Also, you may not need the ROW_NUMBER window function:
-- Customers whose first transaction week is 35 and whose next distinct
-- transaction week is 37.
WITH agg AS (
    -- Earliest transaction week per customer.
    SELECT
        customerID,
        MIN(DATEPART(week, tp_date)) AS FirstWeek
    FROM all_table
    GROUP BY
        customerID
), main AS (
    SELECT
        agg.customerID,
        agg.FirstWeek,
        (
            -- MIN picks the next later week deterministically; TOP 1 without
            -- ORDER BY may return any later week.
            SELECT MIN(DATEPART(week, c.tp_date))
            FROM all_table AS c
            WHERE c.customerID = agg.customerID
              AND DATEPART(week, c.tp_date) > agg.FirstWeek
        ) AS SecondWeek
    FROM agg
)
SELECT
    customerID,
    FirstWeek,
    SecondWeek
FROM main
WHERE FirstWeek = 35
  AND SecondWeek = 37
base_table
eom account_id closings checkouts
2018-11-01 1 21 147
2018-12-01 1 20 214
calendar_table
month account_id
2020-11-01 1
2014-04-01 1
Based on two tables, above, I would like to create a month-by-month cumulative closings and checkouts.
The calendar_table contains the months the account id is active. Thus, it is used as the main table (in the from clause).
-- Builds month-by-month running totals of closings and checkouts per account.
-- The CTEs base_table and calendar_table reuse the names of the tables they
-- read from.
with
base_table as (
select eom, account_id, closings, checkouts
from base_table bt
where account_id in (3,30,122,152,161,179)
)
-- Calendar months, with account_id taken from customers.external_id.
,calendar_table as (
select ct.month, c.external_id as account_id
from calendar_table ct
left join customers c
on c.id = ct.organization_id
where account_id in (3,30,122,152,161,179)
)
,cumulative_table as (
select ct."month"
,list.account_id
,coalesce(bt.closings,0) as closings
-- Running total from the first row of the account up to the current row.
,coalesce(sum(closings) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
as cum_closings
,coalesce(bt.checkouts,0) as checkouts
,coalesce(sum(checkouts) OVER (PARTITION BY list.account_id
ORDER BY ct."month"
rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0)
AS cum_checkouts
from calendar_table ct
-- NOTE(review): the cross join pairs every calendar row with every distinct
-- account_id from base_table; calendar_table's own account_id is not used to
-- restrict the pairing. If calendar_table holds one row per account and
-- month, each (account, month) pair therefore appears once per calendar
-- account -- presumably the source of the repeated months; verify.
cross join (select distinct account_id from base_table) list
left join base_table bt
on bt.account_id = list.account_id and bt.eom = ct.month
)
select *
from cumulative_table
The query above returns a cumulative table that contains duplications, probably because of the cross join.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/11/17 1 20 40 282 564
01/11/17 1 20 60 282 846
01/12/17 1 17 77 346 1192
01/12/17 1 17 94 346 1538
01/12/17 1 17 111 346 1884
I expect the query to return one month per account id.
month account_id closings cum_closings checkouts cum_checkouts
01/11/17 1 20 20 282 282
01/12/17 1 17 37 346 628
You can do this more simply:
-- Month-by-month running totals of closings and checkouts per account.
-- calendar_table drives the result so each active month of an account appears
-- once, including months without a base_table row.
-- Assumes calendar_table has one row per (account_id, month) and carries both
-- account_id and organization_id -- TODO confirm against the real schema.
WITH list AS
( select ct."month", ct.account_id, ct.organization_id
       , bt.closings, bt.checkouts
    from calendar_table ct
    -- Match on account AND month. Joining on account alone repeats every
    -- base_table row once per calendar month of that account.
    left join base_table bt
           on bt.account_id = ct.account_id
          and bt.eom = ct."month"
   -- Filter on the calendar side so the left join is not turned into an
   -- inner join.
   where ct.account_id in (3,30,122,152,161,179)
)
select l."month", c.external_id as account_id
     , coalesce(l.closings,0) as closings
     , coalesce( sum(l.closings) OVER (PARTITION BY l.account_id
                                       ORDER BY l."month"
                                       rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
               , 0
               ) as cum_closings
     , coalesce(l.checkouts,0) as checkouts
     , coalesce( sum(l.checkouts) OVER (PARTITION BY l.account_id
                                        ORDER BY l."month"
                                        rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
               , 0
               ) AS cum_checkouts
  from list as l
  left join customers c
         on c.id = l.organization_id ;
The table below represents user logins (i.e. LogAction_INT = 1 is a login, LogAction_INT = 0 is a logout). What is the best approach to sum the time elapsed between a user's login and logout (a session)? Ideally I need the total time spent per user. Everything I can think of involves while loops and is too complex.
ID User_ID LogDate_DT LogAction_INT
1940 18 2019-04-01 13:15:06.027 1
1941 18 2019-04-01 13:47:39.010 0
1942 18 2019-04-01 15:48:46.453 1
1943 18 2019-04-01 15:54:47.520 0
1944 68 2019-04-02 15:09:20.460 1
1945 68 2019-04-02 15:53:11.223 0
1946 86 2019-04-03 12:48:14.340 1
1947 86 2019-04-03 14:49:55.400 0
1948 80 2019-04-04 08:54:48.157 1
1949 86 2019-04-04 15:26:51.917 1
1950 86 2019-04-04 15:27:53.030 0
1951 86 2019-04-04 15:28:00.920 1
1952 86 2019-04-04 15:28:30.243 0
1953 86 2019-04-04 15:28:35.490 1
1954 86 2019-04-04 15:53:41.700 0
1955 68 2019-04-04 15:54:07.720 1
1956 80 2019-04-04 16:15:55.200 0
I expect to have something like:
User TotalSessionTime
---- -----------------
18 04:45
68 10:02
80 06:12
You can enumerate each of the types and then use conditional aggregation or a join:
-- Seconds per session: the n-th login and the n-th logout of a user share the
-- same seqnum, so grouping by (user_id, seqnum) pairs them up.
with numbered as (
    select
        t.*,
        row_number() over (partition by user_id, LogAction_INT
                           order by id) as seqnum
    from t
)
select
    user_id,
    seqnum,
    datediff(second, min(LogDate_DT), max(LogDate_DT)) as diff_seconds
from numbered
group by
    user_id,
    seqnum;
You can then sum these by user:
-- Total session time in seconds per user.
select user_id,
       sum(diff_seconds) as total_seconds
from (
      -- Seconds per session: the n-th login and the n-th logout of a user
      -- share the same seqnum.
      select user_id, seqnum,
             datediff(second, min(LogDate_DT), max(LogDate_DT)) as diff_seconds
      from (select t.*,
                   row_number() over (partition by user_id, LogAction_INT order by id) as seqnum
            from t
           ) t
      -- No statement terminator here: this is still inside the derived table.
      group by user_id, seqnum
     ) t
group by user_id;
The issue with this type of problem is that the ins and outs don't usually match up quite so cleanly. That makes this a much harder problem.
In supported versions of SQL Server, I would do this using lag().
If the logins and logouts always come in pairs, you can use row_number() to generate a running number and then treat every two rows as one group:
-- Total session time in seconds per user, assuming each user's rows strictly
-- alternate login, logout when ordered by ID.
; with cte as
(
    -- Integer division by 2 gives rows 1-2 grp 0, rows 3-4 grp 1, and so on.
    select *,
           (row_number() over (partition by User_ID order by ID) - 1) / 2 as grp
    from your_table
),   -- the comma separates the two CTE definitions
cte2 as
(
    -- Seconds between the two rows of each pair.
    select User_ID,
           datediff(second, min(LogDate_DT), max(LogDate_DT)) as elapsed
    from cte
    group by User_ID, grp
)
select User_ID,
       sum(elapsed) as total_seconds
from cte2
group by User_ID
Hkey | Observation dt| Retriment_dt | Name |Code | Masterkey
---------+------------+------
23 10/8/2018 01/01/3030 Sam XYZ 99
23 10/8/2018 01/01/3030 Sam XYZ 98
23 10/8/2018 01/01/3030 Sam XYZ 97
21 11/8/2018 01/01/3030 JOHN TGI 65
21 11/8/2018 01/01/3030 JOHN TGI 64
21 11/8/2018 01/01/3030 JOHN TGI 63
30 11/8/2018 01/01/3030 Chris MNY 70
OK, so assume I have this table, and its total row count is over a million. I want to update the table (Observation dt and Retirement dt) for the duplicate rows. I don't want to update all the observation dates to the same date; I want each one to differ by a day. I have entered the desired result manually below. How can I do this in SQL, in SSIS, or in any programming language? This is an MS SQL Server table. I am new to SQL and would appreciate any help. Thanks!
The combination of HKey and Observation_dt is the primary key, and applying the constraint currently throws an error, so I am trying to retire all the duplicate records by changing both retirement_dt and observation_dt. Retirement_dt will be today's date, and observation_dt can be the original date minus one day (decreasing by one more day for each further duplicate).
What it should look like when the code runs
Hkey | Observation dt| Retriment_dt | Name |Code | Masterkey
---------+------------+------
23 10/8/2018 01/01/3030 Sam XYZ 99
23 10/7/2018 12/17/2018 Sam XYZ 98
23 10/6/2018 12/17/2018 Sam XYZ 97
21 11/8/2018 01/01/3030 JOHN TGI 65
21 11/7/2018 12/17/2018 JOHN TGI 64
21 11/6/2018 12/17/2018 JOHN TGI 63
30 11/8/2018 01/01/3030 Chris MNY 70
You can use the following solution:
-- Rebuild the #YourTable sample: drop any previous copy, then load seven rows
-- with both date columns converted from text to DATE.
IF OBJECT_ID('tempdb..#YourTable') IS NOT NULL
BEGIN
    DROP TABLE #YourTable
END

SELECT
    src.Hkey,
    CONVERT(DATE, src.[Observation dt]) AS [Observation dt],
    CONVERT(DATE, src.[Retriment_dt])   AS [Retriment_dt]
INTO #YourTable
FROM
(
    VALUES
        (23, '2018-08-10', '3030-01-01'),
        (23, '2018-08-10', '3030-01-01'),
        (23, '2018-08-10', '3030-01-01'),
        (21, '2018-08-10', '3030-01-01'),
        (21, '2018-08-10', '3030-01-01'),
        (21, '2018-08-10', '3030-01-01'),
        (30, '2018-08-10', '3030-01-01')
) AS src (Hkey, [Observation dt], [Retriment_dt])
-- Re-dates the rows of #YourTable whose (HKey, [Observation dt]) pair occurs
-- more than once: the observation date is moved back by (row number - 1)
-- days and the retirement date is set to GETDATE().
;WITH DuplicateRecords AS
(
-- (HKey, [Observation dt]) pairs that occur more than once.
SELECT
T.HKey,
T.[Observation dt]
FROM
#YourTable T
GROUP BY
T.HKey,
T.[Observation dt]
HAVING
COUNT(1) > 1
),
RowNumber AS
(
-- Rows belonging to a duplicated pair, numbered 1..n within each Hkey.
SELECT
T.Hkey,
T.[Observation dt],
T.[Retriment_dt],
RowNumberByHkey = ROW_NUMBER() OVER (PARTITION BY T.Hkey ORDER BY T.[Observation dt], T.[Retriment_dt])
FROM
#YourTable AS T
INNER JOIN DuplicateRecords AS D ON
T.Hkey = D.Hkey AND
T.[Observation dt] = D.[Observation dt]
),
UpdatedValues AS
(
-- New values per numbered row. Row number 1 keeps its observation date
-- (offset 0) but still receives NewRetirementDT = GETDATE().
-- NOTE(review): the question's expected output keeps the first row's
-- retirement date unchanged -- confirm which behaviour is wanted.
SELECT
R.Hkey,
R.[Observation dt],
R.[Retriment_dt],
NewObservationDT = DATEADD(
DAY,
-1 * (R.RowNumberByHkey - 1),
R.[Observation dt]),
NewRetirementDT = GETDATE(),
R.RowNumberByHkey
FROM
RowNumber AS R
),
RecordsToUpdate AS
(
-- Need a row number to be able to update correctly, since the record is duplicated (need an ID to join)
-- NOTE(review): this numbering covers ALL rows of an Hkey, while RowNumber
-- above numbers only the duplicated ones; the two presumably line up only
-- when every row of an Hkey belongs to a duplicated pair -- verify.
SELECT
T.Hkey,
T.[Observation dt],
T.[Retriment_dt],
RowNumberByHkey = ROW_NUMBER() OVER (PARTITION BY T.Hkey ORDER BY T.[Observation dt], T.[Retriment_dt])
FROM
#YourTable AS T
)
-- The UPDATE targets the RecordsToUpdate CTE, which reads from #YourTable.
UPDATE T SET
[Observation dt] = R.NewObservationDT,
[Retriment_dt] = R.NewRetirementDT
FROM
RecordsToUpdate AS T
INNER JOIN UpdatedValues AS R ON
T.HKey = R.HKey AND
T.[Observation dt] = R.[Observation dt] AND
T.RowNumberByHkey = R.RowNumberByHkey
-- Show #YourTable, newest observation date first within each Hkey.
SELECT
    T.Hkey,
    T.[Observation dt],
    T.[Retriment_dt]
FROM #YourTable AS T
ORDER BY
    T.Hkey ASC,
    T.[Observation dt] DESC
Result:
Hkey Observation dt Retriment_dt
21 2018-08-10 2018-12-18
21 2018-08-09 2018-12-18
21 2018-08-08 2018-12-18
23 2018-08-10 2018-12-18
23 2018-08-09 2018-12-18
23 2018-08-08 2018-12-18
30 2018-08-10 3030-01-01
It was a little tricky because you need to update duplicate records with different values each, so you need to generate some kind of unique ID (I used row number) to be able to match them.
The way to generate different days was to apply a DATEADD with the row number, which was partitioned by HKey. This generates different days with 1 day difference.
My coworker did this in a similar way, But thank you for the replies. I have posted the code used.
-- Copy into ##tmphph every row whose hkey occurs more than once, except the
-- row ranked first (highest healthplanentryhistoryid) within that hkey.
;WITH ranked AS
(
    SELECT
        *,
        -- Number of rows sharing this hkey.
        COUNT(*) OVER (PARTITION BY [healthplanentryhistory_avi_hkey]) AS cnt,
        -- 1 = highest healthplanentryhistoryid within the hkey.
        RANK() OVER (
            PARTITION BY [healthplanentryhistory_avi_hkey]
            ORDER BY healthplanentryhistoryid DESC
        ) AS Rank
    FROM [atf_healthplanentryhistory_avi]
)
SELECT
    [healthplanentryhistory_avi_hkey],
    [effective_date],
    [expiration_date],
    [healthplanentryhistoryid],
    [hospitalmasterid],
    [plancode],
    [plangeneration],
    [code],
    [pawvalue],
    [quantitycoveredbyplan],
    [healthplanentrymasterid],
    [healthplanentryid],
    [healthplanid],
    [lastupdate],
    [origpawvalue],
    [active_ind],
    [hash_diff],
    [source_sys_id],
    [create_date],
    [update_date],
    cnt,
    Rank
INTO ##tmphph
FROM ranked AS t
WHERE t.cnt > 1
  AND t.rank > 1
ORDER BY healthplanentryhistoryid DESC;
---SELECT * FROM ##tmphph where healthplanentryhistory_avi_hkey = 0x039E7D809F8138B703FC9991E9D8F655
-- Retire the duplicate rows collected in ##tmphph: each matching target row
-- gets new effective/expiration dates and active_ind = 0.
MERGE INTO [atf_healthplanentryhistory_avi] atf
USING ##tmphph TEMP
-- Match on hkey, effective_date and healthplanentryhistoryid; only source
-- rows with rank > 1 take part.
ON atf.healthplanentryhistory_avi_hkey = TEMP.[healthplanentryhistory_avi_hkey]
AND atf.effective_date = TEMP.effective_date
AND atf.healthplanentryhistoryid = TEMP.healthplanentryhistoryid
AND TEMP.rank > 1
WHEN MATCHED
THEN
UPDATE
SET atf.effective_date = getdate() - TEMP.rank /* Sets effective_date to the current date/time minus rank days (based on getdate(), not on the old effective_date). */
,expiration_date = getdate() - TEMP.rank
,active_ind = 0;
-- Remove the global temporary table once the merge is done.
DROP TABLE ##tmphph
Using Temp table:
-- Sample data: (hkey, Observation) is meant to be unique but is duplicated.
Create Table #tbl
(
    hkey Int,
    Observation Date,
    Retriment Date
)

Insert Into #tbl (hkey, Observation, Retriment) Values
(23,'2018-10-08','3030-01-01'),
(23,'2018-10-08','3030-01-01'),
(23,'2018-10-08','3030-01-01'),
(21,'2018-11-08','3030-01-01'),
(21,'2018-11-08','3030-01-01'),
(21,'2018-11-08','3030-01-01'),
(30,'2018-11-08','3030-01-01')

-- Give every row a unique number so that duplicates can be told apart and
-- the output can be returned in a stable order.
Select Row_Number() Over (Order By (Select Null)) As raworder, hkey, Observation, Retriment
Into #temp
From #tbl

-- The first row of each (hkey, Observation) group is left as is; every further
-- duplicate moves one more day back and is retired as of today.
Select hkey,
       DateAdd(Day, 1 - dup_no, Observation) As newDT,
       Case When dup_no = 1 Then Retriment Else Convert(Date, GetDate()) End As Retriment
From (
        -- Partition by the full key (hkey, Observation) so rows that are not
        -- duplicates are never shifted; order by raworder so the numbering
        -- is deterministic.
        Select raworder, hkey, Observation, Retriment,
               Row_Number() Over (Partition By hkey, Observation Order By raworder) As dup_no
        From #temp
     ) As numbered
Order By raworder
Result:
hkey newDT Retriment
23 2018-10-08 3030-01-01
23 2018-10-07 2018-12-18
23 2018-10-06 2018-12-18
21 2018-11-08 3030-01-01
21 2018-11-07 2018-12-18
21 2018-11-06 2018-12-18
30 2018-11-08 3030-01-01