SQL server pivot count function works incorrectly if used with inner query - sql

I think there is a bug in my SQL-server pivot function because I can't find any other explanation.
I am running a simple pivot and using the exact syntax shown in MSDN.
But the pivot result shows the exact same number in every week column, and that number equals the total count across all weeks!
(left is query result and right is what I want)
here is my query:
-- Counts rides (r.id) per customer, spread across the listed week numbers.
-- The derived table p exposes exactly three columns, so PIVOT groups by the
-- one column that is neither aggregated nor spread: customer_id.
SELECT
    *
FROM (SELECT
          r.customer_id   -- fixed spelling (was "cutomer_id") to match the sample data
         ,c.[Week]
         ,r.id
      FROM r
      INNER JOIN c
          ON r.Create_date = c.Date
      WHERE Is_ride = 1
        -- two date windows, expressed on the integer form of the create date
        AND ((Create_date_int BETWEEN 20190302 AND 20190319)
          OR (Create_date_int BETWEEN 20190406 AND 20190426))) p
PIVOT
(
    COUNT(id)
    FOR [Week] IN
    ([9], [10], [11], [12], [14], [15], [16], [17])
) AS pvt
here are some test data representing the "p" output (i just changed id numbers, weeks are the same number you get from the query)
-- Sample rows standing in for the output of the derived table "p":
-- ten rides, each for a different customer, all in week 10.
DROP TABLE IF EXISTS #t;

CREATE TABLE #t
(
    customer_id INT,
    WEEK        INT,
    id          INT
);

INSERT INTO #t (customer_id, WEEK, id)
VALUES
    (12032,  10, 8607),
    (43551,  10, 8721051),
    (55025,  10, 81200),
    (198874, 10, 861362),
    (99675,  10, 867081),
    (19387,  10, 863656),
    (12526,  10, 8603706),
    (19503,  10, 860924),
    (37597,  10, 860909),
    (136019, 10, 8610674);
so I thought there is something wrong with my query but then I changed the query to this:
-- Step 1: materialise the three pivot input columns into #t.
-- SELECT ... INTO creates #t, so drop any earlier copy to keep the script re-runnable.
DROP TABLE IF EXISTS #t;

SELECT
    r.customer_id   -- fixed spelling (was "cutomer_id") to match the sample data
   ,c.[Week]
   ,r.id
INTO #t
FROM r
INNER JOIN c
    ON r.Create_date = c.Date
WHERE Is_ride = 1
  AND ((Create_date_int BETWEEN 20190302 AND 20190319)
    OR (Create_date_int BETWEEN 20190406 AND 20190426));

-- Step 2: count ride ids per customer for each listed week number.
SELECT
    *
FROM #t
PIVOT
(
    COUNT(id)
    FOR [Week] IN
    ([9], [10], [11], [12], [14], [15], [16], [17])
) AS pvt
and it works just fine!
also if I remove the r.id column from select and change it to count(week) it works fine!!!
also if I only change my where to
-- Fragment only: the same two date windows as the derived table "p", plus a
-- filter that restricts the rows to three ids.
-- NOTE(review): the filter column is passenger_id, while the select list of the
-- query uses a customer id column — presumably the same column under a
-- different name; confirm.
WHERE Is_ride = 1
AND ((Create_date_int BETWEEN 20190302 AND 20190319)
OR (Create_date_int BETWEEN 20190406 AND 20190426))
and passenger_id in (43551,12032,136019)
) p
it works fine!!!
can someone give me an explanation?

I have no idea why you're getting the result shown on your images. There's possibly some information missing.
Here's an MCVE (minimal, complete, verifiable example), with commented-out code that creates a larger set of test data.
-- Rides table. Create_date_int is a computed column: create_date formatted
-- with style 112 (yyyymmdd) and converted to int.
CREATE TABLE r
(
    id              int IDENTITY,
    customer_id     int,
    create_date     date,
    Create_date_int AS CONVERT(int, CONVERT(char(8), create_date, 112)),
    is_ride         bit
);

-- Ten rides on 2019-03-07, one for each of customers 1 through 10.
INSERT INTO r (customer_id, create_date, is_ride)
SELECT v.customer_id, '20190307', 1
FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) AS v (customer_id);

-- Alternative SELECT for the INSERT above: a larger random data set
-- (random customer 0-9, random date within 120 days of the start of 2019).
--SELECT TOP 100000
--    ABS(CHECKSUM(NEWID())) % 10,
--    DATEADD( dd, ABS(CHECKSUM(NEWID())) % 120, '2019'),
--    1
--FROM sys.all_columns a, sys.all_columns b;

-- Calendar table. [Week] is computed from [Date] with DATEPART(wk, ...).
CREATE TABLE c
(
    [Date] date,
    [Week] AS DATEPART(wk, [Date])
);

-- One consecutive date per row of sys.all_columns, starting at the date the
-- literal '2019' converts to.
INSERT INTO c ([Date])
SELECT DATEADD(dd, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, '2019')
FROM sys.all_columns;
From here, I could run your original query that gives the expected results, just 1's in week 10.
I can also run an alternative query using cross tabs that is my preferred method to pivot data.
-- Cross-tab version of the pivot: one COUNT per week column. The CASE yields
-- the ride id only for the matching week (NULL otherwise), and COUNT skips NULLs.
SELECT
    rd.customer_id,
    COUNT(CASE cal.[Week] WHEN 9  THEN rd.id END) AS wk9,
    COUNT(CASE cal.[Week] WHEN 10 THEN rd.id END) AS wk10,
    COUNT(CASE cal.[Week] WHEN 11 THEN rd.id END) AS wk11,
    COUNT(CASE cal.[Week] WHEN 12 THEN rd.id END) AS wk12,
    COUNT(CASE cal.[Week] WHEN 14 THEN rd.id END) AS wk14,
    COUNT(CASE cal.[Week] WHEN 15 THEN rd.id END) AS wk15,
    COUNT(CASE cal.[Week] WHEN 16 THEN rd.id END) AS wk16,
    COUNT(CASE cal.[Week] WHEN 17 THEN rd.id END) AS wk17
FROM r AS rd
INNER JOIN c AS cal
    ON rd.Create_date = cal.[Date]
WHERE Is_ride = 1
  AND (   Create_date_int BETWEEN 20190302 AND 20190319
       OR Create_date_int BETWEEN 20190406 AND 20190426)
GROUP BY rd.customer_id
ORDER BY rd.customer_id;
That also gives the correct information.
Both of these methods might benefit from preaggregation to improve their performance.
-- Pre-aggregate to one row per (customer, week) first, then let PIVOT spread
-- those weekly counts across the week columns by summing them.
SELECT
    *
FROM (
    SELECT
        rd.customer_id,
        cal.[Week],
        COUNT(*) AS weeklycount
    FROM r AS rd
    INNER JOIN c AS cal
        ON rd.Create_date = cal.[Date]
    WHERE Is_ride = 1
      AND (   Create_date_int BETWEEN 20190302 AND 20190319
           OR Create_date_int BETWEEN 20190406 AND 20190426)
    GROUP BY
        rd.customer_id,
        cal.[Week]
) AS weekly
PIVOT
(
    SUM(weeklycount)
    FOR [Week] IN ([9], [10], [11], [12], [14], [15], [16], [17])
) AS pvt
ORDER BY customer_id;
-- Same pre-aggregation, finished as a cross-tab: each week column sums the
-- weekly count for its own week and contributes 0 for every other week.
WITH weekly_counts AS (
    SELECT
        rd.customer_id,
        cal.[Week],
        COUNT(*) AS weeklycount
    FROM r AS rd
    INNER JOIN c AS cal
        ON rd.Create_date = cal.[Date]
    WHERE Is_ride = 1
      AND (   Create_date_int BETWEEN 20190302 AND 20190319
           OR Create_date_int BETWEEN 20190406 AND 20190426)
    GROUP BY
        rd.customer_id,
        cal.[Week]
)
SELECT
    customer_id,
    SUM(CASE [Week] WHEN 9  THEN weeklycount ELSE 0 END) AS wk9,
    SUM(CASE [Week] WHEN 10 THEN weeklycount ELSE 0 END) AS wk10,
    SUM(CASE [Week] WHEN 11 THEN weeklycount ELSE 0 END) AS wk11,
    SUM(CASE [Week] WHEN 12 THEN weeklycount ELSE 0 END) AS wk12,
    SUM(CASE [Week] WHEN 14 THEN weeklycount ELSE 0 END) AS wk14,
    SUM(CASE [Week] WHEN 15 THEN weeklycount ELSE 0 END) AS wk15,
    SUM(CASE [Week] WHEN 16 THEN weeklycount ELSE 0 END) AS wk16,
    SUM(CASE [Week] WHEN 17 THEN weeklycount ELSE 0 END) AS wk17
FROM weekly_counts
GROUP BY customer_id
ORDER BY customer_id;
The only remaining difference is that the PIVOT query returns NULL for weeks with no rows, so you would need to wrap each pivoted column in ISNULL() in the column list to show zeros instead of NULLs.

Related

CTE function with insert statement

I have 2 queries and I need to combine them into one query with an insert statement.
This is my first query that already has an insert statement:
-- Inner query: one row per employee (NIP, NAME) per department, month and year,
-- with the summed lost time rendered as an hh:mi:ss string (style 108) and the
-- summed overtime as a float.
-- Outer query: per department/month/year, count the employees at or above each threshold.
WITH dept_month_summary AS (
    SELECT
        emp.department,
        emp.months,
        emp.years,
        COUNT(CASE WHEN emp.sum_lost_time >= '10:00:00' THEN NAME END) AS RTOTALLOSTTIME,
        COUNT(CASE WHEN emp.sum_ot >= '20' THEN NAME END)              AS ROT
    FROM (
        SELECT
            MONTH(STATUSIN) AS [months],
            YEAR(STATUSIN)  AS [years],
            NIP,
            NAME,
            DEPARTMENT,
            CONVERT(varchar,
                    DATEADD(second,
                            SUM(DATEDIFF(second, '00:00:00', CAST(TOTALLT AS time))),
                            0),
                    108)        AS sum_lost_time,
            SUM(CAST(OT AS FLOAT)) AS sum_ot
        FROM SUMMARYDATA b
        GROUP BY
            MONTH(STATUSIN),
            YEAR(STATUSIN),
            NIP,
            NAME,
            DEPARTMENT
    ) emp
    GROUP BY
        emp.department,
        emp.months,
        emp.years
)
INSERT INTO REPORTDATA (DEPARTMENT, MONTHS, YEARS, RTOTALLOSTTIME, ROT)
SELECT
    DEPARTMENT,
    MONTHS,
    YEARS,
    RTOTALLOSTTIME,
    ROT
FROM dept_month_summary
This is the result from first query in table REPORTDATA:
And this is my second query.
-- Step 1: per employee name, department, month and year, count the rows with a
-- non-zero LATECOME and the rows with a non-zero EARLYLEAVE.
-- Step 2: per department/month/year, count the employees with more than two
-- late arrivals and those with more than one early leave.
WITH per_employee AS
(
    SELECT
        [NAME],
        DEPARTMENT,
        MONTH(STATUSIN) AS [MONTH],
        YEAR(STATUSIN)  AS [YEAR],
        SUM(CASE WHEN LATECOME = '00:00:00' THEN 0 ELSE 1 END)   AS Total,
        SUM(CASE WHEN EARLYLEAVE = '00:00:00' THEN 0 ELSE 1 END) AS TotalEarlyLeave
    FROM SUMMARYDATA
    GROUP BY
        [NAME],
        DEPARTMENT,
        MONTH(STATUSIN),
        YEAR(STATUSIN)
)
SELECT
    SUM(CASE WHEN Total > 2 THEN 1 ELSE 0 END)           AS LATECOME,
    SUM(CASE WHEN TotalEarlyLeave > 1 THEN 1 ELSE 0 END) AS EARLYLEAVE
FROM per_employee
GROUP BY
    DEPARTMENT,
    [MONTH],
    [YEAR]
And this is the result from second query:
I want to place it into my first query but I don't know how to combine it into one in insert statement. Can anyone solve my problems?
This is the sample to my first query: Count summary records per month with conditional SQL
and this is the sample to second query: Count records per month with condition in SQL Server
It's easy if you concatenate your queries as multiple CTEs, and finally JOIN them.
Like this :
;
-- cte1: per department/month/year, the number of employees whose summed lost
--       time is at least '10:00:00' (string comparison on an hh:mi:ss value)
--       and the number whose summed overtime is at least 20.
with cte1 as (
select s.department
,s.months
,s.years
,count(case when s.sum_lost_time >='10:00:00' then NAME end) as RTOTALLOSTTIME
,count(case when s.sum_ot >='20' then NAME end) as ROT
from (select MONTH(STATUSIN) as [months]
,YEAR(STATUSIN) as [years]
,NIP
,NAME
,DEPARTMENT
,convert(varchar,dateadd(second,sum(datediff(second,'00:00:00',cast(TOTALLT as time))),0),108) as sum_lost_time
,SUM(CAST(OT AS FLOAT)) as sum_ot
from SUMMARYDATA b
group by MONTH(STATUSIN)
,YEAR(STATUSIN)
,NIP
,NAME
,DEPARTMENT
)s
group by s.department
,s.months
,s.years
),
-- cte2: per employee name/department/month/year, the number of rows with a
--       non-zero LATECOME and with a non-zero EARLYLEAVE.
cte2 as (
SELECT DISTINCT [NAME], DEPARTMENT, MONTH(STATUSIN) [MONTH], YEAR(STATUSIN) [YEAR],
SUM(CASE WHEN LATECOME = '00:00:00' THEN 0 ELSE 1 END) OVER(PARTITION BY [NAME], DEPARTMENT, MONTH(STATUSIN), YEAR(STATUSIN)) Total
,SUM(CASE WHEN EARLYLEAVE = '00:00:00' THEN 0 ELSE 1 END) OVER(PARTITION BY [NAME], DEPARTMENT, MONTH(STATUSIN), YEAR(STATUSIN)) TotalEarlyLeave
FROM SUMMARYDATA
),
-- cte3: per department/month/year, how many employees exceed each threshold.
cte3 as (
SELECT DEPARTMENT, [MONTH], [YEAR], SUM(CASE WHEN TOTAL > 2 THEN 1 ELSE 0 END) LATECOME,
SUM(CASE WHEN TotalEarlyLeave > 1 THEN 1 ELSE 0 END) EARLYLEAVE
FROM cte2
GROUP BY DEPARTMENT, [MONTH], [YEAR]
)
INSERT INTO REPORTDATA (DEPARTMENT, MONTHS, YEARS, RTOTALLOSTTIME, ROT, RLATECOME, REARLYLEAVE)
SELECT cte1.DEPARTMENT, cte1.MONTHS, cte1.YEARS, cte1.RTOTALLOSTTIME, cte1.ROT,
cte3.LATECOME, cte3.EARLYLEAVE
FROM cte1
-- cte1 names its period columns months/years while cte3 names them [MONTH]/[YEAR];
-- the join must use each CTE's own column names (cte1.[MONTH] does not exist).
LEFT JOIN cte3 ON cte3.DEPARTMENT = cte1.DEPARTMENT and cte3.[MONTH] = cte1.MONTHS and cte3.[YEAR] = cte1.YEARS

How to get multiple columns in Crosstab

I would like a cross table from the following table.
The cross table should look like this
A pivot table does not seem to solve the problem, because only one column can be used at a time. But in our case we are dealing with 4 different columns. (payment, month, year and free of charge)
I solved the problem by splitting these 4 columns into four different pivot tables, using temporary tables and finally reassembling the obtained data. But this is very complicated, long and confusing, in short not very nice...
The years and months should be shown in ascending form, exactly as you can see in the cross table above.
I have been looking for a solution for quite a while but I can't find the same problem anywhere.
If someone would give me a short, elegant solution I would be very grateful.
Under http://www.sqlfiddle.com/#!18/7216f/2 you can see the problem definition.
Thank you!
You can rank records by date in a subquery with row_number(), and then pivot with conditional aggregation:
-- Number each client's rows by (year, month), then fold rows 1 to 3 into
-- side-by-side columns. Each MAX sees at most one non-NULL value per client.
select
    ClientId,
    max(case rn when 1 then Payment end)      as Payment1,
    max(case rn when 2 then Payment end)      as Payment2,
    max(case rn when 3 then Payment end)      as Payment3,
    max(case rn when 1 then [Month] end)      as Month1,
    max(case rn when 2 then [Month] end)      as Month2,
    max(case rn when 3 then [Month] end)      as Month3,
    max(case rn when 1 then [Year] end)       as Year1,
    max(case rn when 2 then [Year] end)       as Year2,
    max(case rn when 3 then [Year] end)       as Year3,
    max(case rn when 1 then FreeOfCharge end) as FreeOfCharge1,
    max(case rn when 2 then FreeOfCharge end) as FreeOfCharge2,
    max(case rn when 3 then FreeOfCharge end) as FreeOfCharge3
from (
    select
        src.*,
        row_number() over (partition by ClientId order by [Year], [Month]) as rn
    from mytable as src
) as ranked
group by ClientId
You can join the table with itself a few times, as in:
-- Number each client's payments by (year, month); the first payment row drives
-- the result and the second and third are attached with left joins, so clients
-- with fewer than three payments get NULLs in the missing slots.
with ranked as (
    select
        *,
        row_number() over (partition by clientid order by [year], [month]) as seq
    from Payment
)
select
    first_p.clientid,
    first_p.payment,      second_p.payment,      third_p.payment,
    first_p.[month],      second_p.[month],      third_p.[month],
    first_p.[year],       second_p.[year],       third_p.[year],
    first_p.freeofcharge, second_p.freeofcharge, third_p.freeofcharge
from ranked as first_p
left join ranked as second_p
    on  second_p.clientid = first_p.clientid
    and second_p.seq = 2
left join ranked as third_p
    on  third_p.clientid = first_p.clientid
    and third_p.seq = 3
where first_p.seq = 1
See Fiddle.

Get Sum of two columns having multiple pivot

I have created a SQL query that returns the total number of days in a month a person was present. Now I want to add the Total column and the three leave-type columns, i.e. [Casual Leave], [Paid Leave] and [Complimentary Leave].
In short, I want to add the Total column together with [Casual Leave], [Paid Leave] and [Complimentary Leave].
My query is as below:
-- Attendance sheet: one column per day of July 2018 holding the summed "late"
-- score, one column per leave type holding the summed no_of_days, plus the
-- user's July total from LateTotals.
;WITH ToPivot
AS ( SELECT
time_tracker.date,
Users.FirstName + ' ' + Users.LastName AS username,
-- "late" score per tracked day: 0 when either check-in or check-out is missing,
-- 0.5 when the second WHEN matches, otherwise 1.
(CASE
WHEN
(
(datepart(hour, chk_in)) is null
OR (datepart(hour, chk_out)) is null
)
then
0
WHEN
(
-- AND binds tighter than OR, so this reads:
--   check-in hour >= 12  OR  (check-out hour < 16 AND hour difference < 6)
-- NOTE(review): confirm that this grouping is the intended rule.
(datepart(hour, chk_in)) >= 12
OR (datepart(hour, chk_out)) < 16
AND ((datepart(hour, chk_out)) - (datepart(hour, chk_in))) < 6
)
THEN
0.5
ELSE
1
END) AS late, TypeOfLeaves.leave_type, Userleavetyp.no_of_days
FROM Users
INNER JOIN time_tracker ON Users.ID = time_tracker.fk_userid
INNER JOIN Userleavetyp ON Users.ID = Userleavetyp.fk_user
INNER JOIN TypeOfLeaves ON Userleavetyp.fk_tol = TypeOfLeaves.ID
WHERE (Users.FK_Status = 1)
),
-- Sum of "late" for July 2018, grouped by user, leave type and no_of_days —
-- so a user can have more than one row here.
LateTotals AS (
SELECT T.username, Total = SUM(late),T.leave_type,T.no_of_days
FROM ToPivot AS T
WHERE T.date BETWEEN '2018-07-01' AND '2018-07-31' GROUP BY T.username,T.leave_type,T.no_of_days
)
SELECT distinct(pv2.username) as original ,PV2.*,L.Total
FROM ToPivot AS P
-- First pivot: spread "late" across the day columns. The double-quoted names are
-- column identifiers, which depends on QUOTED_IDENTIFIER being ON.
PIVOT (SUM(late) FOR date IN ("2018-07-01", "2018-07-02", "2018-07-03", "2018-07-04", "2018-07-05", "2018-07-06", "2018-07-07", "2018-07-08", "2018-07-09", "2018-07-10", "2018-07-11", "2018-07-12", "2018-07-13", "2018-07-14", "2018-07-15", "2018-07-16", "2018-07-17", "2018-07-18", "2018-07-19", "2018-07-20", "2018-07-21", "2018-07-22", "2018-07-23", "2018-07-24", "2018-07-25", "2018-07-26", "2018-07-27", "2018-07-28", "2018-07-29", "2018-07-30", "2018-07-31")) AS pv1
-- Second pivot: spread no_of_days across the three leave-type columns.
PIVOT (SUM(no_of_days) FOR leave_type IN ([Casual Leave], [Paid Leave], [Complimentary Leave])) AS pv2
-- Joined on username only, so every LateTotals row for the user is matched.
LEFT JOIN LateTotals AS L ON L.username = pv2.username
The output for this code is as below:
I believe you just need to change your select statement as follows.
From this:
-- Original select list: user name, every pivoted column, and the July total.
SELECT distinct(pv2.username) as original ,PV2.*,L.Total
To this:
-- Same select list plus [Total Leave]: the sum of the three pivoted leave
-- columns. A leave type with no rows pivots to NULL, so each column is
-- defaulted to 0 before adding; otherwise one NULL would make the whole sum NULL.
SELECT distinct(pv2.username) as original ,PV2.*,L.Total,
COALESCE(PV2.[Casual Leave], 0)
+ COALESCE(PV2.[Paid Leave], 0)
+ COALESCE(PV2.[Complimentary Leave], 0)
AS [Total Leave]

SemanticException Failed to breakup Windowing invocations into Groups. At least 1 group must only depend on input columns

The below query is working fine in Oracle but it is not working in hive.
-- Monthly quota totals per profit center for 2016.
-- R picks one profit center per ph_dtl_id: the one with the highest count of
-- "real" profit_ctr_cd values (not 'JNJDUMMY', not '99'), ties broken by the
-- lowest itm_profit_ctr_cd.
SELECT Q.tm_mo_id,
       '1380' AS mrc_cd,
       NVL(R.itm_profit_ctr_cd, '99') AS profit_center_cd,
       -- "CASE x WHEN NULL" compares with "=" and never matches; test with IS NULL.
       MAX(CASE
               WHEN R.itm_profit_ctr_cd IS NULL THEN 'UNASSIGN PROFIT CNTR'
               ELSE R.itm_profit_ctr_ds
           END) AS profit_center_desc,
       SUM(Q.bp_grs_quota_am) AS mth_bp_plan_gts_am_usd,
       SUM(Q.grs_quota_am) AS mth_ju_plan_gts_am_usd
FROM v_l_0002_gb_gds_us_quota_v_1 Q
LEFT JOIN
  (SELECT E.ph_dtl_id,
          E.itm_profit_ctr_cd,
          E.itm_profit_ctr_ds
   FROM
     (SELECT A.ph_dtl_id,
             A.itm_profit_ctr_cd,
             A.itm_profit_ctr_ds,
             -- The window now orders by plain input columns only. The aggregate
             -- it used to contain is computed one level down, which avoids the
             -- Hive "Failed to breakup Windowing invocations into Groups" error.
             ROW_NUMBER() OVER (
                 PARTITION BY A.ph_dtl_id
                 ORDER BY A.real_profit_ctr_ct DESC,
                          A.itm_profit_ctr_cd ASC) AS rn
      FROM
        (SELECT ph_dtl_id,
                itm_profit_ctr_cd,
                MIN(itm_profit_ctr_ds) AS itm_profit_ctr_ds,
                COUNT(CASE profit_ctr_cd
                          WHEN 'JNJDUMMY' THEN NULL
                          WHEN '99' THEN NULL
                          ELSE profit_ctr_cd
                      END) AS real_profit_ctr_ct
         FROM v_l_0002_gb_gds_us_sku_to_profit_center_lookup_v_1
         GROUP BY ph_dtl_id,
                  itm_profit_ctr_cd) A) E
   WHERE E.rn = 1) R
ON (Q.ph_dtl_id = R.ph_dtl_id)
WHERE SUBSTR(Q.tm_mo_id, 1, 4) = '2016'
GROUP BY Q.tm_mo_id,
         NVL(R.itm_profit_ctr_cd, '99')

issues in Case and when Statement

I have a question about CASE WHEN statements. I have two transtypeid values, 10 and 11.
For the sale1 amount: if the summed amount for transtypeid 11 is not 0, I need to subtract the summed amount of transtypeid 10 from it.
I tried a lot but nothing worked.
I have these queries I tried
-- Asker's attempt at sale1 per customer for 2015 transactions of type 10 or 11.
-- Two problems are visible in the CASE below:
--   1. ct.TransTypeID is tested outside any aggregate but is not in the GROUP BY
--      list, which SQL Server rejects.
--   2. The THEN branch subtracts a SUM from the identical SUM, so it can only
--      yield 0; neither SUM is restricted to one transaction type.
select
CT.CustomerCode, C.CustomerName,
sale1 = case
when (ct.TransTypeID = 11) and (sum(ct.OVAmount - ct.OVDiscount) != 0)
then sum(ct.OVAmount - ct.OVDiscount) - sum(ct.OVAmount - ct.OVDiscount)
else 0
end,
C.CountryCode, C.CityCode
from
CustomerTransactions CT
inner join
Customers C ON CT.CustomerCode = C.CustomerCode
where
ct.TransDate >= '2015-01-01'
and ct.TransDate <= '2015-12-31'
and ct.TransTypeID in (10, 11)
group by
ct.CustomerCode, c.CustomerName, c.CountryCode, c.CityCode
Try calculate sale1 with this SQL code:
-- sale1 expression (drop-in for the select list): if the net amount of the
-- type-11 transactions is non-zero, return it minus the net amount of the
-- type-10 transactions; otherwise 0. Each SUM is restricted to one type by
-- its inner CASE.
CASE WHEN
SUM(CASE WHEN ct.TransTypeID = 11
THEN ct.OVAmount - ct.OVDiscount
ELSE 0 END) != 0
THEN
SUM(CASE WHEN ct.TransTypeID = 11
THEN ct.OVAmount - ct.OVDiscount
ELSE 0 END)   -- was "ELSE O" (letter O), which is read as a column name
- SUM(CASE WHEN ct.TransTypeID = 10
THEN ct.OVAmount - ct.OVDiscount
ELSE 0 END)
ELSE 0 END
I'm not sure that I understand what you need. But I give it a try since you are in hurry.
Something like this, maybe?
-- sale1 per customer for 2015: net type-11 amount minus net type-10 amount,
-- or 0 when the type-11 amount is 0.
-- Each transaction type is summed per customer BEFORE joining. Joining the raw
-- type-11 and type-10 rows on the same customer pairs every type-11 row with
-- every type-10 row and inflates both sums.
-- Inner joins keep only customers that have both types in the period.
-- Assumes CustomerCode identifies a single row in Customers — TODO confirm.
select
    c.CustomerCode, c.CustomerName,
    sale1 =
        case
            when ( t11.amount != 0 )
                then t11.amount - t10.amount
            else
                0
        end,
    c.CountryCode, c.CityCode
from
    Customers c
    inner join (
        select
            CustomerCode, sum( OVAmount - OVDiscount ) as amount
        from
            CustomerTransactions
        where
            ( TransTypeID = 11 )
            and ( TransDate >= '2015-01-01' )
            and ( TransDate < '2016-01-01' )
        group by CustomerCode
    ) t11 on ( t11.CustomerCode = c.CustomerCode )
    inner join (
        select
            CustomerCode, sum( OVAmount - OVDiscount ) as amount
        from
            CustomerTransactions
        where
            ( TransTypeID = 10 )
            and ( TransDate >= '2015-01-01' )
            and ( TransDate < '2016-01-01' )
        group by CustomerCode
    ) t10 on ( t10.CustomerCode = c.CustomerCode )
Using CTEs:
-- Sum the amounts per customer separately for Type 1 and Type 2, then combine:
-- sale1 is (Type 1 total - Type 2 total) when the Type 1 total is non-zero,
-- else 0. Inner joins keep only customers that have both totals.
with
type1_totals ( CustomerId, amount ) as (
    select customerId, sum( amount ) as amount
    from CustomerTransaction
    where ( Type = 1 )
    group by CustomerId
),
type2_totals ( CustomerId, amount ) as (
    select customerId, sum( amount ) as amount
    from CustomerTransaction
    where ( Type = 2 )
    group by CustomerId
)
select
    c.Id,
    c.Description,
    case
        when ( type1_totals.amount <> 0 )
            then type1_totals.amount - type2_totals.amount
        else 0
    end as sale1
from Customer c
inner join type1_totals
    on ( type1_totals.CustomerId = c.Id )
inner join type2_totals
    on ( type2_totals.CustomerId = c.Id )