SQL hierarchy count totals report - sql

I'm creating a report with SQL server 2012 and Report Builder which must show the total number of Risks at a high, medium and low level for each Parent Element.
Each Element contains a number of Risks which are rated at a certain level. I need the total for the Parent Elements. The total will include the number of all the Child Elements and also the number the Element itself may have.
I am using CTEs in my query. The code I have attached isn't working (there are no errors, but it displays incorrect results), and I'm not sure that my logic is correct.
Hopefully someone can help. Thanks in advance.
My table structure is:
ElementTable
ElementTableId(PK) ElementName ElementParentId
RiskTable
RiskId(PK) RiskName RiskRating ElementId(FK)
My query:
-- Question's query (reported as returning incorrect totals).
-- cte_Hierarchy walks the element tree from the roots downwards and records each
-- element's depth (Generation) and its immediate parent only.
-- NOTE(review): the anchor looks for ParentElementId IS NULL, while the sample data
-- in the question shows the root with a parent id of 0 -- confirm which one the real table uses.
WITH cte_Hierarchy(ElementId, ElementName, Generation, ParentElementId)
AS (SELECT ElementId,
NAME,
0,
ParentElementId
FROM Extract.Element AS FirtGeneration
WHERE ParentElementId IS NULL
UNION ALL
SELECT NextGeneration.ElementId,
NextGeneration.NAME,
Parent.Generation + 1,
Parent.ElementId
FROM Extract.Element AS NextGeneration
INNER JOIN cte_Hierarchy AS Parent
ON NextGeneration.ParentElementId = Parent.ElementId),
-- Number of 'High' risks attached directly to each element.
CTE_HighRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS HighRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'High'
GROUP BY r.ElementId),
-- Number of 'Low' risks attached directly to each element.
CTE_LowRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS LowRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Low'
GROUP BY r.ElementId),
-- Number of 'Medium' risks attached directly to each element.
CTE_MedRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS MedRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Medium'
GROUP BY r.ElementId)
-- The counts are joined per element, so risks of descendant elements are never
-- added to their ancestors; the result is also limited to Generation = 1
-- (direct children of the roots), so neither the roots nor deeper levels are listed.
SELECT rd.ElementId,
rd.ElementName,
rd.ParentElementId,
Generation,
HighRisk,
MedRisk,
LowRisk
FROM cte_Hierarchy rd
LEFT OUTER JOIN CTE_HighRisk h
ON rd.ElementId = h.ElementId
LEFT OUTER JOIN CTE_MedRisk m
ON rd.ElementId = m.ElementId
LEFT OUTER JOIN CTE_LowRisk l
ON rd.ElementId = l.ElementId
WHERE Generation = 1
Edit:
Sample Data
ElementTableId(PK) -- ElementName -- ElementParentId
1 ------------------- Main --------------0
2 --------------------Element1-----------1
3 --------------------Element2 ----------1
4 --------------------SubElement1 -------2
RiskId(PK) RiskName RiskRating ElementId(FK)
a -------- Financial -- High ----- 2
b -------- HR --------- High ----- 3
c -------- Marketing -- Low ------- 2
d -------- Safety -----Medium ----- 4
Sample Output:
Element Name High Medium Low
Main ---------- 2 ---- 1 -------1

Here is your sample tables
-- Sample element hierarchy from the question; ElementParentId = 0 marks the root element.
SELECT ElementTableId, ElementName, ElementParentId
INTO #TABLE1
FROM (VALUES
    (1, 'Main',        0),
    (2, 'Element1',    1),
    (3, 'Element2',    1),
    (4, 'SubElement1', 2)
) AS TAB (ElementTableId, ElementName, ElementParentId);

-- Sample risks from the question; each risk is attached to exactly one element.
SELECT RiskId, RiskName, RiskRating, ElementId
INTO #TABLE2
FROM (VALUES
    ('a', 'Financial', 'High',   2),   -- spelling matches the question's sample data
    ('b', 'HR',        'High',   3),
    ('c', 'Marketing', 'Low',    2),
    ('d', 'Safety',    'Medium', 4)
) AS TAB (RiskId, RiskName, RiskRating, ElementId);
We find all the children of the selected parent, count their High, Medium and Low risks, and use a cross join to show the parent next to the combined High, Medium and Low counts of its children.
UPDATE
The below variable can be used to access the records dynamically.
-- Element whose whole subtree is totalled. T-SQL variables start with '@'
-- ('#' denotes a temp table); change the value to report on a different element.
DECLARE @ElementTableId INT = 1;
-- And use the above variable inside the query
;WITH CTE1 AS
(
    -- Anchor: the selected element itself (level 0).
    SELECT ElementTableId, ElementName, ElementParentId, 0 AS [LEVEL]
    FROM #TABLE1
    WHERE ElementTableId = @ElementTableId
    UNION ALL
    -- Recursive step: children of anything already collected, i.e. every descendant.
    SELECT E.ElementTableId, E.ElementName, E.ElementParentId, E2.[LEVEL] + 1
    FROM #TABLE1 AS E
    INNER JOIN CTE1 AS E2
        ON  E.ElementParentId = E2.ElementTableId
        AND E.ElementTableId <> @ElementTableId
)
, CTE2 AS
(
    -- One row per (element, risk) in the subtree; CNT is the subtree-wide number
    -- of risks sharing that rating. Elements without risks yield a NULL rating (CNT = 0).
    SELECT E2.RiskRating,
           COUNT(E2.RiskRating) OVER (PARTITION BY E2.RiskRating) AS CNT
    FROM CTE1 AS E1
    LEFT JOIN #TABLE2 AS E2
        ON E1.ElementTableId = E2.ElementId
)
, CTE3 AS
(
    -- Attach the selected element's name to each distinct (rating, count) pair.
    SELECT DISTINCT T1.ElementName, C2.RiskRating, C2.CNT
    FROM #TABLE1 AS T1
    CROSS JOIN CTE2 AS C2
    WHERE T1.ElementTableId = @ElementTableId
)
-- Turn the rating rows into High / Medium / Low columns.
SELECT ElementName, [High], [Medium], [Low]
FROM CTE3
PIVOT (MIN(CNT)
       FOR RiskRating IN ([High], [Medium], [Low])) AS PVTTable;
SQL FIDDLE
RESULT
UPDATE 2
I am updating as per your new requirement
Here is sample table in which I have added extra data to test
-- Extended element hierarchy with two root elements (ElementParentId = 0): 'Main' and 'Main 2'.
SELECT ElementTableId, ElementName, ElementParentId
INTO #ElementTable
FROM (VALUES
    (1, 'Main',         0),
    (2, 'Element1',     1),
    (3, 'Element2',     1),
    (4, 'SubElement1',  2),
    (5, 'Main 2',       0),
    (6, 'Element21',    5),
    (7, 'SubElement21', 6),
    (8, 'SubElement22', 7),
    (9, 'SubElement23', 7)
) AS TAB (ElementTableId, ElementName, ElementParentId);

-- Extended risk data; each risk is attached to exactly one element.
SELECT RiskId, RiskName, RiskRating, ElementId
INTO #RiskTable
FROM (VALUES
    ('a', 'Financial', 'High',   2),   -- spelling matches the question's sample data
    ('b', 'HR',        'High',   3),
    ('c', 'Marketing', 'Low',    2),
    ('d', 'Safety',    'Medium', 4),
    ('e', 'Financial', 'High',   5),
    ('f', 'HR',        'High',   6),
    ('g', 'HR',        'High',   6),
    ('h', 'Marketing', 'Low',    7),
    ('i', 'Safety',    'Medium', 8),
    ('j', 'Safety',    'High',   8)
) AS TAB (RiskId, RiskName, RiskRating, ElementId);
I have written the logic in query
;WITH CTE1 AS
(
    -- Level of every element in the table: roots (ElementParentId = 0) are level 0.
    SELECT ElementTableId, ElementName, ElementParentId, 0 AS [LEVEL]
    FROM #ElementTable
    WHERE ElementParentId = 0
    UNION ALL
    SELECT ET.ElementTableId, ET.ElementName, ET.ElementParentId, CTE1.[LEVEL] + 1
    FROM #ElementTable AS ET
    INNER JOIN CTE1
        ON ET.ElementParentId = CTE1.ElementTableId
)
, CTE2 AS
(
    -- Start from the elements at the reporting level and carry that element (the
    -- "major parent") down to all of its descendants,
    -- i.e. for 100 -> 150 -> 200 the major parent of 200 is 100.
    SELECT ElementTableId,
           ElementTableId AS MajorParentID,
           ElementName    AS MajorParentName
    FROM CTE1
    WHERE [LEVEL] = 1
    UNION ALL
    SELECT CTE1.ElementTableId, CTE2.MajorParentID, CTE2.MajorParentName
    FROM CTE1
    INNER JOIN CTE2
        ON CTE1.ElementParentId = CTE2.ElementTableId
)
, CTE3 AS
(
    -- Every element now carries its major parent, so risks can be counted per
    -- (major parent, rating). LEFT JOIN keeps major parents whose subtree has no
    -- risks at all; they surface as a NULL rating and end up as 0/0/0 below
    -- instead of disappearing from the report.
    SELECT DISTINCT CTE2.MajorParentName, RT.RiskRating,
           COUNT(RT.RiskRating) OVER (PARTITION BY CTE2.MajorParentID, RT.RiskRating) AS CNT
    FROM CTE2
    LEFT JOIN #RiskTable AS RT
        ON CTE2.ElementTableId = RT.ElementId
)
-- Pivot ratings into columns; a rating with no risks is reported as 0.
SELECT MajorParentName,
       ISNULL([High], 0)   AS [High],
       ISNULL([Medium], 0) AS [Medium],
       ISNULL([Low], 0)    AS [Low]
FROM CTE3
PIVOT (MIN(CNT)
       FOR RiskRating IN ([High], [Medium], [Low])) AS PVTTable;
SQL FIDDLE

Related

BigQuery recursively join based on links between 2 ID columns

Given a table representing a many-many join between IDs like the following:
-- Sample many-to-many links between id_1 and id_2, built from an inline array of structs.
WITH t AS (
  SELECT link.id_1, link.id_2
  FROM UNNEST([
    STRUCT(1 AS id_1, 'a' AS id_2),
    (2, 'a'),
    (2, 'b'),
    (3, 'b'),
    (4, 'c'),
    (5, 'c'),
    (6, 'd'),
    (6, 'e'),
    (7, 'f')
  ]) AS link
)
SELECT * FROM t
id_1
id_2
1
a
2
a
2
b
3
b
4
c
5
c
6
d
6
e
7
f
I would like to be able to recursively join and then aggregate rows in order to find each disconnected sub-graph represented by these links - that is, each collection of IDs that are linked together.
The desired output for the example above would look something like this:
id_1_coll
id_2_coll
1, 2, 3
a, b
4, 5
c
6
d, e
7
f
where each row contains all the other IDs one could reach following the links in the table.
Note that 1 links to b even though there is no explicit link row, because we can follow the path 1 --> a --> 2 --> b using the links in the first 3 rows.
One potential approach is to remodel the relationships between id_1 and id_2 such that we get all the links from id_1 to itself then use a recursive common table expression to traverse all the possible paths between id_1 values then aggregate (somewhat arbitrarily) to the lowest such value that can be reached from each id_1.
Explanation
Our steps are
Remodel the relationship into a series of self-joins for id_1
Map each id_1 to the lowest id_1 that it is linked to via a recursive CTE
Aggregate the recursive CTE using the lowest id_1s as the GROUP BY column and grabbing all the linked id_1 and id_2 values via the ARRAY_AGG() function
We can use something like this to remodel the relationships into a self join (1.):
-- Pair every id_1 with each *other* id_1 that shares one of its id_2 values.
SELECT
  src.id_1,
  src.id_2,
  peer.id_1 AS linked_id
FROM t AS src
INNER JOIN t AS peer
  ON  peer.id_2 = src.id_2
  AND peer.id_1 != src.id_1
Next - to set up the recursive table expression (2.) we can tweak the query above to also give us the lowest (LEAST) of the values for id_1 at each link then use this as the base iteration:
-- Base iteration: every directed link between two different id_1 values that share
-- an id_2, tagged with the smaller of the two ids (LEAST) as the starting representative.
WITH RECURSIVE base_iter AS (
SELECT
a.id_1, b.id_1 AS linked_id, LEAST(a.id_1, b.id_1) AS lowest_linked_id
FROM t as a
INNER JOIN t as b
ON a.id_2 = b.id_2
WHERE a.id_1 != b.id_1
)
We can also grab the lowest id_1 value at this time:
id_1
linked_id
lowest_linked_id
1
2
1
2
1
1
2
3
2
3
2
2
4
5
4
5
4
4
For our recursive loop, we want to maintain an ARRAY of linked ids and join each new iteration such that the id_1 value of the n+1th iteration is equal to the linked_id value of the nth iteration AND the nth linked_id value is not in the array of previously linked ids.
We can code this as follows:
-- Recursive step: extend each path by one more link, carrying forward the lower
-- representative id and the array of linked ids collected along the path.
recursive_loop AS (
-- Seed: each direct link, with its own linked_id as the only collected id so far.
SELECT id_1, linked_id, lowest_linked_id, [linked_id ] AS linked_ids
FROM base_iter
UNION ALL
SELECT
prev_iter.id_1, prev_iter.linked_id,
iter.lowest_linked_id,
ARRAY_CONCAT(iter.linked_ids, [prev_iter.linked_id])
FROM base_iter AS prev_iter
JOIN recursive_loop AS iter
ON iter.id_1 = prev_iter.linked_id
-- Only extend when the path reached so far has a strictly lower representative.
AND iter.lowest_linked_id < prev_iter.lowest_linked_id
-- Do not re-add an id that is already recorded on this path.
AND prev_iter.linked_id NOT IN UNNEST(iter.linked_ids )
)
Giving us the following results:
|id_1|linked_id|lowest_linked_id|linked_ids|
|----|---------|------------|---|
|3|2|1|[1,2]|
|2|3|1|[1,2,3]|
|4|5|4|[5]|
|1|2|1|[2]|
|5|4|4|[4]|
|2|3|2|[3]|
|2|1|1|[1]|
|3|2|2|[2]|
which we can now link back to the original table for the id_2 values then aggregate (3.) as shown in the complete query below
Solution
-- Complete query: groups id_1 / id_2 values into sets that are connected through the
-- links in t, using the lowest reachable id_1 as the group key (grp).
-- t: inline sample links between id_1 and id_2.
WITH RECURSIVE t AS (
SELECT 1 AS id_1, 'a' AS id_2,
UNION ALL SELECT 2, 'a'
UNION ALL SELECT 2, 'b'
UNION ALL SELECT 3, 'b'
UNION ALL SELECT 4, 'c'
UNION ALL SELECT 5, 'c'
UNION ALL SELECT 6, 'd'
UNION ALL SELECT 6, 'e'
UNION ALL SELECT 7, 'f'
),
-- base_iter: every directed link between two different id_1 values sharing an id_2,
-- with the smaller of the two ids as the starting representative.
base_iter AS (
SELECT
a.id_1, b.id_1 AS linked_id, LEAST(a.id_1, b.id_1) AS lowest_linked_id
FROM t as a
INNER JOIN t as b
ON a.id_2 = b.id_2
WHERE a.id_1 != b.id_1
),
-- recursive_loop: extends paths link by link, carrying a strictly lower representative
-- forward and skipping ids already recorded in linked_ids.
recursive_loop AS (
SELECT id_1, linked_id, lowest_linked_id, [linked_id ] AS linked_ids
FROM base_iter
UNION ALL
SELECT
prev_iter.id_1, prev_iter.linked_id,
iter.lowest_linked_id,
ARRAY_CONCAT(iter.linked_ids, [prev_iter.linked_id])
FROM base_iter AS prev_iter
JOIN recursive_loop AS iter
ON iter.id_1 = prev_iter.linked_id
AND iter.lowest_linked_id < prev_iter.lowest_linked_id
AND prev_iter.linked_id NOT IN UNNEST(iter.linked_ids )
),
-- link_back: re-attach the original (id_1, id_2) rows to the representatives found;
-- an id_1 with no row in recursive_loop falls back to itself (IFNULL).
link_back AS (
SELECT
t.id_1, IFNULL(lowest_linked_id, t.id_1) AS lowest_linked_id, t.id_2
FROM t
LEFT JOIN recursive_loop
ON t.id_1 = recursive_loop.id_1
),
-- by_id_1: lowest representative seen for each id_1.
by_id_1 AS (
SELECT
id_1,
MIN(lowest_linked_id) AS grp
FROM link_back
GROUP BY 1
),
-- by_id_2: lowest representative seen for each id_2.
by_id_2 AS (
SELECT
id_2,
MIN(lowest_linked_id) AS grp
FROM link_back
GROUP BY 1
),
-- result: one row per group, collecting its distinct id_1 and id_2 values in sorted order.
result AS (
SELECT
by_id_1.grp,
ARRAY_AGG(DISTINCT id_1 ORDER BY id_1) AS id1_coll,
ARRAY_AGG(DISTINCT id_2 ORDER BY id_2) AS id2_coll,
FROM
by_id_1
INNER JOIN by_id_2
ON by_id_1.grp = by_id_2.grp
GROUP BY grp
)
-- Arrays are rendered with TO_JSON for display.
SELECT grp, TO_JSON(id1_coll) AS id1_coll, TO_JSON(id2_coll) AS id2_coll
FROM result ORDER BY grp
Giving us the required output:
grp
id1_coll
id2_coll
1
[1,2,3]
[a,b]
4
[4,5]
[c]
6
[6]
[d,e]
7
[7]
[f]
Limitations/Issues
Unfortunately this approach is inefficient (we have to traverse every single pathway before aggregating it back together) and fails with the real-world case where we have several million join rows. When trying to execute on this data, BigQuery runs up a huge "Slot time consumed" and then eventually errors out with:
Resources exceeded during query execution: Your project or organization exceeded the maximum disk and memory limit available for shuffle operations. Consider provisioning more slots, reducing query concurrency, or using more efficient logic in this job.
I hope there might be a better way of doing the recursive join such that pathways can be merged/aggregated as we go (if an id_1 value and its linked_id are already in the list of linked_ids, we don't need to check them further).
Using ROW_NUMBER(), the query is as follows:
-- Alternative solution: number the link rows, then recursively grow, from a start row,
-- the arrays of id_1 / id_2 values reachable through rows sharing an id_1 or an id_2.
WITH RECURSIVE
-- t: inline sample links between id_1 and id_2.
t AS (
SELECT 1 AS id_1, 'a' AS id_2,
UNION ALL SELECT 2, 'a'
UNION ALL SELECT 2, 'b'
UNION ALL SELECT 3, 'b'
UNION ALL SELECT 4, 'c'
UNION ALL SELECT 5, 'c'
UNION ALL SELECT 6, 'd'
UNION ALL SELECT 6, 'e'
UNION ALL SELECT 7, 'f'
),
-- t1: the link rows with a row number n (ordered by id_1).
t1 AS (
SELECT ROW_NUMBER() OVER(ORDER BY t.id_1) n, t.id_1, t.id_2 FROM t
),
-- t2: recursive expansion. n stays the start row's number; n_arr, arr_1 and arr_2
-- accumulate the row numbers, id_1 values and id_2 values collected so far.
t2 AS (
-- Seed: the first row (lowest n) of every id_1.
SELECT n, [n] n_arr, [id_1] arr_1, [id_2] arr_2, id_1, id_2 FROM t1
WHERE n IN (SELECT MIN(n) FROM t1 GROUP BY id_1)
UNION ALL
SELECT t2.n, ARRAY_CONCAT(t2.n_arr, [t1.n]),
CASE WHEN t1.id_1 NOT IN UNNEST(t2.arr_1)
THEN ARRAY_CONCAT(t2.arr_1, [t1.id_1])
ELSE t2.arr_1 END,
CASE WHEN t1.id_2 NOT IN UNNEST(t2.arr_2)
THEN ARRAY_CONCAT(t2.arr_2, [t1.id_2])
ELSE t2.arr_2 END,
t1.id_1, t1.id_2
-- Step to a later, not yet used row that shares an id_1 or id_2 with the current row
-- and contributes at least one id that is not collected yet.
FROM t2 JOIN t1 ON
t2.n < t1.n AND
t1.n NOT IN UNNEST(t2.n_arr) AND
(t2.id_1 = t1.id_1 OR t2.id_2 = t1.id_2) AND
(t1.id_1 NOT IN UNNEST(t2.arr_1) OR t1.id_2 NOT IN UNNEST(t2.arr_2))
),
-- t3: per start row n, the distinct id_1 / id_2 values reached; only the lowest n
-- recorded for each id_1 is kept.
t3 AS (
SELECT
n,
ARRAY_AGG(DISTINCT id_1 ORDER BY id_1) arr_1,
ARRAY_AGG(DISTINCT id_2 ORDER BY id_2) arr_2
FROM t2
WHERE n IN (SELECT MIN(n) FROM t2 GROUP BY id_1)
GROUP BY n
)
SELECT n, TO_JSON(arr_1), TO_JSON(arr_2) FROM t3 ORDER BY n
t1 : Append with row numbers.
t2 : Extract rows matching either id_1 or id_2 by recursive query.
t3 : Make arrays from id_1 and id_2 with ARRAY_AGG().
However, it may not help your Limitations/Issues.
The way this question is phrased makes it appear you want "show me distinct groups from a presorted list, unchained to a previous group". For that, something like this should suffice (assuming auto-incrementing order/one or both id's move to the next value):
-- Splits a presorted link list into groups: a new group starts whenever BOTH id_1 and
-- id_2 differ from the previous row. All window functions share one explicit ordering
-- (id_1, id_2) so that "previous row" is deterministic and agrees with RowNr.
SELECT
  GrpNr,
  STRING_AGG(DISTINCT CAST(id_1 AS STRING), ',') AS id_1_coll,
  STRING_AGG(DISTINCT CAST(id_2 AS STRING), ',') AS id_2_coll
FROM (
  SELECT
    a.id_1,
    a.id_2,
    -- Running count of group starts. The first row has no previous row, so the
    -- comparisons are NULL and it falls into group 0.
    SUM(CASE WHEN a.id_1 <> a.previous_id_1 AND a.id_2 <> a.previous_id_2 THEN 1 ELSE 0 END)
      OVER (ORDER BY a.RowNr) AS GrpNr
  FROM (
    SELECT
      t.id_1,
      t.id_2,
      ROW_NUMBER() OVER (ORDER BY t.id_1, t.id_2) AS RowNr,
      LAG(t.id_1, 1) OVER (ORDER BY t.id_1, t.id_2) AS previous_id_1,
      LAG(t.id_2, 1) OVER (ORDER BY t.id_1, t.id_2) AS previous_id_2
    FROM t
  ) a
) a
GROUP BY GrpNr
ORDER BY GrpNr
I don't think this is the question you mean to ask. This seems to be a graph-walking problem as referenced in the other answers, and in the response from @GordonLinoff to the question here, which I tested (and presume works for BigQuery).
This can also be done using sequential updates as done by @RomanPekar
here (which I also tested). The main consideration seems to be performance. I'd assume dbms have gotten better at recursion since this was posted.
Rolling it up in either case should be fairly easy using String_Agg() as given above or as you have.
I'd be curious to see a more accurate representation of the data. If there is some consistency to how the data is stored/limitations to levels of nesting/other group structures there may be a shortcut approach other than recursion or iterative updates.

How to display null values in IN operator for SQL with two conditions in where

I have this query
-- 'Received' log rows of entity 60181615 for event types 1 through 5.
SELECT *
FROM dbo.EventLogs AS logs
WHERE logs.NewValue = 'Received'
  AND logs.EventTypeID IN (1, 2, 3, 4, 5)
  AND logs.EntityID = 60181615
If 2 and 4 do not exist with NewValue 'Received', it shows this:
current results
What I want
Ideally you should maintain somewhere a table containing all possible EventTypeID values. Sans that, we can use a CTE in place along with a left join:
-- Returns one row per requested event type; the log columns are NULL when entity
-- 60181615 has no 'Received' log row for that type.
WITH EventTypes AS (
    SELECT 1 AS ID UNION ALL
    SELECT 2 UNION ALL
    SELECT 3 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5
)
SELECT et.ID AS EventTypeId, el.*
FROM EventTypes AS et
LEFT JOIN dbo.EventLogs AS el
    -- Match each log row to its own event type; without this condition every
    -- event type would be paired with every matching log row.
    ON  el.EventTypeID = et.ID
    -- Row filters stay in ON (not WHERE) so unmatched event types are kept.
    AND el.EntityID = 60181615
    AND el.NewValue = 'Received'
-- No extra WHERE is needed: the CTE already lists exactly the wanted event types.
ORDER BY et.ID;

how to use unique value to stop data from replicating?

I have an Excel document. I import the sheet from that file into my database, and after that I use this code to split the data into separate tables:
-- Copies the per-order header columns from the imported sheet (LP_Pending_Jobs) into
-- LP_Pending_Info. Source columns are listed in the same order as the target columns.
-- The bracketed source column [ASC name] is kept on one line: a line break inside the
-- brackets would become part of the identifier.
insert into LP_Pending_Info(
    Service_order, Company, Country, HQ_AgingBase_Date, PENDING_DAYS, Posting_Date,
    Service_Type, Service_TypeText, SUB_SVC_Type, Status, Status_Text,
    Reason, Reason_Text, Reason_Aging, SVC_Comment,
    ASC_Code, ASC_Name, ASC_JobNo, Model, CIC_Product, SerialNo, IN_OUT_WTY, IMEI,
    Defect_DESC, Detail_Type, Detail_TypeText, Repair_RCV_DT, Repair_RCV_TM, Complete_DT,
    SVC_Level, Consumer, Consumer_Text, TelNumber, City1, Street, Engineer_Code)
select
    Service_order, Company, Country, [HQ aging base date], PENDING_DAYS, Posting_Date,
    Service_Type, SERVICE_TYPE_TXT, SUB_SVC_Type, Status, Status_Text,
    Reason, Reason_Text, Reason_Aging, SVC_Comment,
    [ASC code], [ASC name], ASC_JOB_NO, Model, CIC_PRD, SERIAL_NO, INOUTWTY, IMEI,
    Defect_DESC, Detail_Type, DETAIL_TYPE_TEXT, Repair_RCV_DT, Repair_RCV_TM, Complete_DT,
    SVC_Level, Consumer, consumer_txt, TEL_NUMBER, City1, Street, [Engineer code]
from LP_Pending_Jobs;
-- Each statement below unpivots one family of numbered columns (…1 to …5) of
-- LP_Pending_Jobs into one row per non-empty value, keyed by Service_order.

-- Part codes, plus the concatenated service-order + part-code value.
INSERT INTO LP_Part_Codes (Service_order, PartCode, serPluspart)
SELECT
    j.Service_order,
    u.Part_code,
    CONVERT(nvarchar(150), j.Service_order) + u.Part_code
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.Part_code1 AS Part_code WHERE LEN(j.Part_code1) > 0
    UNION ALL
    SELECT j.Part_code2 WHERE LEN(j.Part_code2) > 0
    UNION ALL
    SELECT j.Part_code3 WHERE LEN(j.Part_code3) > 0
    UNION ALL
    SELECT j.Part_code4 WHERE LEN(j.Part_code4) > 0
    UNION ALL
    SELECT j.Part_code5 WHERE LEN(j.Part_code5) > 0
) AS u;

-- PS values.
INSERT INTO LP_PS_Codes (Service_Order, PS)
SELECT
    j.Service_order,
    u.PS
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.PS1 AS PS WHERE LEN(j.PS1) > 0
    UNION ALL
    SELECT j.PS2 WHERE LEN(j.PS2) > 0
    UNION ALL
    SELECT j.PS3 WHERE LEN(j.PS3) > 0
    UNION ALL
    SELECT j.PS4 WHERE LEN(j.PS4) > 0
    UNION ALL
    SELECT j.PS5 WHERE LEN(j.PS5) > 0
) AS u;

-- Confirmation / SO numbers (the first source column is named confirmation_No1).
INSERT INTO LP_Confirmation_Codes (Service_Order, SO_NO)
SELECT
    j.Service_order,
    u.SO
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.confirmation_No1 AS SO WHERE LEN(j.confirmation_No1) > 0
    UNION ALL
    SELECT j.SO_NO2 WHERE LEN(j.SO_NO2) > 0
    UNION ALL
    SELECT j.SO_NO3 WHERE LEN(j.SO_NO3) > 0
    UNION ALL
    SELECT j.SO_NO4 WHERE LEN(j.SO_NO4) > 0
    UNION ALL
    SELECT j.SO_NO5 WHERE LEN(j.SO_NO5) > 0
) AS u;

----------------
-- Quantities.
INSERT INTO LP_QTY (Service_Order, QTY)
SELECT
    j.Service_order,
    u.QTY
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.QTY1 AS QTY WHERE LEN(j.QTY1) > 0
    UNION ALL
    SELECT j.QTY2 WHERE LEN(j.QTY2) > 0
    UNION ALL
    SELECT j.QTY3 WHERE LEN(j.QTY3) > 0
    UNION ALL
    SELECT j.QTY4 WHERE LEN(j.QTY4) > 0
    UNION ALL
    SELECT j.QTY5 WHERE LEN(j.QTY5) > 0
) AS u;

-- ASC PO numbers.
INSERT INTO LP_ASC_PO_Codes (Service_Order, ASC_PO_NO)
SELECT
    j.Service_order,
    u.ASC_PO
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.ASC_PO_No1 AS ASC_PO WHERE LEN(j.ASC_PO_No1) > 0
    UNION ALL
    SELECT j.ASC_PO_No2 WHERE LEN(j.ASC_PO_No2) > 0
    UNION ALL
    SELECT j.ASC_PO_No3 WHERE LEN(j.ASC_PO_No3) > 0
    UNION ALL
    SELECT j.ASC_PO_No4 WHERE LEN(j.ASC_PO_No4) > 0
    UNION ALL
    SELECT j.ASC_PO_No5 WHERE LEN(j.ASC_PO_No5) > 0
) AS u;

-- PO dates.
INSERT INTO LP_PO_Date (Service_Order, PO_Date)
SELECT
    j.Service_order,
    u.PO_Date
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.PO_DATE1 AS PO_Date WHERE LEN(j.PO_DATE1) > 0
    UNION ALL
    SELECT j.PO_DATE2 WHERE LEN(j.PO_DATE2) > 0
    UNION ALL
    SELECT j.PO_DATE3 WHERE LEN(j.PO_DATE3) > 0
    UNION ALL
    SELECT j.PO_DATE4 WHERE LEN(j.PO_DATE4) > 0
    UNION ALL
    SELECT j.PO_DATE5 WHERE LEN(j.PO_DATE5) > 0
) AS u;

-- SO dates.
INSERT INTO LP_SO_Date (Service_Order, SO_Date)
SELECT
    j.Service_order,
    u.SO_Date
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.SO_DATE1 AS SO_Date WHERE LEN(j.SO_DATE1) > 0
    UNION ALL
    SELECT j.SO_DATE2 WHERE LEN(j.SO_DATE2) > 0
    UNION ALL
    SELECT j.SO_DATE3 WHERE LEN(j.SO_DATE3) > 0
    UNION ALL
    SELECT j.SO_DATE4 WHERE LEN(j.SO_DATE4) > 0
    UNION ALL
    SELECT j.SO_DATE5 WHERE LEN(j.SO_DATE5) > 0
) AS u;
Because the values for each order are stored in a single row, I used cross apply to split them into separate rows.
The problem starts when I join the tables, because the result shows far more rows than it should.
here is the select code:
-- Question's report query: the header table joined to the three unpivoted tables on the
-- service order alone, so every part code is combined with every PS and every SO number.
SELECT DISTINCT
    info.Service_Order,
    info.Pending_Days,
    info.Service_Type,
    info.ASC_Code,
    info.Model,
    info.IN_OUT_WTY,
    parts.PartCode,
    ps.PS,
    conf.SO_NO,
    info.Engineer_Code,
    serPluspart
FROM LP_Pending_Info AS info
INNER JOIN LP_Part_Codes AS parts
    ON info.Service_order = parts.Service_order
INNER JOIN LP_PS_Codes AS ps
    ON parts.Service_Order = ps.Service_Order
INNER JOIN LP_Confirmation_Codes AS conf
    ON ps.Service_Order = conf.Service_Order
ORDER BY info.Service_order;
I asked around and came to the conclusion that I need a unique column, so I added the
'serPluspart'
column to my table in the hope that it would fix my problem, but when I run my select I still see the extra data.
here is the first 25 record:
For every service order I have up to five part codes. Looking at the picture, service order '4182134076' should have 2 rows because it has two part codes, but that service order is shown 8 times and I don't know how to fix it. I'd appreciate any suggestions.
the excel input file for this '4182134076' service order:
Service_order PENDING_DAYS SERVICE_TYPE ASC code MODEL INOUTWTY Part_code1 PS1 ASC_PO_No1 confirmation_No1 QTY1 PO_DATE1 SO_DATE1 Part_code2 PS2 ASC_PO_No2 SO_NO2 QTY2 PO_DATE2 SO_DATE2 Part_code3 PS3 ASC_PO_No3 SO_NO3 QTY3 PO_DATE3 SO_DATE3 Part_code4 PS4 ASC_PO_No4 SO_NO4 QTY4 PO_DATE4 SO_DATE4 Part_code5 PS5 ASC_PO_No5 SO_NO5 QTY5 PO_DATE5 SO_DATE5 Engineer code
4182134076 36 CI 4285818 SM-A310FZDDTHR LP GH96-09938A P 4182134076/1 1000237676 1 09.07.2016 GH81-13601A U 4182134076 1000224921 1 05.06.2016 7086002211
Looking at the output, you would expect eight rows because you have three pairs of binary alternatives (PartCode - GH81-13601A or GH96-09938A; PS - P or U; SO_NO 1000224921 OR 1000237676). Because PartCode, PS, and SO_NO come from three different tables and the only inner join is on the service_order, you will get 2^3 rows. Without seeing the data, it is hard to say exactly what is wrong, but I assume when you say you are only expecting two rows, that these three elements are linked, although the tables are not. I suspect you need some foreign keys, but without some sample data, I cannot be sure.
OK having seen the input data I know what to do! In LP_Part_Codes get rid of your column serPluspart (it is not helping). Now add a SubOrder (smallint) to each of LP_Part_Codes, LP_PS_Codes and LP_Confirmation_Codes.
Next add a column to your unpivots e.g:
-- Unpivot PS1..PS5 and record which numbered slot (1-5) each value came from, so that
-- rows from the different unpivoted tables can later be matched slot by slot.
INSERT INTO LP_PS_Codes (Service_Order, PS, SubOrder)
SELECT
    j.Service_order,
    u.PS,
    u.SubOrder
FROM LP_Pending_Jobs AS j
CROSS APPLY (
    SELECT j.PS1 AS PS, 1 AS SubOrder WHERE LEN(j.PS1) > 0
    UNION ALL
    SELECT j.PS2, 2 AS SubOrder WHERE LEN(j.PS2) > 0
    UNION ALL
    SELECT j.PS3, 3 AS SubOrder WHERE LEN(j.PS3) > 0
    UNION ALL
    SELECT j.PS4, 4 AS SubOrder WHERE LEN(j.PS4) > 0
    UNION ALL
    SELECT j.PS5, 5 AS SubOrder WHERE LEN(j.PS5) > 0
) AS u;
Do the same for LP_Part_Codes and LP_Confirmation_Codes.
Now you can amend your main select by adding the join conditions LP_Part_Codes.SubOrder = LP_PS_Codes.SubOrder AND LP_PS_Codes.SubOrder = LP_Confirmation_Codes.SubOrder
And you will now have only two records for this order.
HTH
Jonathan

Find overlapping sets of data in a table

I need to identify duplicate sets of data and give those sets whose data is the same a group id.
id threshold cost
-- ---------- ----------
1 0 9
1 100 7
1 500 6
2 0 9
2 100 7
2 500 6
I have thousands of these sets; most are the same, just with different ids. I need to find all the like sets that have the same thresholds and cost amounts and give them a group id. I'm just not sure where to begin. Is the best way to iterate and insert each set into a table, and then iterate through each set in the table to find what already exists?
This is one of those cases where you can try to do something with relational operators. Or, you can just say: "let's put all the information in a string and use that as the group id". SQL Server seems to discourage this approach, but it is possible. So, let's characterize the groups using:
-- Builds, for every id, a string that lists its threshold-cost pairs in threshold order.
-- Two ids with identical sets get identical strings, so the string identifies the group.
select d.id,
       (select cast(d2.threshold as varchar(8000)) + '-' + cast(d2.cost as varchar(8000)) + ';'
        from data d2
        where d2.id = d.id
        -- ORDER BY has to come before FOR XML; it fixes the order of the concatenated pairs.
        order by d2.threshold
        for xml path ('')
       ) as groupname
from data d
group by d.id;
Oh, I think that solves your problem. The groupname can serve as the group id. If you want a numeric id (which is probably a good idea), use dense_rank():
-- Assigns a numeric group id: ids whose threshold-cost strings are equal share a
-- dense_rank() value.
select d.id, dense_rank() over (order by d.groupname) as groupid
from (select d.id,
             (select cast(d2.threshold as varchar(8000)) + '-' + cast(d2.cost as varchar(8000)) + ';'
              from data d2
              where d2.id = d.id
              -- ORDER BY has to come before FOR XML; it fixes the order of the concatenated pairs.
              order by d2.threshold
              for xml path ('')
             ) as groupname
      from data d
      group by d.id
     ) d;
Here's the solution to my interpretation of the question:
-- Computes a GroupId per id into #tempGrouping and then writes it back to BaseTable.
-- Start clean: drop the temp table if an earlier run left it behind.
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
;
-- BaseTable (CTE): inline sample data; ids 1 and 2 hold identical sets, id 3 differs
-- in its first threshold.
WITH BaseTable AS
(
SELECT 1 id, 0 as threshold, 9 as cost
UNION SELECT 1, 100, 7
UNION SELECT 1, 500, 6
UNION SELECT 2, 0, 9
UNION SELECT 2, 100, 7
UNION SELECT 2, 500, 6
UNION SELECT 3, 1, 9
UNION SELECT 3, 100, 7
UNION SELECT 3, 500, 6
)
-- BaseCTE: one row per id with its threshold/cost pairs concatenated, in
-- (threshold, cost) order, into a single string (MultiGroup).
, BaseCTE AS
(
SELECT
id
--,dense_rank() over (order by threshold, cost ) as GroupId
,
(
SELECT CAST(TblGrouping.threshold AS varchar(8000)) + '/' + CAST(TblGrouping.cost AS varchar(8000)) + ';'
FROM BaseTable AS TblGrouping
WHERE TblGrouping.id = BaseTable.id
ORDER BY TblGrouping.threshold, TblGrouping.cost
FOR XML PATH ('')
) AS MultiGroup
FROM BaseTable
GROUP BY id
)
,
-- CTE: ids with equal MultiGroup strings receive the same GroupId.
CTE AS
(
SELECT
*
,DENSE_RANK() OVER (ORDER BY MultiGroup) AS GroupId
FROM BaseCTE
)
SELECT *
INTO #tempGrouping
FROM CTE
-- SELECT * FROM #tempGrouping;
-- NOTE(review): this UPDATE is a separate statement, so the CTE named BaseTable is no
-- longer in scope here; it needs a real table called BaseTable that has a GroupId column.
UPDATE BaseTable
SET BaseTable.GroupId = #tempGrouping.GroupId
FROM BaseTable
INNER JOIN #tempGrouping
ON BaseTable.Id = #tempGrouping.Id
-- Clean up the temp table.
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
Here BaseTable is your table; you don't need the CTE "BaseTable", because you already have a data table.
You may need to take extra-precautions if your threshold and cost fields can be NULL.

Joining a list of values with table rows in SQL

Suppose I have a list of values, such as 1, 2, 3, 4, 5 and a table where some of those values exist in some column. Here is an example:
id name
1 Alice
3 Cindy
5 Elmore
6 Felix
I want to create a SELECT statement that will include all of the values from my list as well as the information from those rows that match the values, i.e., perform a LEFT OUTER JOIN between my list and the table, so the result would be like follows:
id name
1 Alice
2 (null)
3 Cindy
4 (null)
5 Elmore
How do I do that without creating a temp table or using multiple UNION operators?
If in Microsoft SQL Server 2008 or later, then you can use Table Value Constructor
-- Left-join an inline list of values (table value constructor) to the table, so every
-- listed value is returned even when no row matches.
SELECT
    v.valueId,
    m.name
FROM (VALUES (1), (2), (3), (4), (5)) AS v (valueId)
LEFT JOIN otherTable AS m
    ON v.valueId = m.id
Postgres also has this construction VALUES Lists:
-- Inline VALUES list used as a derived table with named columns.
SELECT *
FROM (
    VALUES
        (1, 'one'),
        (2, 'two'),
        (3, 'three')
) AS t (num, letter)
Also note the possible Common Table Expression syntax which can be handy to make joins:
-- Inline list of values exposed through a CTE with named columns (num, str).
WITH my_values(num, str) AS (
    VALUES (1, 'one'), (2, 'two'), (3, 'three')
)
-- Select the columns under the names declared in the CTE header.
SELECT num, str FROM my_values
With Oracle it's possible, though more verbose (from Ask Tom):
-- Oracle: build the list of ids one row at a time from DUAL.
WITH id_list AS (
    SELECT 10 AS id FROM dual
    UNION ALL
    SELECT 20 FROM dual
    UNION ALL
    SELECT 25 FROM dual
    UNION ALL
    SELECT 70 FROM dual
    UNION ALL
    SELECT 90 FROM dual
)
SELECT *
FROM id_list;
the following solution for oracle is adopted from this source. the basic idea is to exploit oracle's hierarchical queries. you have to specify a maximum length of the list (100 in the sample query below).
-- Oracle: split the comma-separated list '1,2,3,4,5' into rows and left-join them to test.
select d.lstid
, t.name
from (
-- Item number lev is the text between the lev-th and the (lev+1)-th comma of csv.
select substr(
csv
, instr(csv,',',1,lev) + 1
, instr(csv,',',1,lev+1 )-instr(csv,',',1,lev)-1
) lstid
-- csv: the list wrapped in a leading and a trailing comma.
from (select ','||'1,2,3,4,5'||',' csv from dual)
-- lev: the numbers 1 to 100, i.e. the maximum supported list length.
, (select level lev from dual connect by level <= 100)
-- Keep only as many levels as there are items: (number of commas in csv) - 1.
where lev <= length(csv)-length(replace(csv,','))-1
) d
-- NOTE(review): lstid is a string produced by substr; the comparison with t.id relies
-- on implicit conversion if t.id is numeric -- confirm the column type.
left join test t on ( d.lstid = t.id )
;
check out this sql fiddle to see it work.
Bit late on this, but for Oracle you could do something like this to get a table of values:
-- Oracle: generate the consecutive integers from start (5) to end (100), one per row.
SELECT
    rownum + 5 /*start*/ - 1 AS myval
FROM dual
CONNECT BY
    LEVEL <= 100 /*end*/ - 5 /*start*/ + 1
... And then join that to your table:
-- Oracle: generate the integers 1 to 5 and left-join them to the table, so every
-- generated value appears even without a matching row.
SELECT *
FROM (
    SELECT
        rownum + 1 /*start*/ - 1 myval
    FROM dual
    CONNECT BY
        LEVEL <= 5 /*end*/ - 1 /*start*/ + 1
) mypseudotable
LEFT OUTER JOIN myothertable
    ON myothertable.correspondingval = mypseudotable.myval
Assuming myTable is the name of your table, following code should work.
-- SQL Server: number as many rows of master..spt_values as the largest id in myTable,
-- then left-join that sequence to myTable so that missing ids show up with a NULL name.
;WITH seed AS
(
    SELECT TOP (SELECT MAX(id) FROM [myTable]) number
    FROM [master]..spt_values
),
ids AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY seed.number) AS id
    FROM seed
)
SELECT
    ids.id,
    t.name
FROM ids
LEFT JOIN myTable AS t
    ON t.id = ids.id;
Caution: This is SQL Server implementation.
fiddle
For getting sequential numbers as required for part of output (This method eliminates values to type for n numbers):
-- Inserts the numbers 1 to 200 into ##table, one row per loop iteration.
-- T-SQL variables start with '@'; a '#' prefix denotes a temp table.
-- NOTE(review): assumes the global temp table ##table already exists with a single
-- numeric column -- confirm, or create it before running this loop.
declare @site as int
set @site = 1
while @site<=200
begin
insert into ##table
values (@site)
set @site=@site+1
end
Final output[post above step]:
-- Show the generated values.
SELECT *
FROM ##table

-- Left-join the generated values to the source table, so every value is listed even
-- when the source table has no matching row.
SELECT
    v.id,
    m.name
FROM ##table AS v
LEFT OUTER JOIN [source_table] m
    ON v.id = m.id
Suppose the table that holds the values 1, 2, 3, 4, 5 is named list_of_values, and the table that contains only some of those values, along with the name column, is named some_values. Then you can do:
-- Every id from the list, with the name where some_values has a matching row.
SELECT
    B.id,
    A.name
FROM [list_of_values] AS B
LEFT JOIN [some_values] AS A
    ON A.ID = B.ID