SQL query with conditions for audit - sql

I want to write a SQL query over several columns with conditions. The table looks like this:
ID Company User
1 Bov LPF
2 Ak LPF
3 Bov LPF
4 Bov ABC
5 Ak ABC
6 ZP ABC
7 REP ABC
8 REP CDE
9 KEK CDE
10 Ak CDE
11 PER CDE
12 Bov BKE
The result must be as follows:
Give me three rows per user (only the users with at least three rows will be in the result).
Company Bov, REP and Ak must return at least two times in the result
It is for an audit so everything must be ordered randomly (order by newid() for example)
Total rows is 9.
Expected result for example:
ID Company User
1 Bov LPF
2 Ak LPF
3 Bov LPF
4 Bov ABC
5 Ak ABC
7 REP ABC
8 REP CDE
10 Ak CDE
11 PER CDE
How should I write the query?
Update:
I was thinking about a query like this, but this doesn't give me the right results:
-- Attempted audit sample: caps the 'Ak' and 'Bov' rows with running counts taken in random
-- order, then filters on a per-partition row number and on a random overall row number.
-- NOTE(review): `row` is partitioned by user AND company, so the `row <= 3` filter caps rows
-- per user/company pair rather than per user; 'REP' is not capped or guaranteed here because
-- it falls into the third UNION ALL branch.
select *
from
(
-- row: position inside each (user, company) partition; total: random position over all rows
select *,row_number() over(partition by user,company order by user, company) as row, ROW_NUMBER() over(order by newid()) as total
from
(
-- Branch 1: only 'Ak' rows, with a running count computed in random order
select *
from
(
select *, 0 as Bov, sum(iif(Company= 'Ak',1,0)) over (order by newid()) as Ak
FROM table a
where Company = 'Ak'
) as eu
where eu.Ak <= 2
UNION ALL
-- Branch 2: only 'Bov' rows, with the same running-count cap
select *
from
(
select *, sum(iif(company = 'Bov',1,0)) over (order by newid() ) as Bov, 0 as Ak
FROM table a
where Company = 'Bov'
) as nn
where nn.Bov <= 2
UNION ALL
-- Branch 3: every other company, with both counters fixed at 0
select *, 0 as Bov, 0 as Ak
FROM table a
where Company not in ('Bov','Ak')
) as z
) as z1
where z1.row <= 3
and z1.total <= 9
Filter <= 2 is for the company that must return at least two times in the result
Filter row <= 3 is for the three rows per user.
Filter total <= 9 is for the total rows that must be 9.
With this query I cannot be sure that the result contains at least two rows for each of the companies Bov, Ak and REP.

-- Suggested answer: group by id and keep the groups whose count of non-NULL User values is at most 3.
-- NOTE(review): `table` and `User` are reserved words in T-SQL, and `select *` lists columns that
-- are not in the GROUP BY, so this probably needs reworking before it runs — confirm.
select *
from table
group by id
having count(User) <= 3
order by id
Is this what you need?

You can use this.
-- Sample data held in a table variable.
-- A table variable is declared with an @ prefix; a # prefix names a temp table, which is
-- created with CREATE TABLE and cannot be used in DECLARE ... TABLE.
DECLARE @MyTable TABLE (ID INT, Company VARCHAR(10), [User] VARCHAR(10))
INSERT INTO @MyTable (ID, Company, [User]) VALUES
(1 , 'Bov', 'LPF'),
(2 , 'Ak ', 'LPF'),
(3 , 'Bov', 'LPF'),
(4 , 'Bov', 'ABC'),
(5 , 'Ak ', 'ABC'),
(6 , 'ZP ', 'ABC'),
(7 , 'REP', 'ABC'),
(8 , 'REP', 'CDE'),
(9 , 'KEK', 'CDE'),
(10 , 'Ak ', 'CDE'),
(11 , 'PER', 'CDE'),
(12 , 'Bov', 'BKE')

-- Return up to three rows per user, listing the priority companies (Bov, REP, Ak) first.
SELECT ID, Company, [User] FROM
( SELECT *
  -- priority companies sort first, so they receive the lowest row numbers within each user;
  -- rows with the same priority are numbered in an unspecified order
, ROW_NUMBER() OVER (PARTITION BY [User] ORDER BY (CASE WHEN Company IN ('Bov','REP','Ak') THEN 0 ELSE 1 END) ) RN
  -- total number of rows for the user
, COUNT(*) OVER (PARTITION BY [User]) CmpCnt
  -- number of priority-company rows for the user (CASE yields NULL otherwise, which COUNT skips)
, COUNT(CASE WHEN Company IN ('Bov','REP','Ak') THEN 1 END) OVER (PARTITION BY [User]) PriCmpCnt
FROM @MyTable
) T
WHERE
T.CmpCnt > 2        -- user has at least three rows
and T.PriCmpCnt > 1 -- user has at least two priority-company rows
and T.RN < 4        -- keep the first three rows per user
order by ID;
Result:
ID Company User
----------- ---------- ----------
1 Bov LPF
2 Ak LPF
3 Bov LPF
4 Bov ABC
5 Ak ABC
7 REP ABC
8 REP CDE
10 Ak CDE
11 PER CDE

Here's how I'd approach it; see the comments for an explanation, and let me know if you need any more info to understand anything.
SQL Fiddle Demo
;WITH shuffled AS
(
    -- attach one random sort key to every source row
    SELECT *, NEWID() AS randomOrder
    FROM SourceData
),
cte AS
(
    SELECT Id
         , Company
         , [User]
           -- random position of the row within its user
         , ROW_NUMBER() OVER (PARTITION BY [User] ORDER BY randomOrder) AS PerUserRowNum
           -- 1 for the first two (random) rows of each must-have company, otherwise 0
         , CASE
               WHEN Company IN ('BOV','REP', 'AK')
                    AND ROW_NUMBER() OVER (PARTITION BY Company ORDER BY randomOrder) <= 2
               THEN 1
               ELSE 0
           END AS MustInclude
    FROM shuffled
)
SELECT TOP 9 -- total rows is 9
       Id, Company, [User]
FROM cte
WHERE PerUserRowNum <= 3 -- at most 3 rows per user
  AND [User] IN
      (
          -- a user reaches row number 3 only when it has at least three rows
          SELECT [User]
          FROM cte
          WHERE PerUserRowNum = 3
      )
ORDER BY MustInclude DESC, NEWID() -- must-include rows first, the remainder in random order

Related

Selecting top most row in Bigquery based on conditions

I have a huge table, where sometimes 1 product ID has multiple specifications. I want to select the newest but unfortunately, I don't have the date information. please consider this example dataset
Row ID Type Sn Sn_Ind
1 3 SLN SL20 20
2 1 SL SL 0
3 2 SL SL 0
4 1 M SL21 10
5 3 M SL21 10
6 1 SLN SL20 20
I used the query below to group the products and give them row numbers:
-- Number the rows of each ID, ordered by Sn and then Sn_Ind, both descending.
WITH cleanedMasterData AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Sn DESC, Sn_Ind DESC) AS rn
  FROM `project.dataset.table`
  -- the rn = 1 filter is left disabled so that every numbered row is returned
)
SELECT *
FROM cleanedMasterData
Please find below the example table after cleaning
Row ID Type Sn Sn_Ind rn
1 1 SL SL 0 1
2 1 M SL21 10 2
3 1 SLN SL20 20 3
4 2 SL SL 0 1
5 3 M SL21 10 1
6 3 SLN SL20 20 2
but if you see for ID 2 and 3, I can easily select the top row with where rn = 1
but for ID 1, my preferred row would be 2 because that is the newest.
My question here is how do I prioritise a value in column so that I can get the desired solution like :
Row ID Type Sn Sn_Ind rn
1 1 M SL21 10 1
2 2 SL SL 0 1
3 3 M SL21 10 1
As the values are fixed in Sn column - for ex SL, SL20, SL19, SL21 etc - If somehow I can give weightage to these values and create a new temp column with weightage and sort based on it, then?
Thank you for your support in advance!!
Consider below
-- Keep one row per ID: any Sn other than 'SL' outranks 'SL', then the largest Sn wins.
SELECT *
FROM `project.dataset.table`
WHERE TRUE
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY ID
  ORDER BY
    CASE WHEN Sn = 'SL' THEN 0 ELSE 1 END DESC,
    Sn DESC
) = 1
If applied to sample data in your question - output is
It wasn't difficult, I tried a few things and it worked out. If anyone can optimize the below solution even more that would be awesome.
first the dataset
#standardSQL
-- A query may contain only one WITH clause, so every CTE is chained with commas.
-- Inline sample rows standing in for the real table.
WITH `project.dataset.table` AS (
SELECT 3 ID, 'SLN' Type, 'SL20' Sn, 20 Sn_Ind UNION ALL
SELECT 1 , 'SL' , 'SL' , 0 UNION ALL
SELECT 2 , 'SL' , 'SL' , 0 UNION ALL
SELECT 1 , 'M' , 'SL21' , 10 UNION ALL
SELECT 3 , 'M' , 'SL21' , 10 UNION ALL
SELECT 1 , 'SLN' , 'SL20' , 20
),
-- Weight every row: the plain 'SL' value gets 0, any other Sn value gets 1.
-- The weight depends only on the row's own Sn, so no window function is needed.
weightage AS (
SELECT
*,
CASE Sn WHEN 'SL' THEN 0 ELSE 1 END AS weightt
FROM
`project.dataset.table`
),
-- Keep one row per ID: highest weight first, then the largest Sn.
main AS (
SELECT * EXCEPT(rn, weightt)
FROM (
SELECT *, ROW_NUMBER() OVER(PARTITION BY ID ORDER BY weightt DESC, Sn DESC) AS rn
FROM weightage )
WHERE rn = 1
)
SELECT * FROM main
after this, I can get the desired result
Row ID Type Sn Sn_Ind
1 1 M SL21 10
2 2 SL SL 0
3 3 M SL21 10

Recursive Query and INNER JOIN

I am stuck with a query that is using recursive and wondering if you guys can help me out.
I have this query below and it is based on the ShipQuantity, then it lists the number of records. For example, mfgPN "ABC123" has a ShipQuantity of 4, it will list 4 records with a number 1,2,3, 4.
-- Expands each matching feed row into numbered rows: Number starts at 1 and is incremented
-- while it is still below ShipQuantity.
WITH feedInfo
AS (
-- Anchor member: one row per matching feed record, numbered 1
SELECT df1.RecID, MfgPN, LinkID, ShipQuantity, 1 AS Number
FROM EXT_DistributorFeed df1
WHERE 1 = 1
AND df1.mfgPN IN ('ABC1', 'ABC2')
UNION ALL
-- Recursive member: re-join the same record by RecID and emit the next Number
SELECT df2.RecID, df2.MfgPN, df2.LinkID, df2.ShipQuantity, feedInfo.number + 1 AS Number
FROM EXT_DistributorFeed df2
INNER JOIN feedInfo ON df2.RecID = feedInfo.RecID
WHERE 1 = 1
AND number < feedInfo.ShipQuantity -- stop once Number has reached ShipQuantity
AND df2.mfgPN IN ('ABC1', 'ABC2')
)
Select feedInfo.*
From feedInfo
OPTION (maxrecursion 20000); -- allow up to 20000 recursion levels for this statement
Let's say the result is
RecID MfgPN LinkID ShipQuantity Number
101 ABC1 L11111 4 1
102 ABC1 L11111 4 2
103 ABC1 L11111 4 3
104 ABC1 L11111 4 4
105 ABC2 L22222 2 1
106 ABC2 L22222 2 2
Now, I have another table "EXT_DistributorFeedDetail" where it may contain serial# (some part# have serial# and some part# don't have). This table has only two columns: (1) LinkID and (2)SerialNo. Like this:
EXT_DistributorFeedDetail
LinkID SerialNo
L22222 S999999
L22222 S888888
I would like to join the feedInfo with EXT_DistributorFeedDetail table to get the result like this:
RecID MfgPN LinkID ShipQuantity Number Serial
101 ABC1 L11111 4 1 NULL
102 ABC1 L11111 4 2 NULL
103 ABC1 L11111 4 3 NULL
104 ABC1 L11111 4 4 NULL
105 ABC2 L22222 2 1 S99999
106 ABC2 L22222 2 2 S88888
Any expert out there can help would be greatly appreciated.
Thank you,
Looks like you're trying to match up the LinkID and Number from the recursive query to the LinkID and a Row Number in the EXT_DistributorFeedDetail table
-- Expands each matching feed row into ShipQuantity numbered rows, then attaches the
-- n-th serial number of the same LinkID to the row whose Number is n.
WITH feedInfo
AS (
-- Anchor member: one row per matching feed record, numbered 1
SELECT df1.RecID, df1.MfgPN, df1.LinkID, df1.ShipQuantity, 1 AS Number
FROM EXT_DistributorFeed df1
WHERE 1 = 1
AND df1.mfgPN IN ('ABC1', 'ABC2')
UNION ALL
-- Recursive member: re-join the same record and emit the next Number
SELECT df2.RecID, df2.MfgPN, df2.LinkID, df2.ShipQuantity, feedInfo.Number + 1 AS Number
FROM EXT_DistributorFeed df2
INNER JOIN feedInfo ON df2.RecID = feedInfo.RecID
WHERE 1 = 1
AND feedInfo.Number < feedInfo.ShipQuantity -- stop once Number has reached ShipQuantity
AND df2.mfgPN IN ('ABC1', 'ABC2')
)
Select fi.*,
dfd.SerialNo AS [Serial]
From feedInfo fi
-- LEFT JOIN keeps feed rows without a serial number (Serial is NULL for them).
-- The select list must not end with a comma before FROM.
LEFT JOIN (SELECT LinkID, SerialNo, ROW_NUMBER() OVER (PARTITION BY LinkID ORDER BY SerialNo) AS rn
FROM EXT_DistributorFeedDetail) dfd
ON dfd.LinkID = fi.LinkID AND dfd.rn = fi.Number
OPTION (maxrecursion 20000); -- allow up to 20000 recursion levels for this statement
Depending on the order in which you want the serial numbers from the EXT_DistributorFeedDetail table to be assigned, you may need to change the ORDER BY in the window function ROW_NUMBER() OVER (PARTITION BY LinkID ORDER BY SerialNo). If you take out the ORDER BY, the numbering would be arbitrary and could change between runs.
It looks like you want to join the SerialNo in descending order for the number. You can do this by changing the last part of the query to this:
-- Sketch: same join as before, but serial numbers are numbered in descending order per linkid.
with feedinfo as (
.... -- placeholder for the recursive feedinfo definition
)
select f.*, e.*
from feedinfo f
left join (
-- rn numbers the serials of each linkid from the highest serialno downwards
select *, rn = row_number() over (partition by linkid order by serialno desc)
from ext_distributorfeeddetail
) e on f.linkid = e.linkid and f.number = e.rn
option (maxrecursion 20000);
You have to left join on the EXT_DistributorFeedDetail table to extract serial column. Note that for each recid in the output the serial column will be repeated as many times as the number of entries per linkid in the EXT_DistributorFeedDetail table.
-- Expands each matching feed row into ShipQuantity numbered rows and left-joins every
-- serial number that shares the LinkID. Because the join is on LinkID alone, each
-- numbered row is repeated once per serial number of that LinkID.
WITH feedInfo AS
(
-- Anchor member: one row per matching feed record, numbered 1
SELECT df1.RecID, df1.MfgPN, df1.LinkID, df1.ShipQuantity, 1 AS Number
FROM EXT_DistributorFeed df1
WHERE df1.mfgPN IN ('ABC1', 'ABC2')
UNION ALL
-- Recursive member: re-join the same record and emit the next Number
SELECT df2.RecID, df2.MfgPN, df2.LinkID, df2.ShipQuantity,
feedInfo.Number + 1 AS Number
FROM EXT_DistributorFeed df2
INNER JOIN feedInfo ON df2.RecID = feedInfo.RecID
WHERE feedInfo.Number < feedInfo.ShipQuantity
AND df2.mfgPN IN ('ABC1', 'ABC2')
)
-- The detail table's column is SerialNo; it is exposed under the name Serial.
Select feedInfo.*, e.SerialNo AS Serial
From feedInfo
left join EXT_DistributorFeedDetail e
on e.LinkID = feedInfo.LinkID
OPTION (maxrecursion 20000); -- the default cap of 100 levels would fail for larger quantities

SQL grouping intersecting/overlapping rows

I have the following table in Postgres that has overlapping data in the two columns a_sno and b_sno.
-- Pair table: each row links a_sno to b_sno; the pair itself is the primary key.
CREATE TABLE data (
    a_sno integer NOT NULL,
    b_sno integer NOT NULL,
    PRIMARY KEY (a_sno, b_sno)
);

INSERT INTO data (a_sno, b_sno) VALUES
    (4, 5),
    (5, 4),
    (5, 6),
    (6, 5),
    (6, 7),
    (7, 6),
    (9, 10),
    (9, 13),
    (10, 9),
    (13, 9),
    (10, 13),
    (13, 10),
    (10, 14),
    (14, 10),
    (13, 14),
    (14, 13),
    (11, 15),
    (15, 11);
As you can see, in the first 6 rows the values 4, 5, 6 and 7 in the two columns intersect/overlap and need to be partitioned into one group. The same goes for rows 7-16 and rows 17-18, which will be labeled as groups 2 and 3 respectively.
The resulting output should look like this:
group | value
------+------
1 | 4
1 | 5
1 | 6
1 | 7
2 | 9
2 | 10
2 | 13
2 | 14
3 | 11
3 | 15
Assuming that all pairs exists in their mirrored combination as well (4,5) and (5,4). But the following solutions work without mirrored dupes just as well.
Simple case
All connections can be lined up in a single ascending sequence and complications like I added in the fiddle are not possible, we can use this solution without duplicates in the rCTE:
I start by getting minimum a_sno per group, with the minimum associated b_sno:
-- One row per group root: an a_sno that has an ascending pair (a_sno < b_sno) and is
-- never the larger end of another ascending pair. grp numbers the roots by a_sno.
SELECT row_number() OVER (ORDER BY root.a_sno) AS grp
     , root.a_sno
     , min(root.b_sno) AS b_sno
FROM   data AS root
WHERE  root.a_sno < root.b_sno
AND    NOT EXISTS (
          SELECT 1
          FROM   data AS parent
          WHERE  parent.b_sno = root.a_sno
          AND    parent.a_sno < parent.b_sno
       )
GROUP  BY root.a_sno;
This only needs a single query level since a window function can be built on an aggregate:
Get the distinct sum of a joined table column
Result:
grp a_sno b_sno
1 4 5
2 9 10
3 11 15
I avoid branches and duplicated (multiplicated) rows - potentially much more expensive with long chains. I use ORDER BY b_sno LIMIT 1 in a correlated subquery to make this fly in a recursive CTE.
Create a unique index on a non-unique column
Key to performance is a matching index, which is already present provided by the PK constraint PRIMARY KEY (a_sno,b_sno): not the other way round (b_sno, a_sno):
Is a composite index also good for queries on the first field?
-- t: one row per group root (an a_sno that is never the larger end of an ascending pair),
-- numbered by a_sno and paired with its smallest b_sno.
WITH RECURSIVE t AS (
SELECT row_number() OVER (ORDER BY d.a_sno) AS grp
, a_sno, min(b_sno) AS b_sno -- the smallest one
FROM data d
WHERE a_sno < b_sno
AND NOT EXISTS (
SELECT 1 FROM data
WHERE b_sno = d.a_sno
AND a_sno < b_sno
)
GROUP BY a_sno
)
-- cte: starting from each root's b_sno, repeatedly step to the smallest larger b_sno paired
-- with the current sno; the scalar subquery yields NULL when there is no such pair.
, cte AS (
SELECT grp, b_sno AS sno FROM t
UNION ALL
SELECT c.grp
, (SELECT b_sno -- correlated subquery
FROM data
WHERE a_sno = c.sno
AND a_sno < b_sno
ORDER BY b_sno
LIMIT 1)
FROM cte c
WHERE c.sno IS NOT NULL -- recursion stops after the NULL row has been produced
)
SELECT * FROM cte
WHERE sno IS NOT NULL -- eliminate row with NULL
UNION ALL -- no duplicates
SELECT grp, a_sno FROM t -- add the root itself
ORDER BY grp, sno;
Less simple case
All nodes can be reached in ascending order with one or more branches from the root (smallest sno).
This time, get all greater sno and de-duplicate nodes that may be visited multiple times with UNION at the end:
-- t: every ascending pair of each group root; rank() gives all rows with the same a_sno the same grp.
-- NOTE(review): rank() leaves gaps after ties, so grp values are not necessarily consecutive — confirm that is acceptable.
WITH RECURSIVE t AS (
SELECT rank() OVER (ORDER BY d.a_sno) AS grp
, a_sno, b_sno -- get all rows for smallest a_sno
FROM data d
WHERE a_sno < b_sno
AND NOT EXISTS (
SELECT 1 FROM data
WHERE b_sno = d.a_sno
AND a_sno < b_sno
)
)
-- cte: from each root's neighbours, follow every ascending pair to its b_sno
, cte AS (
SELECT grp, b_sno AS sno FROM t
UNION ALL
SELECT c.grp, d.b_sno
FROM cte c
JOIN data d ON d.a_sno = c.sno
AND d.a_sno < d.b_sno -- join to all connected rows
)
SELECT grp, sno FROM cte
UNION -- eliminate duplicates
SELECT grp, a_sno FROM t -- add first rows
ORDER BY grp, sno;
Unlike the first solution, we don't get a last row with NULL here (caused by the correlated subquery).
Both should perform very well - especially with long chains / many branches. Result as desired:
SQL Fiddle (with added rows to demonstrate difficulty).
Undirected graph
If there are local minima that cannot be reached from the root with ascending traversal, the above solutions won't work. Consider Farhęg's solution in this case.
Here is another approach that may be useful; it works in two steps:
1. take the max(sno) per each group:
-- Lists every a_sno that has no row linking it (as b_sno) to a larger a_sno,
-- and numbers those values in ascending order as gn.
select q.sno,
row_number() over(order by q.sno) gn
from(
select distinct d.a_sno sno
from data d
where not exists (
-- any row in which d.a_sno appears as b_sno next to a larger a_sno
select b_sno
from data
where b_sno=d.a_sno
and a_sno>d.a_sno
)
)q
result:
sno gn
7 1
14 2
15 3
2. use a recursive cte to find all related members in groups:
-- Starts from each value that has no larger linked a_sno (numbered gn), then walks the
-- pairs backwards (from b_sno to a_sno), tracking the visited path to stop on cycles.
with recursive cte(sno,gn,path,cycle)as(
-- Anchor: the starting values, each with a one-element path and cycle = false
select q.sno,
row_number() over(order by q.sno) gn,
array[q.sno],false
from(
select distinct d.a_sno sno
from data d
where not exists (
select b_sno
from data
where b_sno=d.a_sno
and a_sno>d.a_sno
)
)q
union all
-- Recursive step: move to every a_sno whose b_sno is the current sno
select d.a_sno,c.gn,
d.a_sno || c.path,
d.a_sno=any(c.path) -- true when this value was already on the path
from data d
join cte c on d.b_sno=c.sno
where not cycle -- do not extend a path that has already closed a cycle
)
select distinct gn,sno from cte
order by gn,sno
Result:
gn sno
1 4
1 5
1 6
1 7
2 9
2 10
2 13
2 14
3 11
3 15
here is the demo of what I did.
Here is a start that may give some ideas on an approach. The recursive query starts with a_sno of each record and then tries to follow the path of b_sno until it reaches the end or forms a cycle. The path is represented by an array of sno integers.
The unnest function will break the array into rows, so a sno value mapped to the path array such as:
4, {6, 5, 4}
will be transformed to a row for each value in the array:
4, 6
4, 5
4, 4
The array_agg then reverses the operation by aggregating the values back into a path, but getting rid of the duplicates and ordering.
Now each a_sno is associated with a path and the path forms the grouping. dense_rank can be used to map the grouping (cluster) to a numeric.
-- For every sno, collect the sorted, de-duplicated set of values found on any path that
-- ends at it; that array is the cluster the sno belongs to.
SELECT array_agg(DISTINCT map ORDER BY map) AS cluster
,sno
FROM ( WITH RECURSIVE x(sno, path, cycle) AS (
-- Anchor: every a_sno with a one-element path
SELECT a_sno, ARRAY[a_sno], false FROM data
UNION ALL
-- Step: follow a pair from the current sno to its b_sno and append it to the path;
-- cycle becomes true when b_sno was already on the path
SELECT b_sno, path || b_sno, b_sno = ANY(path)
FROM data, x
WHERE a_sno = x.sno
AND NOT cycle
)
-- one output row per (path end, value on the path)
SELECT sno, unnest(path) AS map FROM x ORDER BY 1
) y
GROUP BY sno
ORDER BY 1, 2
Output:
cluster | sno
--------------+-----
{4,5,6,7} | 4
{4,5,6,7} | 5
{4,5,6,7} | 6
{4,5,6,7} | 7
{9,10,13,14} | 9
{9,10,13,14} | 10
{9,10,13,14} | 13
{9,10,13,14} | 14
{11,15} | 11
{11,15} | 15
(10 rows)
Wrap it one more time for the ranking:
-- Numbers the clusters: every sno sharing the same cluster array receives the same dense rank.
SELECT dense_rank() OVER(order by cluster) AS rank
,sno
FROM (
-- cluster per sno: sorted distinct values found on any path that ends at the sno
SELECT array_agg(DISTINCT map ORDER BY map) AS cluster
,sno
FROM ( WITH RECURSIVE x(sno, path, cycle) AS (
-- Anchor: every a_sno with a one-element path
SELECT a_sno, ARRAY[a_sno], false FROM data
UNION ALL
-- Step: follow a pair to its b_sno; cycle flags a value already on the path
SELECT b_sno, path || b_sno, b_sno = ANY(path)
FROM data, x
WHERE a_sno = x.sno
AND NOT cycle
)
SELECT sno, unnest(path) AS map FROM x ORDER BY 1
) y
GROUP BY sno
ORDER BY 1, 2
) z
Output:
rank | sno
------+-----
1 | 4
1 | 5
1 | 6
1 | 7
2 | 9
2 | 10
2 | 13
2 | 14
3 | 11
3 | 15
(10 rows)

Group and tally values for each record in SQL [duplicate]

This question already has answers here:
How to use GROUP BY to concatenate strings in SQL Server?
(22 answers)
Closed 8 years ago.
I'm trying to run a select statement that groups records having the same ID and also combines the values from another column for each master ID, as in the example below. The result for each line should be the unique ID followed by the two names from its records, separated by a semicolon. Thanks in advance.
Current set
ID Name Cnt
-------------------------------- ----------------- ---
0001D72BA5F664BE129B6AB5744E2BE0 Talati, Shilpa 1
0001D72BA5F664BE129B6AB5744E2BE0 Weaver, Larry 1
0007EAB7CE9A3F2F95D2D63D0BBD08A9 St-Hilaire, Edith 1
0007EAB7CE9A3F2F95D2D63D0BBD08A9 Talati, Shilpa 1
Result:
0001D72BA5F664BE129B6AB5744E2BE0 Talati, Shilpa; Weaver, Larry
The easiest way to solve this in SQL Server is:
-- Joins the alphabetically smallest and largest Name of each ID as 'first; second'.
-- Only meaningful when an ID has exactly two names: a single name is repeated, and with
-- more than two names the ones in between are dropped.
-- `yourtable` is a placeholder for the real table name (TABLE itself is a reserved word).
select ID, min(Name) + '; ' + max(Name) as Names
from yourtable t
group by ID;
Here's one way using a recursive common table expression. Given a table like this:
-- Demo table: rows belong to a group_id and carry a name and a count.
CREATE TABLE dbo.Fizzbuzz
(
    id       INT         NOT NULL IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    group_id INT         NOT NULL,
    name     VARCHAR(50) NOT NULL,
    cnt      INT         NOT NULL
)
containing this data
id group_id name cnt
-- -------- ------ ---
1 1 Bob 3
2 1 Carol 5
3 1 Ted 6
4 1 Alice 16
5 2 Harold 72
6 2 Maude 28
This query
-- Builds, per group_id, a running ' and '-joined name list and a running cnt total by
-- walking the group's rows in id order; the final select keeps the last step of each group.
with recursive_cte as
(
-- Anchor: the first row (lowest id) of every group
select group_id = t.group_id ,
row = t.row ,
name = convert(varchar(8000),t.name) ,
cnt = t.cnt
from ( select * ,
row = row_number() over (
partition by group_id
order by id
)
from dbo.Fizzbuzz
) t
where t.row = 1
UNION ALL
-- Recursive step: append the group's next row to the accumulated names and total
select group_id = prv.group_id ,
row = nxt.row ,
name = convert(varchar(8000), prv.name + ' and ' + nxt.name ) ,
cnt = prv.cnt + nxt.cnt
from recursive_cte prv
join ( select * ,
row = row_number() over (
partition by group_id
order by id
)
from dbo.Fizzbuzz
) nxt on nxt.group_id = prv.group_id
and nxt.row = prv.row + 1
)
-- Keep, per group, the row with the highest step number (the complete list and total)
select group_id = t.group_id ,
total = t.cnt ,
names = t.name
from ( select * ,
rank = rank() over (
partition by group_id
order by row desc
)
from recursive_cte
) t
where rank = 1
order by group_id
produces the following output
group_id total names
-------- --- -------------------------------
1 30 Bob and Carol and Ted and Alice
2 100 Harold and Maude
One should note however, that the depth of recursion is bounded in SQL Server.
-- One row per ID with all Names of that ID joined by '; ' (no trailing separator).
SELECT
t1.ID,
STUFF(
    (SELECT '; ' + t2.Name          -- separator goes in front so STUFF can strip the first one
     FROM yourtable t2
     WHERE t2.ID = t1.ID
     ORDER BY t2.Name               -- deterministic order of the names inside the list
     FOR XML PATH(''), TYPE         -- TYPE + .value() returns the text without XML escaping (&amp; etc.)
    ).value('.', 'nvarchar(max)'),
    1, 2, ''                        -- remove the leading '; ' (2 characters)
) AS Name
FROM yourtable t1
GROUP BY t1.ID

Make Two Queries into 1 result set with 2 columns

Say I have a table that looks like this:
Person Table
ID AccountID Name
1 6 Billy
2 6 Joe
3 6 Tom
4 8 Jamie
5 8 Jake
6 8 Sam
I have two queries that I know work by themselves:
SELECT Name AS Group1 FROM person WHERE accountid = 6;
SELECT Name AS Group2 FROM person WHERE accountid = 8;
But I want a single Result Set to look like this:
Group1 Group2
Billy Jamie
Joe Jake
Tom Sam
You can use row_number() to assign a distinct value to each row, and then use a FULL OUTER JOIN to join the two subqueries:
-- Number the names of each account by id, then pair them up position by position.
WITH acct6 AS (
    SELECT name AS group1,
           ROW_NUMBER() OVER (ORDER BY id) AS rn
    FROM yourtable
    WHERE accountid = 6
),
acct8 AS (
    SELECT name AS group2,
           ROW_NUMBER() OVER (ORDER BY id) AS rn
    FROM yourtable
    WHERE accountid = 8
)
SELECT a.group1,
       b.group2
FROM acct6 AS a
FULL OUTER JOIN acct8 AS b
    ON a.rn = b.rn; -- the full join keeps unmatched positions when one account has more rows
See SQL Fiddle with Demo
I agree you should do this client side. But it can be done in T/SQL:
-- Pairs the names of account 6 with the names of account 8 by their position in ID order.
-- The source table is Person; GROUP is a reserved word and cannot be used unquoted as a table name.
select G1.Name as Group1
, G2.Name as Group2
from (
-- names of account 6, numbered by ID
select row_number() over (order by ID) as rn
, Name
from Person
where AccountID = 6
) as G1
full outer join
(
-- names of account 8, numbered by ID
select row_number() over (order by ID) as rn
, Name
from Person
where AccountID = 8
) as G2
on G1.rn = G2.rn
order by
coalesce(G1.rn, G2.rn) -- keeps positional order even when one side has no row