Complex join in sql with top 10 row - sql

Table1:
Id Word Frequency
1 A 1
2 B 5
Table2:
Id Word SecondWord SecondFrequency
1 A A1 1
2 A A2 5
3 A A3 10
4 A A4 9
5 A A5 20
6 B B1 5
7 B B2 8
8 B B3 50
9 B B4 40
10 B B5 68
Required output
For each word, the top 3 records from “Table2” ordered by SecondFrequency descending, together with the word's Frequency from “Table1”.
Ex.
Word Frequency SecondWord SecondFrequency
A 1 A5 20
A 1 A3 10
A 1 A4 9
B 5 B5 68
B 5 B3 50
B 5 B4 40
How can I get the desired output?

Use the ROW_NUMBER function, ordered by SecondFrequency, to get the required result:
-- Sample data: one row per word with the word's own frequency.
CREATE TABLE #Table1
(
    Id        TINYINT,
    Word      VARCHAR(1),
    Frequency TINYINT
);

-- Sample data: several second words per word, each with its own frequency.
CREATE TABLE #Table2
(
    Id              TINYINT,
    Word            VARCHAR(1),
    SecondWord      VARCHAR(2),
    SecondFrequency TINYINT
);

INSERT INTO #Table1 (Id, Word, Frequency)
VALUES (1, 'A', 1),
       (2, 'B', 5);

INSERT INTO #Table2 (Id, Word, SecondWord, SecondFrequency)
VALUES (1,  'A', 'A1', 1),
       (2,  'A', 'A2', 5),
       (3,  'A', 'A3', 10),
       (4,  'A', 'A4', 9),
       (5,  'A', 'A5', 20),
       (6,  'B', 'B1', 5),
       (7,  'B', 'B2', 8),
       (8,  'B', 'B3', 50),
       (9,  'B', 'B4', 40),
       (10, 'B', 'B5', 68);

-- Number the joined rows within each word, highest SecondFrequency first,
-- then keep only rows numbered 1 to 3 for every word.
WITH Ranked AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY t1.Word
                              ORDER BY t2.SecondFrequency DESC) AS RNo,
           t1.Word,
           t1.Frequency,
           t2.SecondWord,
           t2.SecondFrequency
    FROM #Table1 AS t1
    INNER JOIN #Table2 AS t2
        ON t1.Word = t2.Word
)
SELECT RNo, Word, Frequency, SecondWord, SecondFrequency
FROM Ranked
WHERE RNo >= 1
  AND RNo <= 3

You can use ROW_NUMBER. It gives each row with the same Word a number based on its SecondFrequency; the numbering restarts whenever the Word changes.
-- Rank the table2 rows of each Word by SecondFrequency (highest first), keep
-- the first three, and pick up Frequency from table1 (NULL when table1 has
-- no row for that Word).
select ranked.Word,
       t1.Frequency,
       ranked.SecondWord,
       ranked.SecondFrequency
from
(
    select t2.*,
           ROW_NUMBER() OVER (PARTITION BY t2.Word
                              ORDER BY t2.SecondFrequency DESC) AS RowNumber
    from table2 as t2
) as ranked
left join table1 as t1
    on ranked.Word = t1.Word
where ranked.RowNumber <= 3

Inner Join with Row_Number() will help in this case !!!
-- Sample table of words and their frequencies.
CREATE TABLE #Table1
(
    Id        INT,
    Word      VARCHAR(10),
    Frequency INT
);

INSERT INTO #Table1
VALUES (1, 'A', 1),
       (2, 'B', 5);

-- Sample table of second words, several per word.
CREATE TABLE #Table2
(
    Id              INT,
    Word            VARCHAR(10),
    SecondWord      VARCHAR(10),
    SecondFrequency INT
);

INSERT INTO #Table2
VALUES (1,  'A', 'A1', 1),
       (2,  'A', 'A2', 5),
       (3,  'A', 'A3', 10),
       (4,  'A', 'A4', 9),
       (5,  'A', 'A5', 20),
       (6,  'B', 'B1', 5),
       (7,  'B', 'B2', 8),
       (8,  'B', 'B3', 50),
       (9,  'B', 'B4', 40),
       (10, 'B', 'B5', 68);

-- Show the raw sample data.
SELECT * FROM #Table1;
SELECT * FROM #Table2;

-- Join the tables on Word, number each word's rows by SecondFrequency
-- (highest first) and return the first three rows per word.
WITH Ranked AS
(
    SELECT T1.Word,
           T1.Frequency,
           T2.SecondWord,
           T2.SecondFrequency,
           ROW_NUMBER() OVER (PARTITION BY T1.Word
                              ORDER BY T2.SecondFrequency DESC) AS RN
    FROM #Table1 AS T1
    INNER JOIN #Table2 AS T2
        ON T1.Word = T2.Word
)
SELECT Word, Frequency, SecondWord, SecondFrequency
FROM Ranked
WHERE RN < 4

get the top 3 rows from Table_2
join the Table_1
the syntax is : ROW_NUMBER() OVER(PARTITION BY COL1 ORDER BY COL2) AS num
COL1 is the column to group and COL2 is the column to sort , num is the sorted number to be used to limit the results
-- For every word, return its three second words with the highest
-- SecondFrequency, together with the word's own Frequency from Table_1.
SELECT t2.Word,
       t1.Frequency,
       t2.SecondWord,
       t2.SecondFrequency
FROM
  (SELECT Word,
          SecondWord,
          SecondFrequency,
          -- num restarts at 1 for each Word; 1 = highest SecondFrequency
          ROW_NUMBER() OVER (PARTITION BY Word
                             ORDER BY SecondFrequency DESC) AS num
   FROM Table_2) t2
JOIN Table_1 AS t1 ON t2.Word = t1.Word
WHERE t2.num <= 3
-- Sort by Word first so each word's rows stay together (A rows, then B rows)
-- as in the required output; sorting by SecondFrequency alone would put all
-- B rows before the A rows for the sample data.
ORDER BY t2.Word,
         t2.SecondFrequency DESC;

Related

Find exactly equal rows in 2 tables, both in terms of value and number

I have two Table, that both of them have 2 field (provinceid,cityid)
i want to find provinceid that have exactly the same cityid in this two table.
for example i have this tables:
table1:
provinceid
cityid
1
1
1
2
2
3
2
4
3
6
table2:
provinceid
cityid
1
1
1
5
2
3
2
4
3
6
3
7
I want a query that returns only provinceid = 2 with cityid 3 and 4.
I tried this query and it gives the right result, but I would like a better one:
-- Rows of t1 that also exist in t2, restricted to provinces for which every
-- t2 row has a matching t1 row.
select t1.provinceid, t1.cityid
from t1
left join t2 on t1.provinceid = t2.provinceid and t1.cityid = t2.cityid
-- keeping only matched rows makes this left join behave like an inner join
where t2.provinceid is not null and t2.cityid is not null
  and t1.provinceid not in (
      -- provinces that have at least one t2 row with no counterpart in t1
      -- (unmatched rows are the ones where the t1 side is NULL)
      select t2.provinceid
      from t2
      left join t1 on t1.provinceid = t2.provinceid and t1.cityid = t2.cityid
      where t1.provinceid is null and t1.cityid is null)
thank you
Try this :
-- Return the rows of provinces whose city sets are identical in both tables.
-- A province qualifies when its row count in table1, its row count in table2
-- and the number of (provinceid, cityid) pairs present in both tables are all
-- equal. Assumes (provinceid, cityid) is unique within each table.
select t1.provinceid, t1.cityid
from table1 t1
join table2 t2
  on t1.provinceid = t2.provinceid
 and t1.cityid = t2.cityid
where t1.provinceid in (
    select c1.provinceid
    from (select provinceid, count(*) as cnt
          from table1
          group by provinceid) as c1
    -- counts must be compared for the SAME province, so join on provinceid
    -- instead of cross joining every province with every other one
    join (select provinceid, count(*) as cnt
          from table2
          group by provinceid) as c2
      on c1.provinceid = c2.provinceid
    join (select a.provinceid, count(*) as cnt
          from table1 a
          join table2 b
            on a.provinceid = b.provinceid
           and a.cityid = b.cityid
          group by a.provinceid) as m
      on c1.provinceid = m.provinceid
    where c1.cnt = c2.cnt
      and c1.cnt = m.cnt);
Output:
provinceid
cityid
2
3
2
4
The simplest method for an exact match is to use string aggregation. The exact syntax varies by database, but in Standard SQL this looks like:
-- Collapse each province's cities into one ordered, comma-separated string per
-- table, then pair up provinces whose strings are identical.
with cities1 as (
    select provinceid,
           listagg(cityid, ',') within group (order by cityid) as cities
    from t1
    group by provinceid
),
cities2 as (
    select provinceid,
           listagg(cityid, ',') within group (order by cityid) as cities
    from t2
    group by provinceid
)
select cities1.provinceid, cities2.provinceid
from cities1
join cities2
  on cities1.cities = cities2.cities;
If you want the provinceids to be the same as well, just add t1.provinceid = t2.provinceid to the on clause.
Or, if you want the provinceids to be the same, you can use full join instead:
-- Full-join the two tables on (provinceid, cityid) and keep only provinces
-- for which every joined row has a value on both sides.
select provinceid
from t1 full join
t2
using (provinceid, cityid)
group by provinceid
-- count(*) counts all joined rows of the province; count(t1.cityid) and
-- count(t2.cityid) skip rows where that side is NULL, i.e. rows present in
-- only one of the tables.
having count(*) = count(t1.cityid) and count(*) = count(t2.cityid);
Besides matching on provid and cityid, we are also looking for exactly matching sets of records. There are many ways to do this. I prefer to compare, for each provid, the aggregated list of its cities as a string, in addition to the provid and cityid match; this removes provid/cityid pairs that exist in both tables but whose province does not have an identical set of rows.
-- Sample data for both tables is supplied inline via DUAL.
WITH table1 AS (
    SELECT 1 AS PROVID, 1 AS CITYID FROM DUAL UNION ALL
    SELECT 1, 2 FROM DUAL UNION ALL
    SELECT 2, 3 FROM DUAL UNION ALL
    SELECT 2, 4 FROM DUAL UNION ALL
    SELECT 3, 6 FROM DUAL
),
table2 AS (
    SELECT 1 AS PROVID, 1 AS CITYID FROM DUAL UNION ALL
    SELECT 1, 5 FROM DUAL UNION ALL
    SELECT 2, 3 FROM DUAL UNION ALL
    SELECT 2, 4 FROM DUAL UNION ALL
    SELECT 3, 6 FROM DUAL UNION ALL
    SELECT 3, 7 FROM DUAL
),
-- One row per provid with its cities as an ordered comma-separated list.
listed_table1 AS (
    SELECT src.provid,
           LISTAGG(src.cityid, ',') WITHIN GROUP (ORDER BY src.cityid) AS list_city
    FROM table1 src
    GROUP BY src.provid
),
listed_table2 AS (
    SELECT src.provid,
           LISTAGG(src.cityid, ',') WITHIN GROUP (ORDER BY src.cityid) AS list_city
    FROM table2 src
    GROUP BY src.provid
)
-- Keep the rows found in both tables whose province has the same city list
-- on both sides.
SELECT r1.provid,
       r1.cityid
FROM table1 r1
JOIN listed_table1 l1
  ON l1.provid = r1.provid
JOIN table2 r2
  ON r2.provid = r1.provid
 AND r2.cityid = r1.cityid
JOIN listed_table2 l2
  ON l2.provid = r2.provid
WHERE l1.list_city = l2.list_city
;
You can use (union ..)except (inner join..) to detect non-matches. Step by step
-- Return the table1 rows of provinces that have no (PROVID, CITYID) pair
-- existing in only one of the two tables.
with u12 as (
-- u12: every distinct (PROVID, CITYID) pair found in either table
select PROVID, CITYID from table1
union
select PROVID, CITYID from table2
),
c12 as (
-- c12: pairs found in both tables
select t1.PROVID, t2.CITYID
from table1 t1
join table2 t2 on t1.PROVID=t2.PROVID and t1.CITYID=t2.CITYID
),
nonMatch as (
-- nonMatch: provinces owning at least one pair that is in u12 but not in c12
select distinct PROVID
from (
select PROVID, CITYID from u12
except
select PROVID, CITYID from c12
) t
)
select *
from table1 t
where not exists (
select 1
from nonMatch n
where n.PROVID = t.PROVID);
If the number of duplicate rows matters, count them first:
-- Same approach as a plain union/except comparison, but each
-- (PROVID, CITYID) pair carries its number of occurrences n, so a pair only
-- matches when it occurs equally often in both tables.
with t1 as (
-- t1: pairs of table1 with their occurrence count
select PROVID, CITYID, count(*) n
from table1
group by PROVID, CITYID
),
t2 as (
-- t2: pairs of table2 with their occurrence count
select PROVID, CITYID, count(*) n
from table2
group by PROVID, CITYID
),
u12 as (
-- u12: every distinct (PROVID, CITYID, n) found on either side
select PROVID, CITYID, n from t1
union
select PROVID, CITYID, n from t2
),
c12 as (
-- c12: pairs found on both sides with the same count
select t1.PROVID, t1.CITYID, t1.n
from t1
join t2 on t1.PROVID = t2.PROVID and t1.CITYID = t2.CITYID and t1.n = t2.n
),
nonMatch as (
-- nonMatch: provinces owning at least one triple that is in u12 but not in c12
select distinct PROVID
from (
select PROVID, CITYID, n from u12
except
select PROVID, CITYID, n from c12
) t
)
select *
from table1 t
where not exists (
select 1
from nonMatch n
where n.PROVID = t.PROVID)
db<>fiddle

Case when duplicate add one more letter

For example: I have a table with these records below
1 A
2 A
3 B
4 C
...
and I need to migrate these record in to another table
1 AA
2 AB
3 B
4 C
...
Meaning if the record is duplicate, it will automatically add one more letter alphabetically.
Just a slightly different approach
Example
-- Sample data held in a table variable (T-SQL table variables start with @).
Declare @YourTable Table (ID int,[SomeCol] varchar(50))
Insert Into @YourTable Values
 (1,'A')
,(2,'A')
,(3,'B')
,(4,'C')
-- Append a letter only when SomeCol occurs more than once: char(64 + n) maps
-- the n-th duplicate (ordered by ID) to 'A', 'B', 'C', ...
Select *
      ,NewVal = concat(SomeCol,IIF(sum(1) over (partition by SomeCol)=1,'',char(64+row_number() over ( partition by SomeCol order by ID ))) )
 From @YourTable
Returns
ID SomeCol NewVal
1 A AA
2 A AB
3 B B
4 C C
EDIT - Requested UPDATE
-- Sample data held in a table variable (T-SQL table variables start with @).
Declare @YourTable Table (ID int,[SomeCol] varchar(50))
Insert Into @YourTable Values
 (1,'A')
,(2,'A')
,(3,'B')
,(4,'C')
-- Variant that leaves the first duplicate unchanged: char(63 + n) yields '@'
-- (char 64) for the first duplicate, which is stripped by replace(), and
-- 'A', 'B', ... for the following ones.
Select *
      ,NewVal = concat(SomeCol,IIF(sum(1) over (partition by SomeCol)=1,'',replace(char(63+row_number() over ( partition by SomeCol order by ID )),'@','')) )
 From @YourTable
Returns
ID SomeCol NewVal
1 A A
2 A AA
3 B B
4 C C
We might be able to handle this requirement with the help of a lookup table that maps each duplicate sequence number to a secondary letter:
-- letters: lookup from duplicate sequence number (1..26) to the letter to append.
WITH letters AS (
    SELECT seq, let
    FROM (VALUES
        (1, 'A'),  (2, 'B'),  (3, 'C'),  (4, 'D'),  (5, 'E'),  (6, 'F'),
        (7, 'G'),  (8, 'H'),  (9, 'I'),  (10, 'J'), (11, 'K'), (12, 'L'),
        (13, 'M'), (14, 'N'), (15, 'O'), (16, 'P'), (17, 'Q'), (18, 'R'),
        (19, 'S'), (20, 'T'), (21, 'U'), (22, 'V'), (23, 'W'), (24, 'X'),
        (25, 'Y'), (26, 'Z')
    ) AS v (seq, let)
),
-- cte: every row with its position among rows sharing the same let (rn) and
-- the number of rows sharing that let (cnt).
cte AS (
    SELECT id, let, ROW_NUMBER() OVER (PARTITION BY let ORDER BY id) rn,
        COUNT(*) OVER (PARTITION BY let) cnt
    FROM yourTable
)
-- Append the lookup letter only to values that occur more than once.
-- NOTE(review): a value with more than 26 duplicates finds no letter, so the
-- concatenation yields NULL for those rows - extend the lookup if needed.
SELECT t1.id, t1.let + CASE WHEN t1.cnt > 1 THEN t2.let ELSE '' END AS let
FROM cte t1
LEFT JOIN letters t2
    -- join on the duplicate sequence number, not on the row id
    ON t1.rn = t2.seq
ORDER BY t1.id;
Demo

SQL Grouping by first digit from sets of record

I need your help in SQL
I have a set of records of Cost center ID below.
what I want to do is to segregate/group them by inserting column to distinguish the category.
as you can see all digits start in 7 is belong to the bold digits.
my expected out is on below image also.
You can as the below:
-- Sample data held in a table variable (T-SQL table variables start with @).
DECLARE @Tbl TABLE (ID INT)
INSERT INTO @Tbl
VALUES
(735121201),
(735120001),
(5442244),
(735141094),
(735141097),
(4008060),
(735117603),
(40100000),
(735142902),
(735151199),
(4010070)
-- TableWithRowId: numbers the rows. ORDER BY (SELECT NULL) leaves the order
-- up to the engine; NOTE(review): a real ordering column is needed for a
-- guaranteed sequence.
;WITH TableWithRowId
AS
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY(SELECT NULL)) RowId,
        ID
    FROM
        @Tbl
), TempTable
AS
(
    -- RowId of the row that follows each ID not starting with '7'
    SELECT T.RowId + 1 AS RowId FROM TableWithRowId T
    WHERE
        LEFT(T.ID, 1) <> '7'
), ResultTable
AS
(
    -- Rows sharing the same next boundary RowId receive the same Flag.
    SELECT
        T.RowId ,
        T.ID,
        DENSE_RANK() OVER (ORDER BY (SELECT TOP 1 A.RowId FROM TempTable A WHERE A.RowId > T.RowId ORDER BY A.RowId)) AS Flag
    FROM TableWithRowId T
)
SELECT * FROM ResultTable
Result:
RowId ID Flag
----------- ----------- ----------
1 735121201 1
2 735120001 1
3 5442244 1
4 735141094 2
5 735141097 2
6 4008060 2
7 735117603 3
8 40100000 3
9 735142902 4
10 735151199 4
11 4010070 4
The following query is similar to NEER's:
-- test_table: inline sample data, CenterID stored as a string.
;WITH test_table(CenterID)AS(
SELECT '735121201' UNION ALL
SELECT '735120001' UNION ALL
SELECT '5442244' UNION ALL
SELECT '735141094' UNION ALL
SELECT '735141097' UNION ALL
SELECT '4008060' UNION ALL
SELECT '735117603' UNION ALL
SELECT '40100000' UNION ALL
SELECT '735142902' UNION ALL
SELECT '735151199' UNION ALL
SELECT '4010070'
),t1 AS (
-- t1: adds a row number (rn) and a flag telling whether CenterID starts with '7'
SELECT *,ROW_NUMBER()OVER(ORDER BY(SELECT 1)) AS rn,CASE WHEN LEFT(t.CenterID,1)='7' THEN 1 ELSE 0 END AS isSeven
FROM test_table AS t
),t2 AS(
-- t2: rows that start a group - the row with no predecessor, or a '7' row
-- whose predecessor is not a '7' row - numbered in rn order as toFilter
SELECT t1.*,ROW_NUMBER()OVER(ORDER BY t1.rn) AS toFilter
FROM t1 LEFT JOIN t1 AS pt ON pt.rn=t1.rn-1
WHERE pt.CenterID IS NULL OR (t1.isSeven=1 AND pt.isSeven=0)
)
-- Each row takes the toFilter of the closest group start at or before it.
SELECT t1.CenterID,x.toFilter FROM t1
CROSS APPLY(SELECT TOP 1 t2.toFilter FROM t2 WHERE t2.rn<=t1.rn ORDER BY rn desc) x
CenterID toFilter
--------- --------------------
735121201 1
735120001 1
5442244 1
735141094 2
735141097 2
4008060 2
735117603 3
40100000 3
735142902 4
735151199 4
4010070 4

How to add rows to second table from the first table using Common table expression in SQL Server

For example, I have the first table and I want to fill the second table using the first table.
Table A
Name A B C D
-----------------------
name1 a1 b1 c1 d1
name2 a2 b2 c2 d2
Table B (Desired Format)
ID Name Code
----------------------
1 name1 a1
2 name1 b1
3 name1 c1
4 name1 d1
5 name2 a2
6 name2 b2
7 name2 c2
8 name2 d2
As per the suggestions of @Surendra Nath GM, I tried this:
-- Asker's unfinished attempt: four CTEs, one per source column, each
-- assigning IDs 4n-3, 4n-2, 4n-1 and 4n so that the UNION ALL, ordered by ID,
-- interleaves the columns per source row.
-- NOTE(review): "#counter" is presumably "@counter" (T-SQL variables start
-- with @) - confirm against the original post.
DECLARE #counter as int
SET #counter = 1;
;WITH Actual AS
(
SELECT ROW_NUMBER() OVER( ORDER BY IDKEY) as ID FROM Northwind.dbo.Table1
-- NOTE(review): the WHERE clause below has no predicate
WHERE
),FIRST AS
(
SELECT ((ROW_NUMBER() OVER( ORDER BY IDKEY))*4-3) AS ID,Name, A
FROM Northwind.dbo.Table1
),SECOND AS
(
SELECT ((ROW_NUMBER() OVER( ORDER BY IDKEY))*4-2) AS ID, Name ,B
from Northwind.dbo.Table1
), NEXT AS
(
SELECT ((ROW_NUMBER() OVER( ORDER BY IDKEY))*4-1) AS ID, Name, C
-- NOTE(review): "Table1next" looks like a typo for "Table1"
from Northwind.dbo.Table1next
-- the underscores are the asker's placeholder for a fourth CTE name
), ________ as
(
SELECT ((ROW_NUMBER() OVER( ORDER BY IDKEY))*4) AS ID, Name, D
from Northwind.dbo.Table1
)
-- NOTE(review): the assignment and the extra ")" below are not valid between
-- the CTE list and the final SELECT
#counter = #counter+1;
)
SELECT * FROM FIRST
UNION ALL
SELECT * FROM SECOND
UNION ALL
SELECT * FROM NEXT
UNION ALL
SELECT * FROM _________
ORDER BY ID
Here, what do I use after the "SECOND AS". I tried writing "THIRD AS" but apparently no such command exists so after some searching, I could write the third row using "NEXT AS" but I am completely clueless as to how do I insert the Fourth row.
I know I can simply write 4 insert commands, one for each of A, B, C and D, but then I will get the codes in the following order, which is NOT desired:
Table B(NOT DESIRED IN THIS FORMAT)
ID Name Code
----------------------
1 name1 a1
2 name2 a2
3 name1 b1
4 name2 b2
5 name1 c1
6 name2 c2
7 name1 d1
8 name2 d2
Also, the desired format can be achieved using simple loops but in the project I am working on, there are around 200000 rows and the simple loops method takes a lot of time. So, I want to do it using CTE. Please help.
You can do it with two inserts:
-- First pass: copy the (A1, B1) pair of every tableA row into tableB.
INSERT INTO tableB (A, B)
SELECT src.A1,
       src.B1
FROM tableA AS src;

-- Second pass: copy the (A2, B2) pair of every tableA row into tableB.
INSERT INTO tableB (A, B)
SELECT src.A2,
       src.B2
FROM tableA AS src;
Or union the table with one insert:
-- Single insert: stack the (A1, B1) pairs and the (A2, B2) pairs of tableA
-- and load them into tableB in one statement.
INSERT INTO tableB (A, B)
SELECT first_pair.A1,
       first_pair.B1
FROM tableA AS first_pair
UNION ALL
SELECT second_pair.A2,
       second_pair.B2
FROM tableA AS second_pair;
This assumes that id is defined as id int identity(1, 1) so it automatically increments.
If not, you could assign it as:
-- Stack the (A1, B1) and (A2, B2) pairs, then number the stacked rows to
-- produce the id. ORDER BY (SELECT NULL) leaves the numbering order up to
-- the engine.
WITH stacked AS
(
    SELECT A1, B1
    FROM tableA
    UNION ALL
    SELECT A2, B2
    FROM tableA
)
INSERT INTO tableB (id, A, B)
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS id,
       A1,
       B1
FROM stacked
Getting the ID is the tricky part here. To do that with set operations, you have to split the job in two: generate odd IDs for the select of A1, B1 and even IDs for the select of A2, B2.
Shown as below
-- Actual: every Table1 row with a sequence number RN (ordered by Name).
;WITH Actual AS
(
SELECT ROW_NUMBER() OVER( ORDER BY Name) as RN, * FROM Table1
),FIRST AS
(
-- odd IDs (1, 3, 5, ...) for the first pair of columns; RN only exists in
-- Actual, so the CTE must read from Actual rather than from Table1
SELECT ((RN*2)-1) AS ID,A1,B1
FROM Actual
),SECOND AS
(
-- even IDs (2, 4, 6, ...) for the second pair of columns
SELECT (RN*2) AS ID,A2,B2
FROM Actual
)
SELECT * FROM FIRST
UNION ALL
SELECT * FROM SECOND
-- ORDER BY is not allowed inside a CTE without TOP/OFFSET; sorting the final
-- result by ID interleaves the two pairs of each source row
ORDER BY ID
if you have four columns then you can use the below query instead
-- Actual: every Table1 row with a sequence number RN (ordered by Name).
;WITH Actual AS
(
SELECT ROW_NUMBER() OVER( ORDER BY Name) as RN, * FROM Table1
),FIRST AS
(
-- odd IDs (1, 3, 5, ...) for the first group of columns; RN only exists in
-- Actual, so the CTE must read from Actual rather than from Table1
SELECT ((RN*2)-1) AS ID,A1,B1,C1,D1
FROM Actual
),SECOND AS
(
-- even IDs (2, 4, 6, ...) for the second group of columns
SELECT (RN*2) AS ID,A2,B2,C2,D2
FROM Actual
)
SELECT * FROM FIRST
UNION ALL
SELECT * FROM SECOND
-- ORDER BY is not allowed inside a CTE without TOP/OFFSET; sorting the final
-- result by ID interleaves the two groups of each source row
ORDER BY ID

Is it possible to write a sql query that is grouped based on a running total of a column?

It would be easier to explain with an example. Suppose I wanted to get at most 5 items per group.
My input would be a table looking like this:
Item Count
A 2
A 3
A 3
B 4
B 4
B 5
C 1
And my desired output would look like this:
Item Count
A 5
A>5 3
B 4
B>5 9
C 1
An alternative output that I could also work with would be
Item Count RunningTotal
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
I can use ROW_NUMBER() to get the top X records in each group, however my requirement is to get the top X items for each group, not X records. My mind is drawing a blank as to how to do this.
-- Sample data held in a table variable (T-SQL table variables start with @).
declare @yourTable table (item char(1), [count] int)
insert into @yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
-- cte: numbers the rows of each item by ascending count
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from @yourTable
)
-- Running total: for each row, sum the counts of all rows of the same item
-- whose row number is not greater than its own.
select t1.Item, t1.Count, sum(t2.count) as RunningTotal from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
-- explicit ordering so the rows come back in running-total order
order by t1.item, t1.row
Result:
Item Count RunningTotal
---- ----------- ------------
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
Considering the clarifications from your comment, you should be able to produce the second kind of output from your post by running this query:
-- Running total per item: for each row, sum the counts of the rows of the
-- same item that come before it (lower creating_user_priority, or the same
-- priority with an earlier created_date) plus the row itself.
-- NOTE(review): rows sharing both priority and created_date are counted in
-- each other's totals - confirm that this pair is unique per item.
select t.Item
, t.Count
, (select sum(tt.count)
from mytable tt
where t.item=tt.item and (tt.creating_user_priority < t.creating_user_priority or
-- <= includes the current row; with a strict < the first row of every item
-- would get NULL and every total would lack the row's own count
( tt.creating_user_priority = t.creating_user_priority and tt.created_date <= t.created_date))
) as RunningTotal
from mytable t
-- Sample data held in a table variable (T-SQL table variables start with @).
declare @yourTable table (item char(1), [count] int)
insert into @yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
-- cte: numbers the rows of each item by ascending count
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from @yourTable
)
-- #RunTotal: every row with the running total of its item
select t1.row, t1.Item, t1.Count, sum(t2.count) as RunningTotal
into #RunTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
-- GrandTotal: the total count of the row's item
alter table #RunTotal
add GrandTotal int
update rt
set GrandTotal = gt.Total
from #RunTotal rt
left join (
select Item, sum(Count) Total
from #RunTotal rt
group by Item) gt
on rt.Item = gt.Item
-- Part 1: per item, the largest running total that does not exceed 5.
select Item, max(RunningTotal) as [Count]
from #RunTotal
where RunningTotal <= 5
group by Item
union
-- Part 2: per item whose grand total exceeds 5, the remainder above part 1.
-- coalesce covers items whose very first row already exceeds 5: they have no
-- part-1 row, so the whole grand total is reported instead of NULL.
select a.Item + '>5', total - coalesce(five, 0)
from (
select Item, max(GrandTotal) total
from #RunTotal
where GrandTotal > 5
group by Item
) a
left join (
select Item, max(RunningTotal) five
from #RunTotal
where RunningTotal <= 5
group by Item
) b
on a.Item = b.Item
I've updated the accepted answer and got your desired result.
-- Items whose total is at most 5 are reported with their total.
SELECT Item, SUM(Count) AS Total
FROM mytable t
GROUP BY Item
HAVING SUM(Count) <=5
UNION
-- Items whose total exceeds 5 are reported capped at 5 ...
SELECT Item, 5
FROM mytable t
GROUP BY Item
HAVING SUM(Count) >5
UNION
-- ... plus an extra "<Item>>5" row carrying the amount above 5.
SELECT t2.Item + '>5', Sum(t2.Count) - 5
FROM mytable t2
GROUP BY Item
HAVING SUM(Count) > 5
-- column names of a UNION come from its first SELECT
ORDER BY Item, Total
-- Load the sample rows into the temp table #tmp.
select v.Name, v.Cnt
into #tmp
from (
    select 'A' as Name, 2 as Cnt
    union all select 'A', 3
    union all select 'A', 3
    union all select 'B', 4
    union all select 'B', 4
    union all select 'B', 5
    union all select 'C', 1
) as v

-- totals: one row per Name with the sum of its Cnt values
;with totals as (
    select Name, sum(cnt) as Total
    from #tmp
    group by Name
)
-- each Name with its total capped at 5 ...
select Name,
       case when Total > 5 then 5 else Total end as Cnt
from totals
union
-- ... plus a "<Name>>5" row holding the amount above 5, when there is one
select Name + '>5',
       Total - 5 as Cnt
from totals
where Total > 5
Here is what I have so far. I know it's not complete but... this should be a good starting point.
I can get your second output by using a temp table and an update pass:
-- Sample data held in a table variable (T-SQL table variables start with @);
-- Total is filled in by the UPDATE below.
DECLARE @Data TABLE
(
ID INT IDENTITY(1,1) PRIMARY KEY
,Value VARCHAR(5)
,Number INT
,Total INT
)
INSERT INTO @Data (Value, Number) VALUES ('A',2)
INSERT INTO @Data (Value, Number) VALUES ('A',3)
INSERT INTO @Data (Value, Number) VALUES ('A',3)
INSERT INTO @Data (Value, Number) VALUES ('B',4)
INSERT INTO @Data (Value, Number) VALUES ('B',4)
INSERT INTO @Data (Value, Number) VALUES ('B',5)
INSERT INTO @Data (Value, Number) VALUES ('C',1)
-- @Value remembers the Value of the previously updated row and @Count its
-- running total; both start as NULL, so the first row takes the ELSE branch.
DECLARE
@Value VARCHAR(5)
,@Count INT
-- Running total per Value: continue the sum while Value repeats, restart it
-- when Value changes.
-- NOTE(review): this relies on rows being updated in ID order, which this
-- statement does not itself request - verify before using on real data.
UPDATE D
SET
@Count = Total = CASE WHEN Value = @Value THEN Number + @Count ELSE Number END
,@Value = Value
FROM @Data AS D
SELECT
Value
,Number
,Total
FROM @Data
There may be better ways, but this should work.