Perform counting for number of occurrence in SQL

Perform counting for number of occurrence in SQL - sql

Is it common and convenient to do this kind of data manipulation in SQL: keep only the values from the columns whose condition is satisfied, and number the occurrences per ID? If it is feasible, how would I write the SQL to generate the desired output?
Name is presented only when the conditions (Cond1 to Cond5) are yes.
Desired Input
ID Cond1 Cond2 Cond3 Cond4 Cond5 Name1 Name2 Name3 Name4 Name5
1 No Yes No No Yes (null) Result1 n/a (null) Result2
2 Yes No Yes No Yes Result3 n/a Result4 (null) Result5
Desired Output
ID Counting Name
1 1 Result1
1 2 Result2
2 1 Result3
2 2 Result4
2 3 Result5

This can be done with union all and row_number():
-- Stack one row per (id, slot) for every slot whose condition is 'Yes', then
-- number the surviving rows per id in slot order (seq = 1..5).
select
    id,
    row_number() over (partition by id order by seq) as counting,
    name
from (
    select id, name1 as name, 1 as seq from mytable where cond1 = 'Yes'
    union all select id, name2, 2 from mytable where cond2 = 'Yes'
    union all select id, name3, 3 from mytable where cond3 = 'Yes'
    union all select id, name4, 4 from mytable where cond4 = 'Yes'
    union all select id, name5, 5 from mytable where cond5 = 'Yes'
) x
-- sort on the alias that actually exists in the select list
order by id, counting

You can use UNPIVOT with pairs of columns and then filter on the Yes rows and use the ROW_NUMBER analytic function to get the incremental index of the result:
Query:
-- Turn each (CondN, NameN) pair into its own row tagged 'V1'..'V5', keep the
-- rows whose condition is 'Yes', and number them per id in tag order.
WITH slot_rows AS (
    SELECT id,
           value,
           cond,
           name
    FROM table_name
    UNPIVOT ( ( cond, name ) FOR value IN (
        ( Cond1, Name1 ) AS 'V1',
        ( Cond2, Name2 ) AS 'V2',
        ( Cond3, Name3 ) AS 'V3',
        ( Cond4, Name4 ) AS 'V4',
        ( Cond5, Name5 ) AS 'V5'
    ) )
)
SELECT id,
       ROW_NUMBER() OVER ( PARTITION BY id ORDER BY value ) AS "COUNT",
       name
FROM slot_rows
WHERE cond = 'Yes'
Test Data:
-- Fixture for the UNPIVOT query: one row per ID holding five (CondN, NameN) pairs.
CREATE TABLE table_name (
ID NUMBER(10,0) PRIMARY KEY,
-- Each CondN is checked against the literals 'Yes' and 'No'.
Cond1 VARCHAR2(3) CHECK ( Cond1 IN ( 'Yes', 'No' ) ),
Cond2 VARCHAR2(3) CHECK ( Cond2 IN ( 'Yes', 'No' ) ),
Cond3 VARCHAR2(3) CHECK ( Cond3 IN ( 'Yes', 'No' ) ),
Cond4 VARCHAR2(3) CHECK ( Cond4 IN ( 'Yes', 'No' ) ),
Cond5 VARCHAR2(3) CHECK ( Cond5 IN ( 'Yes', 'No' ) ),
Name1 VARCHAR2(10),
Name2 VARCHAR2(10),
Name3 VARCHAR2(10),
Name4 VARCHAR2(10),
Name5 VARCHAR2(10),
-- Pair rules: a 'Yes' slot must carry a name; a 'No' slot must be NULL or the placeholder 'n/a'.
CHECK ( ( Cond1 = 'Yes' AND Name1 IS NOT NULL ) OR ( Cond1 = 'No' AND ( Name1 IS NULL OR Name1 = 'n/a' ) ) ),
CHECK ( ( Cond2 = 'Yes' AND Name2 IS NOT NULL ) OR ( Cond2 = 'No' AND ( Name2 IS NULL OR Name2 = 'n/a' ) ) ),
CHECK ( ( Cond3 = 'Yes' AND Name3 IS NOT NULL ) OR ( Cond3 = 'No' AND ( Name3 IS NULL OR Name3 = 'n/a' ) ) ),
CHECK ( ( Cond4 = 'Yes' AND Name4 IS NOT NULL ) OR ( Cond4 = 'No' AND ( Name4 IS NULL OR Name4 = 'n/a' ) ) ),
CHECK ( ( Cond5 = 'Yes' AND Name5 IS NOT NULL ) OR ( Cond5 = 'No' AND ( Name5 IS NULL OR Name5 = 'n/a' ) ) )
);
-- Load the two sample rows (IDs 1 and 2) in a single multi-row insert.
INSERT INTO table_name ( ID, Cond1, Cond2, Cond3, Cond4, Cond5, Name1, Name2, Name3, Name4, Name5 )
SELECT 1, 'No', 'Yes', 'No', 'No', 'Yes', null, 'Result1', 'n/a', null, 'Result2' FROM DUAL UNION ALL
SELECT 2, 'Yes', 'No', 'Yes', 'No', 'Yes', 'Result3', 'n/a', 'Result4', null, 'Result5' FROM DUAL;
Output:
ID | COUNT | NAME
-: | ----: | :------
1 | 1 | Result1
1 | 2 | Result2
2 | 1 | Result3
2 | 2 | Result4
2 | 3 | Result5
db<>fiddle here

Another option:
SQL> with
2 test (id, cond1, cond2, cond3, cond4, cond5, name1, name2, name3, name4, name5) as
3 -- your sample data
4 (select 1, 'no' , 'yes', 'no' , 'no', 'yes', null , 'result1', 'n/a' , null, 'result2' from dual union all
5 select 2, 'yes', 'no' , 'yes', 'no', 'yes', 'result3', 'n/a' , 'result4', null, 'result5' from dual
6 ),
7 temp as
8 -- keep NAMEn only where CONDn is 'yes' (DECODE returns NULL otherwise)
9 (select id,
10 decode(cond1, 'yes', name1) n1,
11 decode(cond2, 'yes', name2) n2,
12 decode(cond3, 'yes', name3) n3,
13 decode(cond4, 'yes', name4) n4,
14 decode(cond5, 'yes', name5) n5
15 from test
16 ),
17 up as
18 -- unpivot data: one row per kept name, PC holds the source column (N1..N5)
19 (select *
20 from temp
21 unpivot (c_name for pc in (n1, n2, n3, n4, n5))
22 )
23 -- final result: number by source column position (pc), not by the name text
24 select id,
25 row_number() over (partition by id order by pc) counting,
26 c_name as name
27 from up
28 order by id, counting;
ID COUNTING NAME
---------- ---------- -------
1 1 result1
1 2 result2
2 1 result3
2 2 result4
2 3 result5
SQL>

You can use CONNECT BY LEVEL for achieving the desired result as following:
-- Pair every row of YOUR_TABLE with the numbers 1..5, pick the COND/NAME
-- column that matches each number, keep the 'Yes' slots and renumber per ID.
WITH SLOTS AS (
    SELECT LEVEL AS LVL
    FROM DUAL
    CONNECT BY LEVEL <= 5
),
EXPANDED AS (
    SELECT T.ID,
           S.LVL,
           CASE S.LVL
               WHEN 1 THEN T.COND1
               WHEN 2 THEN T.COND2
               WHEN 3 THEN T.COND3
               WHEN 4 THEN T.COND4
               WHEN 5 THEN T.COND5
           END AS COND,
           CASE S.LVL
               WHEN 1 THEN T.NAME1
               WHEN 2 THEN T.NAME2
               WHEN 3 THEN T.NAME3
               WHEN 4 THEN T.NAME4
               WHEN 5 THEN T.NAME5
           END AS NAME_
    FROM YOUR_TABLE T
    CROSS JOIN SLOTS S
)
SELECT ID,
       ROW_NUMBER() OVER (PARTITION BY ID ORDER BY LVL) AS "Counting",
       NAME_ AS "Name"
FROM EXPANDED
WHERE COND = 'Yes';
Cheers!!

Here is another option using UNPIVOT.
-- Sample table: five condition flags (cond1..cond5) and the five names they guard.
create table mytab(id number,
cond1 varchar2(3),
cond2 varchar2(3),
cond3 varchar2(3),
cond4 varchar2(3),
cond5 varchar2(3),
Name1 varchar2(7),
Name2 varchar2(7),
Name3 varchar2(7),
Name4 varchar2(7),
Name5 varchar2(7));
-- Column lists are spelled out so the inserts do not depend on column order.
insert into mytab (id, cond1, cond2, cond3, cond4, cond5, name1, name2, name3, name4, name5)
values (1,'No','Yes','No','No','Yes',null,'Result1','n/a',null,'Result2');
insert into mytab (id, cond1, cond2, cond3, cond4, cond5, name1, name2, name3, name4, name5)
values (2,'Yes','No','Yes','No','Yes','Result3','n/a','Result4',null,'Result5');
commit;
-- Show the loaded fixture.
select id, cond1, cond2, cond3, cond4, cond5, name1, name2, name3, name4, name5
from mytab;
Output:
ID COND1 COND2 COND3 COND4 COND5 NAME1 NAME2 NAME3 NAME4 NAME5
1 No Yes No No Yes (null) Result1 n/a (null) Result2
2 Yes No Yes No Yes Result3 n/a Result4 (null) Result5
UNPIVOT based solution.
-- Unpivot the name columns and the condition columns separately, tagging both
-- with the same slot label ('n1'..'n5'), then match them up per id and slot
-- and number the names whose condition is 'Yes'.
with name_rows as (
    select id, n, names
    from mytab
    unpivot (names for n in (name1 as 'n1',
                             name2 as 'n2',
                             name3 as 'n3',
                             name4 as 'n4',
                             name5 as 'n5'))
),
cond_rows as (
    select id, n, condns
    from mytab
    unpivot (condns for n in (cond1 as 'n1',
                              cond2 as 'n2',
                              cond3 as 'n3',
                              cond4 as 'n4',
                              cond5 as 'n5'))
)
select nr.id,
       row_number() over (partition by nr.id order by nr.n) as counting,
       nr.names
from name_rows nr
inner join cond_rows cr
    on cr.id = nr.id
   and cr.n = nr.n
where cr.condns = 'Yes'
order by nr.id, counting;
Output:
ID COUNTING NAMES
1 1 Result1
1 2 Result2
2 1 Result3
2 2 Result4
2 3 Result5

Related

Using multilist column as foreign key reference

I have a table TABLEA that stores data in columns which are basically multilist columns, for example ColumnA with a value like ',2562,2563,2564,' and ColumnB with a value like ',121,122,123,'.
The values in these columns are actually foreign keys coming from other tables.
Data is something like this in Table A.
ID NAME ColumnA ColumnB
1 ITEM1 ,2562,2563,2564, ,121,122,123
2 ITEM2 NULL ,6455,545,
3 ITEM3 ,1221,1546, NULL
4 ITEM4 NULL NULL
I want to join these columns with their parent tables and extract data.
I am hoping the result set would have 8 rows.
For example
ITEM ColumnA ColumB
ITEM1 2562 121
ITEM1 2563 122
ITEM1 2564 123
ITEM2 NULL 6455
ITEM2 NULL 545
....
I have tried this query with some help, but it does not work when I try to use ColumnB as well, and it also ignores the items with NULL values.
ColumnA stores IDs from the USER_GROUP table, but ColumnB stores IDs from some other table, let's say GROUP1, and there could be another column, ColumnC, that stores values from yet another table. That's the situation I am stuck in. I hope I have explained it well enough to be understood, but I am happy to add more detail if needed.
-- Asker's attempt: returns USER_GROUP rows whose ID occurs, wrapped in commas,
-- inside some TableA.COLUMNA value AND inside some TableA.COLUMNB value.
-- The two EXISTS checks are independent, so they need not match the same TableA row.
SELECT ug.*
FROM USER_GROUP ug
WHERE EXISTS (SELECT 1
FROM TableA t1
-- match ',<id>,' anywhere in the delimited list
WHERE t1.COLUMNA LIKE '%,' || ug.ID || ',%'
)
AND EXISTS (SELECT 1
FROM TableA t1
WHERE t1.COLUMNB LIKE '%,' || ug.ID || ',%'
);

Here's one option:
SQL> with test (id, name, cola, colb) as
2 (select 1, 'item1', ',2562,2563,2564,', ',121,122,123,' from dual union all
3 select 2, 'item2', null , ',6455,545,' from dual union all
4 select 3, 'item3', ',1221,1546,' , null from dual union all
5 select 4, 'item4', null , null from dual
6 ),
7 remcom
8 -- remove leading and trailing commas
9 as (select id,
10 name,
11 rtrim(ltrim(cola, ','), ',') cola,
12 rtrim(ltrim(colb, ','), ',') colb
13 from test
14 )
15 -- one output row per list position; row n carries the n-th element of each list
16 select id,
17 name,
18 regexp_substr(cola, '[^,]+', 1, column_value) cola,
19 regexp_substr(colb, '[^,]+', 1, column_value) colb
20 from remcom r cross join
21 -- row count = element count of the LONGER list (a NULL list counts as 0 commas),
22 -- so a shorter cola can no longer cut off trailing colb elements
23 table(cast(multiset(select level from dual
24 connect by level <= greatest(nvl(regexp_count(r.cola, ','), 0),
25 nvl(regexp_count(r.colb, ','), 0)) + 1
26 ) as sys.odcinumberlist))
27 order by id, name, cola, colb;
ID NAME COLA COLB
---------- ----- ---------- ----------
1 item1 2562 121
1 item1 2563 122
1 item1 2564 123
2 item2 545
2 item2 6455
3 item3 1221
3 item3 1546
4 item4
8 rows selected.
SQL>
Now that you have it, join this result with another table you have.
By the way, this example nicely shows why it is a bad idea to store multiple values in the same column. Don't do that.

You don't need to use (slow) regular expressions and can do it with simple string functions in a recursive sub-query factoring clause:
-- Splits the comma-delimited lists in columna / columnb in lock-step, one
-- element of each per output row, by walking comma positions with INSTR.
-- (starta, enda) and (startb, endb) are the positions of the commas that
-- bracket the current element; 0 means "no such comma".
WITH split_data ( id, name, columna, columnb, starta, enda, startb, endb ) AS (
-- Anchor member: positions of the first and second comma in each list.
SELECT id,
name,
columna,
columnb,
INSTR(columna,',',1,1),
INSTR(columna,',',1,2),
INSTR(columnb,',',1,1),
INSTR(columnb,',',1,2)
FROM test_data
UNION ALL
-- Recursive member: the previous end comma becomes the new start, and the
-- next comma is searched from the character after it. Once an end is 0 it
-- stays 0, so an exhausted list gets start = 0 on the following step.
SELECT id,
name,
columna,
columnb,
enda,
CASE WHEN enda = 0 THEN 0 ELSE INSTR(columna,',',enda+1,1) END,
endb,
CASE WHEN endb = 0 THEN 0 ELSE INSTR(columnb,',',endb+1,1) END
FROM split_data
-- keep going while at least one list still has a closing comma
WHERE enda > 0
OR endb > 0
)
SELECT id,
name,
-- start = 0: no element; end = 0: last element, take the rest of the string;
-- otherwise take the text strictly between the two commas.
-- NOTE(review): for a NULL list INSTR presumably yields NULL, so both WHENs
-- fall through to ELSE and SUBSTR returns NULL - confirm.
CASE
WHEN starta = 0 THEN NULL
WHEN enda = 0 THEN SUBSTR( columna, starta + 1 )
ELSE SUBSTR( columna, starta + 1, enda - starta - 1 )
END AS valuea,
CASE
WHEN startb = 0 THEN NULL
WHEN endb = 0 THEN SUBSTR( columnb, startb + 1 )
ELSE SUBSTR( columnb, startb + 1, endb - startb - 1 )
END as valueb
FROM split_data
ORDER BY id, starta, startb;
Which for your test data:
-- Fixture for the recursive splitter: four items covering both lists filled,
-- only ColumnB, only ColumnA, and both NULL.
-- The list values here start with a comma but have no trailing comma.
CREATE TABLE test_data ( ID, NAME, ColumnA, ColumnB ) AS
SELECT 1, 'ITEM1', ',2562,2563,2564', ',121,122,123' FROM DUAL UNION ALL
SELECT 2, 'ITEM2', NULL, ',6455,545' FROM DUAL UNION ALL
SELECT 3, 'ITEM3', ',1221,1546', NULL FROM DUAL UNION ALL
SELECT 4, 'ITEM4', NULL, NULL FROM DUAL;
Outputs:
ID | NAME | VALUEA | VALUEB
-: | :---- | :----- | :-----
1 | ITEM1 | 2562 | 121
1 | ITEM1 | 2563 | 122
1 | ITEM1 | 2564 | 123
2 | ITEM2 | null | 6455
2 | ITEM2 | null | 545
3 | ITEM3 | 1221 | null
3 | ITEM3 | 1546 | null
4 | ITEM4 | null | null
db<>fiddle here

order columns by their value

I've got a table A with 3 columns that contain the same kind of data, for example:
TABLE A
KEY COL1 COL2 COL3
1 A B C
2 B C null
3 A null null
4 D E F
5 null C B
6 B C A
7 D E F
As a result I expect the distinct rows of this table, where the order of the values within a row doesn't matter. So keys 1 and 6 are the same, as are 2 and 5, and 4 and 7. The rest are different.
Of course, I can't just use DISTINCT in my select, because that would only filter out the duplicate of 4 and 7.
I could use a very complex CASE expression, or a select within a select with an ORDER BY. But this needs to be used in a conversion, so performance is an issue here.
Does anyone have a well-performing way to do this?
The result I expect
COL1 COL2 COL3
A B C
B C null
A null null
D E F

If you can have many columns then you can UNPIVOT then order the values and then PIVOT and take the DISTINCT rows:
Oracle Setup:
-- Fixture: seven keys whose three columns hold the same kind of value in
-- varying order, with NULLs for missing values.
CREATE TABLE table_name ( KEY, COL1, COL2, COL3 ) AS
SELECT 1, 'A', 'B', 'C' FROM DUAL UNION ALL
SELECT 2, 'B', 'C', null FROM DUAL UNION ALL
SELECT 3, 'A', null, null FROM DUAL UNION ALL
SELECT 4, 'D', 'E', 'F' FROM DUAL UNION ALL
SELECT 5, null, 'C', 'B' FROM DUAL UNION ALL
SELECT 6, 'B', 'C', 'A' FROM DUAL UNION ALL
SELECT 7, 'D', 'E', 'F' FROM DUAL;
Query:
-- Sort each row's values "sideways": unpivot to one value per row, rank the
-- values within each key, pivot the ranks back into COL1..COL3, then keep the
-- distinct sorted triples.
WITH ranked AS (
    SELECT key,
           value,
           ROW_NUMBER() OVER ( PARTITION BY key ORDER BY value ) AS rn
    FROM table_name
    UNPIVOT ( value FOR name IN ( COL1, COL2, COL3 ) ) u
),
sorted_cols AS (
    SELECT *
    FROM ranked
    PIVOT ( MAX( value ) FOR rn IN (
        1 AS COL1,
        2 AS COL2,
        3 AS COL3
    ) )
)
SELECT DISTINCT
       COL1,
       COL2,
       COL3
FROM sorted_cols
Output:
COL1 | COL2 | COL3
:--- | :--- | :---
A | B | C
B | C | null
D | E | F
A | null | null
db<>fiddle here

The complicated case expression is going to have the best performance. But the simplest method is going to be conditional aggregation:
-- Stack the three columns into one, rank the non-NULL values within each key,
-- and spread the ranks back out so col1..col3 come out in sorted order per key.
with stacked as (
    select key, col1 as col from t
    union all
    select key, col2 as col from t
    union all
    select key, col3 as col from t
),
ranked as (
    select key,
           col,
           row_number() over (partition by key order by col asc) as seqnum
    from stacked
    where col is not null
)
select key,
       max(case seqnum when 1 then col end) as col1,
       max(case seqnum when 2 then col end) as col2,
       max(case seqnum when 3 then col end) as col3
from ranked
group by key;

delete duplicate rows has null

Here is the data
Id Name Value col1 col2 col3
1 test1 1 null null null
2 test1 1 x null null
3 test1 1 x y null
4 test2 2 x y z
5 test2 2 x y null
Find duplicates based on the "Name" and "Value" columns and, within each set of duplicates, delete the rows that have NULL values in more columns (keep the row with the fewest NULLs).
I managed to delete duplicates by following http://www.dba-oracle.com/t_delete_duplicate_table_rows.htm#null but didn't know what should be done to achieve this in SQL.
Expected result
ID Name Value col1 Col2 Col3
3 test1 1 X y null
4 test2 2 x y z

Oracle Setup:
-- Fixture: five rows in two (Name, Value) groups, each row with a different
-- number of NULLs across col1..col3.
CREATE TABLE table_name ( Id, Name, Value, col1, col2, col3 ) AS
SELECT 1, 'test1', 1, null, null, null FROM DUAL UNION ALL
SELECT 2, 'test1', 1, 'x', null, null FROM DUAL UNION ALL
SELECT 3, 'test1', 1, 'x', 'y', null FROM DUAL UNION ALL
SELECT 4, 'test2', 2, 'x', 'y', 'z' FROM DUAL UNION ALL
SELECT 5, 'test2', 2, 'x', 'y', null FROM DUAL;
Query:
-- Within each (name, value) group, rank rows by how many of col1..col3 are
-- filled (most first, ties broken by the column values) and keep the top row.
WITH ranked AS (
    SELECT t.*,
           ROW_NUMBER() OVER (
               PARTITION BY name, value
               ORDER BY CASE WHEN col1 IS NULL THEN 0 ELSE 1 END
                      + CASE WHEN col2 IS NULL THEN 0 ELSE 1 END
                      + CASE WHEN col3 IS NULL THEN 0 ELSE 1 END DESC,
                        col1, col2, col3
           ) AS rn
    FROM table_name t
)
SELECT *
FROM ranked
WHERE rn = 1;
Output:
ID NAME VALUE C C C RN
---------- ----- ---------- - - - ----------
3 test1 1 x y 1
4 test2 2 x y z 1

Querying a table in SQL Server based on permutation of column2 and 3

I have a table like this:
col1 col2 col3
111 1 1
222 1 0
333 0 1
444 0 0
Here col2 = 1 means col1 is commercial, col3 = 1 means col1 is retail as well. How do I get a result like below?
ID Description
111 Commercial
111 Retail
222 Commercial
333 Retail

You can do it with a UNION ALL:
-- One row per flag that is set: col2 = 1 yields 'Commercial', col3 = 1 yields 'Retail'.
-- The literal column is named so the result has the requested Description header.
SELECT ID = col1, Description = 'Commercial' FROM MyTable WHERE col2 = 1
UNION ALL
SELECT ID = col1, Description = 'Retail' FROM MyTable WHERE col3 = 1
-- sort the combined result so rows for the same ID come out together
ORDER BY ID, Description

This uses almost the same idea as above, but in a single SELECT with CROSS APPLY:
-- For each MyTable row, CROSS APPLY produces zero, one or two Description rows
-- depending on which of col2 / col3 equals 1; rows with neither flag drop out.
SELECT ID = col1, t.Description
FROM MyTable
CROSS APPLY (
    SELECT Description = 'Commercial' WHERE col2 = 1
    -- the two literals can never be duplicates, so UNION ALL is sufficient
    UNION ALL
    SELECT Description = 'Retail' WHERE col3 = 1
) t

Can be done with UNPIVOT also:
-- Table variable holding the sample rows; a table variable must be named with @.
DECLARE @t TABLE
    (
      col1 INT ,
      col2 INT ,
      col3 INT
    );

INSERT INTO @t ( col1, col2, col3 )
VALUES ( 111, 1, 1 ),
       ( 222, 1, 0 ),
       ( 333, 0, 1 ),
       ( 444, 0, 0 );

-- Unpivot the two flag columns into (col, r) rows, keep the flags that are
-- set (r <> 0), and map the source column name to its description.
SELECT col1 ,
       CASE WHEN col = 'col2' THEN 'Commercial'
            ELSE 'Retail'
       END AS Description
FROM @t UNPIVOT ( r FOR col IN ( [col2], [col3] ) ) u
WHERE r <> 0;

Find duplicates out of multiple columns

I have a tricky SQL problem. Let me give you an example:
ID1 Name Name2 Name3 Name4
100 Albert Kevin Jon Alex
101 Albert Jon Kevin Alex
102 Albert Georg Alex Babera
103 Albert Stefany
Let's say ID1 gives me a project ID and Name is the main person (Albert). Name2 to Name4 are the subgroup of people who worked with Albert. Now I want to count matches between these subgroups. First, I want to find exact matches, for example between 100 and 101, which contain the same names in a different order.
Second, is it possible to count how many names match between two projects? For example, 100 and 102 have one name in common (Alex).
Thanks in advance

I know it is long and not bulletproof but it kind of does the job.
-- Counts, for every project id, how many projects share exactly the same set
-- of sub-group names (ignoring the order in which name2..name4 were stored).
WITH source_t AS
(
    -- sample data from the question
    SELECT 100 id, 'Albert' name, 'Kevin' name2, 'Jon' name3, 'Alex' name4 FROM DUAL UNION ALL
    SELECT 101, 'Albert', 'Jon', 'Kevin', 'Alex' FROM DUAL UNION ALL
    SELECT 102, 'Albert', 'Georg', 'Alex', 'Babera' FROM DUAL UNION ALL
    SELECT 103, 'Albert', 'Stefany', NULL, NULL FROM DUAL
)
, tab_1 AS
(
    -- stack name2..name4 into a single column (named name2 by the first branch)
    SELECT id, name, name2 FROM source_t UNION ALL
    SELECT id, name, name3 FROM source_t UNION ALL
    SELECT id, name, name4 FROM source_t
)
, tab_2 AS
(
    -- rank the stacked names alphabetically within each project
    SELECT id
         , name
         , name2
         , ROW_NUMBER() OVER (PARTITION BY id, name ORDER BY name2) AS r_number
    FROM tab_1
)
, tab_3 AS
(
    -- spread the ranks back out: name2..name4 are now in sorted order per project
    SELECT id
         , name
         , MAX(CASE WHEN r_number = 1 THEN name2 END) AS name2
         , MAX(CASE WHEN r_number = 2 THEN name2 END) AS name3
         , MAX(CASE WHEN r_number = 3 THEN name2 END) AS name4
    FROM tab_2
    GROUP BY
           id
         , name
)
-- A window count over the sorted names replaces the former self-join on
-- NVL(x, 'NULL'): PARTITION BY puts NULLs in the same group, so no sentinel
-- string is needed and a person literally named 'NULL' cannot collide with it.
SELECT id
     , name
     , name2
     , name3
     , name4
     , COUNT(*) OVER (PARTITION BY name, name2, name3, name4) AS n_count
FROM tab_3
;
/*
102 Albert Alex Babera Georg 1
103 Albert Stefany NULL NULL 1
101 Albert Alex Jon Kevin 2
100 Albert Alex Jon Kevin 2
*/

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Perform counting for number of occurrence in SQL - sql

Related

Using multilist column as foreign key reference

order columns by their value

delete duplicate rows has null

Querying a table in SQL Server based on permutation of column2 and 3

Find duplicates out of multiple columns

Categories

Resources