Select first row in each GROUP BY group - sql

In my project I have the following data:
C1 | C2 | C3 | C4
A | B | 2 | X
A | B | 3 | Y
C | D | 4 | Q
C | D | 1 | P
where C1, C2, C3 and C4 are the column names in the database.
I need to show the data like this (C3 summed per C1/C2 group, C4 taken from the first row of each group):
C1 | C2 | C3 | C4
A | B | 5 | X
C | D | 5 | Q

The answer to this is fairly simple. Just follow my solution below:
-- Demo (T-SQL): sum C3 per (C1, C2) group and show one C4 value for each group.
-- Create the sample table.
CREATE TABLE TABLE1 (C1 char(1) NULL, C2 char(1) NULL, C3 int NULL, C4 char(1) NULL);
GO
-- Insert the sample values; the explicit column list keeps the statement valid
-- if the table's column order ever changes.
INSERT INTO TABLE1 (C1, C2, C3, C4)
VALUES ('A', 'B', 2, 'X'), ('A', 'B', 3, 'Y'), ('C', 'D', 4, 'Q'), ('C', 'D', 1, 'P');
GO
-- Sum C3 per (C1, C2) group, then pick one C4 value from the SAME group.
-- The subquery is correlated on both grouping columns: matching on C1 alone
-- could return a C4 that belongs to a different (C1, C2) group.
-- NOTE: TOP(1) without ORDER BY returns an arbitrary row of the group. The table
-- has no column that records insertion order, so "first row" is not guaranteed.
SELECT
    A.C1,
    A.C2,
    SUM(A.C3) AS C3,
    (
        SELECT TOP(1) B.C4
        FROM TABLE1 AS B
        WHERE B.C1 = A.C1
          AND B.C2 = A.C2
    ) AS C4
FROM
    TABLE1 AS A
GROUP BY
    A.C1,
    A.C2;
GO
-- Clean up the database: drop the sample table if it exists.
IF EXISTS(SELECT name FROM sys.tables WHERE object_id = OBJECT_ID(N'TABLE1')) DROP TABLE TABLE1;
GO
Let me know if this helps.

Assuming you mean the first record ordered by c4 (grouped by c1 and c2), then this will work establishing a row_number and using max with case:
-- Number the rows of every (c1, c2) group by c4, then aggregate per group:
-- the total of c3 plus the c4 value of the row numbered 1.
WITH ranked AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.c1, src.c2 ORDER BY src.c4) rn
    FROM yourtable src
)
SELECT
    ranked.c1,
    ranked.c2,
    SUM(ranked.c3),
    MAX(CASE WHEN ranked.rn = 1 THEN ranked.c4 END) c4
FROM ranked
GROUP BY
    ranked.c1,
    ranked.c2
SQL Fiddle Demo
However, if you don't want to order by c4, then you need some other column to ensure the correct order of the results. Without an order by clause, there's no guarantee on how they are returned.

I assume you chose 'X' and 'Q' because those rows were inserted first within each C1/C2 group.
I would suggest you to add an identity column in your table and work based on it as given below.
Table:
-- Sample data held in a session temp table. `DECLARE #DB TABLE` is not valid
-- T-SQL (table variables must start with @), so a #temp table is created instead.
-- ID is an IDENTITY column, so it numbers the rows in insertion order.
CREATE TABLE #DB (ID INT IDENTITY(1,1), C1 VARCHAR(10), C2 VARCHAR(10), C3 INT, C4 VARCHAR(10))
-- The column list is required: ID is generated and must not receive a value.
INSERT INTO #DB (C1, C2, C3, C4) VALUES
('A','B',2,'X'),
('A','B',3,'Y'),
('C','D',4,'Q'),
('C','D',1,'P')
Code:
-- Per-group totals of C3, joined to the C4 value of the row with the lowest ID
-- in the same (C1, C2) group.
SELECT
    totals.*,
    first_rows.C4
FROM (
    -- one row per (C1, C2) with the summed C3
    SELECT C1, C2, SUM(C3) C3
    FROM #DB
    GROUP BY C1, C2
) totals
INNER JOIN (
    -- the row numbered 1 per (C1, C2) when ordered by ID
    SELECT C1, C2, C4
    FROM (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY C1, C2 ORDER BY ID) [ROW]
        FROM #DB
    ) numbered
    WHERE numbered.[ROW] = 1
) first_rows
    ON totals.C1 = first_rows.C1
   AND totals.C2 = first_rows.C2
Result:

Related

postgresql - count distinct combination of three columns- order doesn't matter

I'm trying to count distinct combinations of three columns, order of the columns doesn't matter
sample :
a a a
a a b
a b a
b b a
b a b
the result I'm getting :
a a a 1
a a b 1
a b a 1
b b a 1
b a b 1
desired result
aaa 1
aab 2
bba 2
You can use an ordered array
-- Sort the three values of each row into an array so that permutations of the
-- same values compare equal, then count the rows per sorted triple.
SELECT
    sorted.v[1],
    sorted.v[2],
    sorted.v[3],
    COUNT(*) n
FROM tbl src
CROSS JOIN LATERAL (
    SELECT ARRAY_AGG(vals.col ORDER BY vals.col) v
    FROM (VALUES (src.c1), (src.c2), (src.c3)) vals (col)
) sorted
GROUP BY
    sorted.v[1],
    sorted.v[2],
    sorted.v[3];
db<>fiddle
Maybe you can use checksums for getting the required result eg if it is really just combinations 'a' and 'b' that you are dealing with, you could convert the letters to integers (by calling the ASCII() function) and add these up so that you get a checksum.
TABLE
-- Sample table with the five rows from the question, built from a VALUES list.
CREATE TABLE t (c1, c2, c3) AS
VALUES
    ('a', 'a', 'a'),
    ('a', 'a', 'b'),
    ('a', 'b', 'a'),
    ('b', 'b', 'a'),
    ('b', 'a', 'b');
Checksums
-- Show every row next to the sum of the ASCII codes of its three values.
SELECT
    src.c1,
    src.c2,
    src.c3,
    ASCII(src.c1) + ASCII(src.c2) + ASCII(src.c3) AS checksum
FROM t src;
-- output
c1 c2 c3 checksum
a a a 291
a a b 292
a b a 292
b b a 293
b a b 293
If this works for you, then you can use window functions eg
-- Keep one row per ASCII-sum partition together with the partition's row count.
-- Both window functions share one window; a row survives when its row number
-- equals the partition size. The window is ordered by a constant, so which
-- row of a partition receives that number is not determined by the query.
SELECT
    c1,
    c2,
    c3,
    rc_ AS rowcount
FROM (
    SELECT
        c1,
        c2,
        c3,
        COUNT(*) OVER same_sum rc_,
        ROW_NUMBER() OVER same_sum rn_
    FROM t
    WINDOW same_sum AS (PARTITION BY ASCII(c1) + ASCII(c2) + ASCII(c3) ORDER BY 1)
) sq
WHERE rn_ = rc_;
-- output
c1 c2 c3 rowcount
a a a 1
a b a 2
b a b 2
See dbfiddle.
If you are dealing with strings that cannot easily be converted to integers, you could create a mapping between the strings and integers, and implement the map (map_) as a view so that it is easy to use in subsequent queries, e.g.
MAP
-- {1} find all distinct elements
-- {2} map each element to an integer
-- View map_: every distinct value found in c1, c2 or c3 of t, paired with its
-- rank (weight_) in ascending value order. UNION removes the duplicates.
CREATE VIEW map_ AS
SELECT
    val_,
    RANK() OVER (ORDER BY val_) weight_
FROM (
    SELECT c1 AS val_ FROM t
    UNION
    SELECT c2 FROM t
    UNION
    SELECT c3 FROM t
) unique_elements;
Once you have this map, you can use its values for creating checksums (maybe also in a view) ...
Checksums
-- View t_checksums_: each row of t with the sum of the map_ weights of its
-- three values. LEFT JOINs keep every row of t; a value without a weight
-- yields a NULL checksum. Relies on map_ holding one row per val_.
CREATE VIEW t_checksums_ AS
SELECT
    src.c1,
    src.c2,
    src.c3,
    w1.weight_ + w2.weight_ + w3.weight_ AS checksum
FROM t src
LEFT JOIN map_ w1 ON w1.val_ = src.c1
LEFT JOIN map_ w2 ON w2.val_ = src.c2
LEFT JOIN map_ w3 ON w3.val_ = src.c3;
... and then, you can use the same query as before, for obtaining the final result - see dbfiddle.

Repeating rows based on the frequency

I have a table with 2 columns named A and B which is defined as :
A B
c1 2
c2 3
c3 4
The expected output is :
A B
c1 1
c1 2
c2 1
c2 2
c2 3
c3 1
c3 2
c3 3
c3 4
-- Sample data: A is the label, B is the number of times the label is repeated.
CREATE TABLE #table2
    ([A] varchar(2), [B] int)
;
INSERT INTO #table2
    ([A], [B])
VALUES
    ('c1', 2),
    ('c2', 3),
    ('c3', 4)
;
-- Recursive CTE: the anchor emits value = 1 for every row that must appear at
-- least once; each recursive step emits the next value until it reaches B.
;WITH nums AS
(
    SELECT 1 AS value, a, b
    FROM #table2
    WHERE b >= 1            -- a frequency below 1 (or NULL) produces no rows
    UNION ALL
    SELECT value + 1 AS value, a, b
    FROM nums
    WHERE nums.value < b
)
SELECT a, value
FROM nums
ORDER BY a, value
-- The default limit of 100 recursion levels would fail for B > 101; the
-- recursion is bounded by B, so the limit can be lifted safely.
OPTION (MAXRECURSION 0);
output
a value
c1 1
c1 2
c2 1
c2 2
c2 3
c3 1
c3 2
c3 3
c3 4
I created a table-valued function that uses a recursive CTE to generate the numbers from 1 up to the given input, and then joined the table to the function using CROSS APPLY.
-- Multi-statement table-valued function returning the integers 1..@Data, one per row.
--   @Data : upper bound of the sequence; values below 1 (or NULL) return no rows.
-- Returns a table with a single INT column named Data.
-- NOTE: no MAXRECURSION hint is supplied, so the default limit of 100 recursion
-- levels applies and the function fails for @Data greater than 101.
CREATE FUNCTION [dbo].[udf_GetData] (
    @Data INT
)
RETURNS @output TABLE (
    Data INT
)
AS
BEGIN
    ;WITH CTe
    AS
    (
        -- anchor: start at 1, but only when at least one row is requested
        SELECT 1 AS Data
        WHERE @Data >= 1
        UNION ALL
        -- recursive step: next integer until the upper bound is reached
        SELECT Data + 1
        FROM CTe
        WHERE Data < @Data
    )
    INSERT INTO @output
    SELECT Data FROM CTe
    RETURN
END
Sample data, and how to call the function using CROSS APPLY:
-- Sample data: A is the label, B is the number of repetitions.
DECLARE @Data AS TABLE (A VARCHAR(10), B INT)
INSERT INTO @Data (A, B)
SELECT 'c1', 2 UNION ALL
SELECT 'c2', 3 UNION ALL
SELECT 'c3', 4
-- A table-valued function cannot be used as a scalar subquery in the SELECT
-- list; CROSS APPLY invokes it once per row of @Data and joins the rows it
-- returns to that row.
SELECT d.A,
       r.Data AS RecData
FROM @Data d
CROSS APPLY [dbo].[udf_GetData](d.B) AS r
ORDER BY d.A, r.Data
Result
A RecursiveData
----------------
c1 1
c1 2
c2 1
c2 2
c2 3
c3 1
c3 2
c3 3
c3 4
You could try this:
-- test data (A is the label, B is the number of repetitions)
declare @tbl table(A char(2), B int);
insert into @tbl (A, B) values
('c1', 2),
('c2', 3),
('c3', 4);
-- create a CTE with the numbers 1..max(B), which is joined to the table below
-- (the default limit of 100 recursion levels applies: max(B) above 101 needs a MAXRECURSION hint)
declare @max int;
select @max = max(B) from @tbl;
;with numbers as (
    select 1 n
    union all
    select n + 1 from numbers
    where n < @max
)
-- join the numbers with the table: each row matches the numbers 1..B
select t.A, num.n
from @tbl t
join numbers num on t.B >= num.n
order by t.A, num.n
Say, your table name is test.
-- Recursive CTE: start every row of test at repeat = 1 and emit the next
-- repeat value for as long as it is still below the row's b.
WITH r (a, b, repeat) AS (
    SELECT src.a, src.b, 1
    FROM test src
    UNION ALL
    SELECT prev.a, prev.b, prev.repeat + 1
    FROM r prev
    WHERE prev.repeat < prev.b
)
SELECT a, b, repeat
FROM r
ORDER BY a, repeat;

oracle transposing rows to columns

my question is about transposing rows into columns.
I have got table T1(c1,c2,c3,c4,c5) columns with datatype varchar2, i want to transpose the rows obtained,
example:
select * from T1
gives
c1 c2 c3 c4 c5
row1 1 2 3 4 5
row2 A B C D E
....
rown U V W X Y
the result expected is
C1 1 A......U
C2 2 B......V
C3 3 C......W
C4 4 D......X
C5 5 E......Y
all rows in different columns(table contains only 10-15 rows)
i have tried the following query, but it isnt giving expected result.
-- Asker's attempt: number the rows of T1 by c1, then unpivot the five columns
-- into one value per (row number, source column). Only the row number and the
-- value are selected, and nothing pivots the result back into columns.
SELECT
    rn,
    value
FROM (
    SELECT src.*, ROW_NUMBER() OVER (ORDER BY c1) rn
    FROM T1 src
)
UNPIVOT (
    value FOR value_type IN (C1, c2, c3, c4, c5)
);
So you only need to pivot data again:
dbfiddle demo
-- Transpose t1: number its rows, unpivot the five columns into (rn, val, col)
-- triples, then pivot on the row number so each original row becomes a column.
WITH numbered AS (
    -- attach a row number (ordered by c1) to every row of t1
    SELECT src.*, ROW_NUMBER() OVER (ORDER BY c1) rn
    FROM t1 src
),
unpivoted AS (
    -- one row per (row number, source column)
    SELECT rn, val, col
    FROM numbered
    UNPIVOT (val FOR col IN (c1, c2, c3, c4, c5))
)
SELECT *
FROM unpivoted
PIVOT (MAX(val) FOR rn IN (1, 2, 3, 4))
ORDER BY col
You have to know how many rows are in t1 and list them all in pivot in clause (1, 2, 3, 4) alternatively adding aliases for each column.

compare rows between two ids with in the same table and insert the missing row for that id

i have a table which has structure mentioned below
let's say table name -> tab1
id c1 c2 c3 c4
1 a b 01-02-18 c row1
1 o b 01-02-18 c row2
1 a b 04-05-16 c row3
1 n g 01-02-18 d row4
2 a b 01-02-18 c row5
So i want to insert id 1 rows to id 2. As data for row1 and row5 is same for column c1,c2,c3,c4 so i want to skip row1 to be inserted for id 2 .
Table should look like this
id c1 c2 c3 c4
1 a b 01-02-18 c row1
1 o b 01-02-18 c row2
1 a b 04-05-16 c row3
1 n g 01-02-18 d row4
2 a b 01-02-18 c row5
2 o b 01-02-18 c row6
2 a b 04-05-16 c row7
2 n g 01-02-18 d row8
i have written this query but doesn't give me the expected result
for selecting the unique record based on column :
-- Count the id = 1 rows whose (c1, c2, c3, c4) values have no counterpart
-- stored under id = 2.
SELECT COUNT(*)
FROM tab1 src
WHERE src.id = 1
  AND NOT EXISTS (
      SELECT *
      FROM tab1 dup
      WHERE dup.id = 2
        AND dup.c1 = src.c1
        AND dup.c2 = src.c2
        AND dup.c3 = src.c3
        AND dup.c4 = src.c4
  );
for inserting the records
-- Asker's attempt: insert into rsk_mdl_sec_map_ts, under the id '2', the
-- id = 1 rows of tab1 that have no matching (c1, c2, c3, c4) row under id = 2.
-- The INSERT has no column list, so it depends on the target's column order.
INSERT INTO rsk_mdl_sec_map_ts
SELECT
    '2',
    src.c1,
    src.c2,
    src.c3,
    src.c4
FROM tab1 src
WHERE src.id = 1
  AND NOT EXISTS (
      SELECT *
      FROM tab1 dup
      WHERE dup.id = 2
        AND dup.c1 = src.c1
        AND dup.c2 = src.c2
        AND dup.c3 = src.c3
        AND dup.c4 = src.c4
  );
can anyone help what is wrong in this or suggest me some other approach to achieve the same . Thanks
First, build a query that picks the rows that should be inserted and omits those that already exist under id = 2:
-- Rows stored under id = 1 that have no row with the same c1..c4 under id = 2.
SELECT *
FROM tab1 src
WHERE src.id = 1
  AND NOT EXISTS (
      SELECT 'anything'
      FROM tab1 existing
      WHERE existing.id = 2
        AND existing.c1 = src.c1
        AND existing.c2 = src.c2
        AND existing.c3 = src.c3
        AND existing.c4 = src.c4
  )
| ID | C1 | C2 | C3 | C4 |
|----|----|----|-----------------------|----|
| 1 | o | b | 2018-01-02 00:00:00.0 | c |
| 1 | a | b | 2016-04-05 00:00:00.0 | c |
| 1 | n | g | 2018-01-02 00:00:00.0 | d |
Demo: http://sqlfiddle.com/#!4/d66fc/4
Next, use INSERT ... SELECT .... command, just put INSERT above the SELECT command, and use 2 constant as ID, and DISTINCT clause to remove possible duplicates:
-- Copy to id = 2 every distinct (c1, c2, c3, c4) combination stored under
-- id = 1 that does not yet exist under id = 2.
INSERT INTO tab1 (id, c1, c2, c3, c4)
SELECT DISTINCT
    2,
    src.c1,
    src.c2,
    src.c3,
    src.c4
FROM tab1 src
WHERE src.id = 1
  AND NOT EXISTS (
      SELECT 'anything'
      FROM tab1 existing
      WHERE existing.id = 2
        AND existing.c1 = src.c1
        AND existing.c2 = src.c2
        AND existing.c3 = src.c3
        AND existing.c4 = src.c4
  )
Using your test data, we can do the following cross join - which will give us each ID combined with all rows that have id 1:
-- Combine every distinct id in tab1 with every row stored under id = 1.
SELECT
    ids.id,
    src.c1,
    src.c2,
    src.c3,
    src.c4
FROM (
    SELECT *
    FROM tab1
    WHERE id = 1
) src
CROSS JOIN (
    SELECT DISTINCT id
    FROM tab1
) ids
;
-- result
ID C1 C2 C3 C4
1 a b 01-FEB-18 c
1 o b 01-FEB-18 c
1 a b 04-MAY-18 c
1 n g 01-FEB-18 d
2 a b 01-FEB-18 c
2 o b 01-FEB-18 c
2 a b 04-MAY-18 c
2 n g 01-FEB-18 d
Then, use the MINUS operator and insert all resulting rows.
-- Insert, for every distinct id, the id = 1 rows that the id does not have yet.
-- Candidate rows are every id combined with every id = 1 row; MINUS removes the
-- candidates that already exist in tab1 (and any duplicate candidates).
insert into tab1 ( id, c1, c2, c3, c4 )
select
    ids.id
  , src.c1
  , src.c2
  , src.c3
  , src.c4
from (
    select c1, c2, c3, c4
    from tab1
    where id = 1
) src
cross join ( select distinct id from tab1 ) ids
minus -- do not insert existing rows
-- explicit column list: MINUS compares by position, so `select *` would depend
-- on the physical column order and column count of tab1
select id, c1, c2, c3, c4 from tab1
;
3 rows inserted.
Tab1 now contains:
select * from tab1 ;
ID C1 C2 C3 C4
1 a b 01-FEB-18 c
1 o b 01-FEB-18 c
1 a b 04-MAY-18 c
1 n g 01-FEB-18 d
2 a b 01-FEB-18 c
2 a b 04-MAY-18 c
2 n g 01-FEB-18 d
2 o b 01-FEB-18 c
See dbfiddle for more details.
The simplest solution is to use MERGE:
-- Copy the id = 1 rows to id = 2 unless an identical row already exists.
-- The source relabels each id = 1 row as id 2; a source row is inserted only
-- when no target row matches it on all five columns.
-- NOTE: the ON clause uses plain equality, so rows with NULL in c1..c4 never match.
merge into tab1
using ( select 2 as id, c1, c2, c3, c4
        from tab1
        where id = 1 -- optional
      ) q
on (    q.id = tab1.id
    and q.c1 = tab1.c1
    and q.c2 = tab1.c2
    and q.c3 = tab1.c3
    and q.c4 = tab1.c4)
when not matched then
    -- explicit column list so the insert does not depend on tab1's column order
    insert (id, c1, c2, c3, c4)
    values (q.id, q.c1, q.c2, q.c3, q.c4)
;
This solution will work provided tab1(id, c1, c2, c3, c4) defines a set of unique rows.
Here is a demo on SQL Fiddle.
Simply create a temporary table TMP_TABLE that holds the cross join of every distinct id with every distinct row:
-- Every distinct id combined with every distinct (c1, c2, c3, c4) combination.
-- c4 is included because it is part of the row being compared; leaving it out
-- would make the generated rows incomplete.
CREATE TABLE TMP_TABLE AS
SELECT
    ids.id,
    combos.c1,
    combos.c2,
    combos.c3,
    combos.c4
FROM (SELECT DISTINCT id FROM tab1) ids
CROSS JOIN (SELECT DISTINCT c1, c2, c3, c4 FROM tab1) combos

Remove duplicate (combination of 2 columns) values in a row

I have a requirement to remove duplicate values present in a row.
like :
C1 | C2 | C3 | C4 | C5 | C6
----------------------------
1 | 2 | 1 | 2 | 1 | 3
1 | 2 | 1 | 3 | 1 | 4
1 |NULL| 1 |NULL| 1 |NULL
OUTPUT of the query should be:
C1 | C2 | C3 | C4 | C5 | C6
----------------------------
1 | 2 | 1 | 3 |NULL|NULL
1 | 2 | 1 | 3 | 1 | 4
1 |NULL|NULL|NULL|NULL|NULL
As you can see combination of 2 columns should be unique in a row.
in Row 1:
combination of 1/2 is duplicate so its removed and 1/3 is in c5/c6 is moved to c3/c4
in Row 2:
there is no duplicate in the combination of 1/2 , 1/3, 1/4 so no change in the result
in Row 3:
All the 3 combinations are same like 1/NULL is present in all the combinations so c3 to c6 is set to null.
Thanks in advance
Maybe there is a more clever way... but you could convert them to pairs, distinct (union in this case does that), then pivot back.
-- De-duplicate the three (x, y) column pairs of every row, then pivot the
-- surviving pairs back into c1..c6.
-- Assumes mytable has an id column identifying each row (not shown in the question).
with pairs as (
    -- UNION (not UNION ALL) removes duplicate pairs within the same id
    select id, c1 as x, c2 as y from mytable
    union
    select id, c3, c4 from mytable
    union
    select id, c5, c6 from mytable
)
select id,
    max(case when rn = 1 then x end) c1,
    max(case when rn = 1 then y end) c2,
    max(case when rn = 2 then x end) c3,
    max(case when rn = 2 then y end) c4,
    max(case when rn = 3 then x end) c5,
    max(case when rn = 3 then y end) c6
from (
    -- ROW_NUMBER needs an ORDER BY (Oracle rejects it otherwise); ordering by
    -- the pair itself makes the pair-to-column placement deterministic
    select id, x, y, row_number() over (partition by id order by x, y) rn
    from pairs
) foo
group by id
This one works - data included for testing, but might take some time to understand
A tip: un-comment the code snippets under the -- debug lines, copy the script until just these code snippets and paste this part into an SQL prompt to test the intermediate results.
The principle is get a row identifier to "remember" the rows; then to vertically pivot - not 3 columns to one, but 6 columns to 3 pairs of columns; then, use DISTINCT to de-dupe; then get an index within the row identifier of the de-duped intermediate rows; then use that index to pivot horizontally again.
Like so:
-- De-duplicate the (c1,c2), (c3,c4), (c5,c6) pairs within each input row:
-- tag every row with a row id, pivot the six columns vertically into three
-- pairs, de-dupe with DISTINCT, number the surviving pairs per row id, and
-- pivot horizontally again using that number.
WITH
-- inline test data
input(c1,c2,c3,c4,c5,c6) AS (
          SELECT 1, 2,1, 2,1, 3
UNION ALL SELECT 1, 2,1, 3,1, 4
UNION ALL SELECT 1,NULL::INT,1,NULL::INT,1,NULL::INT
)
,
-- need a row identifier to "remember" which input row a pair came from
-- (input has no key, so the numbering follows whatever order the rows arrive in)
input_with_rowid AS (
    SELECT ROW_NUMBER() OVER() AS rowid, * FROM input
)
,
-- three groups of 2 columns, so pivot using 3 indexes
-- (the literals are already distinct, so UNION ALL is enough)
idx3(idx) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3)
,
-- pivot vertically, two columns at a time, and de-dupe
pivot_pair AS (
    SELECT DISTINCT
        rowid
      , CASE idx
            WHEN 1 THEN c1
            WHEN 2 THEN c3
            WHEN 3 THEN c5
        END AS c1
      , CASE idx
            WHEN 1 THEN c2
            WHEN 2 THEN c4
            WHEN 3 THEN c6
        END AS c2
    FROM input_with_rowid CROSS JOIN idx3
)
-- debug
-- SELECT * FROM pivot_pair ORDER BY rowid;
,
-- add a sequence per rowid; ordering by the pair values makes the placement
-- of the surviving pairs deterministic instead of engine-dependent
pivot_pair_with_seq AS (
    SELECT
        rowid
      , ROW_NUMBER() OVER(PARTITION BY rowid ORDER BY c1, c2) AS seq
      , c1
      , c2
    FROM pivot_pair
)
-- debug
-- SELECT * FROM pivot_pair_with_seq;
-- pivot horizontally again: pair number seq goes to columns (2*seq - 1, 2*seq)
SELECT
    rowid
  , MAX(CASE seq WHEN 1 THEN c1 END) AS c1
  , MAX(CASE seq WHEN 1 THEN c2 END) AS c2
  , MAX(CASE seq WHEN 2 THEN c1 END) AS c3
  , MAX(CASE seq WHEN 2 THEN c2 END) AS c4
  , MAX(CASE seq WHEN 3 THEN c1 END) AS c5
  , MAX(CASE seq WHEN 3 THEN c2 END) AS c6
FROM pivot_pair_with_seq
GROUP BY rowid
ORDER BY rowid
;
rowid|c1|c2|c3|c4|c5|c6
1| 1| 2| 1| 3|- |-
2| 1| 2| 1| 3| 1| 4
3| 1|- |- |- |- |-
This uses marcothesane's idea with the pivot/unpivot operators. It is easier to maintain if more input columns need to be deduplicated. It preserves the order of the source data (column pairs), whereas marcothesane's solution might reorder column pairs depending on the input data. It is also a little slower than marcothesane's, and it only works in Oracle 11g Release 1 and later.
-- De-duplicate the (c1,c2), (c3,c4), (c5,c6) pairs of every input row while
-- keeping the pairs in their original left-to-right order:
-- unpivot the pairs, keep the first occurrence of each pair per row,
-- renumber the survivors and pivot them back into six columns.
WITH
-- inline test data
input(c1,c2,c3,c4,c5,c6) AS (
SELECT 1, 2,1, 2,1, 3 from dual
UNION ALL SELECT 1, 2,1, 3,1, 4 from dual
UNION ALL SELECT 1,NULL ,1,NULL ,1,NULL from dual
)
,
-- need a row identifier; the window is ordered by a constant, so the numbering
-- is not tied to any column of the input
input_with_rowid AS (
SELECT ROW_NUMBER() OVER (order by 1) AS row_id, input.* FROM input
),
-- one row per (row_id, pair position); keep_first numbers identical
-- (val1, val2) pairs within a row by position, so 1 marks the first occurrence
unpivoted_pairs as
(
select row_id, tuple_idx, val1, val2, row_number() over (partition by row_id, val1, val2 order by tuple_idx) as keep_first
from input_with_rowid
-- "include nulls" keeps pairs whose values are NULL
UnPivot include nulls(
(val1, val2) --measure
for tuple_idx in ((c1,c2) as 1,
(c3,c4) as 2,
(c5,c6) as 3)
)
)
-- the pivot below names its output columns <alias>_<measure alias>, e.g. t1_val1
select row_id,
t1_val1 as c1,
t1_val2 as c2,
t2_val1 as c3,
t2_val2 as c4,
t3_val1 as c5,
t3_val2 as c6
from (
-- keep only the first occurrence of each pair and renumber the survivors
-- 1..n in their original position order
select row_id,
val1, val2, row_number() over (partition by row_id order by tuple_idx) as tuple_order
from unpivoted_pairs
where keep_first = 1
)
-- each (row_id, tuple_order) cell holds exactly one surviving pair, so SUM
-- just passes that single value through
pivot (sum(val1) as val1, sum(val2) as val2
for tuple_order in ('1' as t1, '2' as t2, '3' as t3)
)