SQL: Finding values in sets, that only appear once - sql

Using Oracle SQL, I need to find the IDs (ICFPROKEYI) that occur more then once, but have a certain field (ICFFLDC) only once:
ICFPROKEYI|ICFKAVKEYI|ICFNUMS|ICFFLDC
----------|----------|-------|-----------------------------
2234884| 5887| 0|Farbe.14870
2234884| 5887| 1|Ueberschrift_i_24291101.18563
2234884| 5888| 0|Farbe.14870
2234884| 5889| 0|Farbe.14870
2234884| 5890| 0|Farbe.14870
2234884| 5896| 0|Farbe.14870
In this case, 2234884, because it appears 6 times but has a value (Ueberschrift_i_24291101.18563) appear only once

You can try this:
select a.ICFPROKEYI from table a join table b
on a.ICFPROKEYI = b.ICFPROKEYI and a.ICFFLDC <> b.ICFFLDC

GROUP BY icfprokeyi and use HAVING count(*) > 1 to get the icfprokeyi that appear more than once and GROUP BY icfprokeyi, icffldc and use HAVING count(*) = 1 to get the icfprokeyi where the icffldc doesn't exist in another row with the same icfprokeyi. Then join both aggregations.
SELECT x1.icfprokeyi
FROM (SELECT t1.icfprokeyi
FROM elbat t1
GROUP BY t1.icfprokeyi
HAVING count(*) > 1) x1
INNER JOIN (SELECT t2.icfprokeyi
FROM elbat t2
GROUP BY t2.icfprokeyi,
t2.icffldc
HAVING count(*) = 1) x2
ON x2.icfprokeyi = x1.icfprokeyi;

Here is the postgres query:
select ICFPROKEYI,ICFFLDC from table group by ICFPROKEYI,ICFFLDC having count(*)=1;
Help yourself to write the oracle equivalent for it.

I've tried this with MySQL and might be applicable in oracle as well.
SELECT ICFPROKEYI
FROM <yourtable> WHERE ICFPROKEYI IN
(
SELECT ICFPROKEYI
FROM <yourtable>
GROUP BY ICFPROKEYI
HAVING COUNT(ICFPROKEYI) > 1
)
GROUP BY ICFPROKEYI, ICFFLDC
HAVING cnt(ICFFLDC) = 1

You can simply use GROUP BY with HAVING (two conditions) as following:
SELECT
ICFPROKEYI
FROM <yourtable>
GROUP BY ICFPROKEYI
HAVING COUNT(1) > 1
AND SUM(CASE WHEN ICFFLDC = 'Ueberschrift_i_24291101.18563'
THEN 1 END) = 1
-- Update
After #ankit specified that it can be any value(not fixed value - Ueberschrift_i_24291101.18563), OP can achieve the desired result using following query:
SELECT ICFPROKEYI
FROM
( SELECT T.ICFPROKEYI,
COUNT(1) OVER(
PARTITION BY T.ICFPROKEYI, ICFFLDC
) AS CNT
FROM YOURTABLE T
)
WHERE CNT = 1
Cheers!!

I have replicated this on my local, please find the below SQL block and try it yourself
drop table test;
CREATE TABLE test(
ICFPROKEYI INTEGER NOT NULL
,ICFKAVKEYI INTEGER NOT NULL
,ICFNUMS number(1,0) NOT NULL
,ICFFLDC VARCHAR(30) NOT NULL
);
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5887,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5887,1,'Ueberschrift_i_24291101.18563');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5888,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5889,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5888,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234884,5889,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234885,5890,0,'Farbe.14870');
INSERT INTO test(ICFPROKEYI,ICFKAVKEYI,ICFNUMS,ICFFLDC) VALUES (2234885,5896,0,'Farbe.14870');
Query
SELECT A.ICFPROKEYI FROM (select ICFPROKEYI,ICFFLDC, count(*) AS ICFFLDC_CNT from test
group by ICFPROKEYI,ICFFLDC)A LEFT OUTER JOIN
(select ICFPROKEYI,count(*) as ICFPROKEYI_CNT from test group by ICFPROKEYI)B
ON A.ICFPROKEYI = B.ICFPROKEYI WHERE A.ICFFLDC_CNT = 1AND B.ICFPROKEYI_CNT > 1 ;

Related

How to get the aggregate results for missing values as zero

My DDL is like
create table if not exists sample_t
(
id bigserial NOT NULL constraint sample_t_id primary key,
test_value varchar(255),
test varchar(255) not null,
count bigint not null
);
Sample insert queries
INSERT INTO public.sample_t (id, test_value, test, count) VALUES (1, 'CC1', 'hi-1', 11);
INSERT INTO public.sample_t (id, test_value, test, count) VALUES (2, 'CC2', 'hi-1', 10);
INSERT INTO public.sample_t (id, test_value, test, count) VALUES (3, 'CC1', 'hi-2', 4);
My Query is
select test, sum(count) from sample_t where test_value= 'CC2' group by test;
The o/p is
test | sum
hi-1 | 10
However, I want to list down missing 'test' column values as 0. So the expected o/p should look like:
test | sum
hi-1 | 10
hi-2 | 0
Instead, use conditional aggregation:
select test, sum(case when test_value = 'CC2' then count else 0 end)
from sample_t
group by test;
Alternatively, if you have a table of all test values:
select t.test, coalesce(sum(count), 0)
from test t left join
sample_t s
on s.test = t.test and s.test_value = 'CC2'
group by t.test;
The problem here is that your WHERE clause might completely filter off a test group, should none of its records have the matching test value. You may use a left join here to preserve every initial test value:
SELECT DISTINCT
s1.test,
COALESCE(s2.cnt, 0) AS cnt
FROM sample_t s1
LEFT JOIN
(
SELECT test, COUNT(*) AS cnt
FROM sample_t
WHERE test_value = 'CC2'
GROUP BY test
) s2
ON s1.test = s2.test;
Or, you could use conditional aggregation:
SELECT
test, COUNT(CASE WHEN test_value = 'CC2' THEN 1 END) cnt
FROM sample_t
GROUP BY test;

SQL case when with equality and count conditions

How can i perform a query with filtering both value and count of the element.Such as
Select
(case
when element = 'data1' and (select count(element) from mytable where element='data1') > 15 then '1'
when element = 'data2' and (select count(element) from mytable where element='data2') > 15 then '2'
.
.
.
)
from mytable
where conditions
are there any quick and simple ways to implement this?
I think you want window functions:
select (case when element = 'data1' and
count(*) over (partition by element) > 15
then '1'
when element = 'data2' and
count(*) over (partition by element) > 15
then '2'
.
.
.
)
from mytable
where conditions
For both code clarity and performance reason, I would separate the aggregating in a CTE and then invoke it in a join. If the table is big perhaps make sense you put the result in a temporary table instead of CTE for performance reasons.
;WITH ElementCTE
AS
(
SELECT element, count(Element) AS count_Element
FROM mytable
GROUP BY element
WHERE Conditions
)
SELECT
CASE ELEMENT
WHEN 'Data1' AND count_Element > 15 THEN '1'
WHEN 'Data2' AND count_Element > 15 THEN '2'
FROM mytable AS mt
INNER JOIN Element AS el
ON mt.Element = el.Element
WHERE mt.conditions
Assuming you have a table:
CREATE TABLE NULLTEST
(
TransactioNo INT,
Code VARCHAR(25)
)
INSERT INTO NULLTEST VALUES (NULL, 'TEST1');
INSERT INTO NULLTEST VALUES (NULL, 'TEST2');
INSERT INTO NULLTEST VALUES (1, 'TEST2');
The query could look like this:
SELECT
CASE
WHEN Code = 'TEST2' AND
(SELECT COUNT(1) FROM dbo.NULLTEST n WHERE n.Code = 'TEST2')> 1 THEN '1'
WHEN Code = 'TEST1' AND
(SELECT COUNT(1) FROM dbo.NULLTEST n WHERE n.Code ='TEST1')> 1 THEN '2'
ELSE '3' end Yourcolumn
FROM dbo.NULLTEST t
WHERE ...

Getting Number of Common Values from 2 comma-seperated strings

I have a table that contains comma-separated values in a column In Postgres.
ID PRODS
--------------------------------------
1 ,142,10,75,
2 ,142,87,63,
3 ,75,73,2,58,
4 ,142,2,
Now I want a query where I can give a comma-separated string and it will tell me the number of matches between the input string and the string present in the row.
For instance, for input value ',142,87,', I want the output like
ID PRODS No. of Match
------------------------------------------------------------------------
1 ,142,10,75, 1
2 ,142,87,63, 2
3 ,75,73,2,58, 0
4 ,142,2, 1
Try this:
SELECT
*,
ARRAY(
SELECT
*
FROM
unnest(string_to_array(trim(both ',' from prods), ','))
WHERE
unnest = ANY(string_to_array(',142,87,', ','))
)
FROM
prods_table;
Output is:
1 ,142,10,75, {142}
2 ,142,87,63, {142,87}
3 ,75,73,2,58, {}
4 ,142,2, {142}
Add the cardinality(anyarray) function to the last column to get just a number of matches.
And consider changing your database design.
Check This.
select T.*,
COALESCE(No_of_Match,'0')
from TT T Left join
(
select ID,count(ID) No_of_Match
from (
select ID,unnest(string_to_array(trim(t.prods, ','), ',')) A
from TT t)a
Where A in ('142','87')
group by ID
)B
On T.Id=b.id
Demo Here
OutPut
If you install the intarray extension, this gets quite easy:
select id, prods, cardinality(string_to_array(trim(prods, ','), ',')::int[] & array[142,87])
from bad_design;
Otherwise it's a bit more complicated:
select bd.id, bd.prods, m.matches
from bad_design bd
join lateral (
select bd.id, count(v.p) as matches
from unnest(string_to_array(trim(bd.prods, ','), ',')) as l(p)
left join (
values ('142'),('87') --<< these are your input values
) v(p) on l.p = v.p
group by bd.id
) m on m.id = bd.id
order by bd.id;
Online example: http://rextester.com/ZIYS97736
But you should really fix your data model.
with data as
(
select *,
unnest(string_to_array(trim(both ',' from prods), ',') ) as v
from myTable
),
counts as
(
select id, count(t) as c from data
left join
( select unnest(string_to_array(',142,87,', ',') ) as t) tmp on tmp.t = data.v
group by id
order by id
)
select t1.id, t1.prods, t2.c as "No. of Match"
from myTable t1
inner join counts t2 on t1.id = t2.id;

Remove duplicates in SQL Result set of ONE table

Afternoon/Evening all,
I'm looking for the final touches to the below query. I need to remove the duplicate occurrences of a column in a particular row. Currently using the below SQL:
SELECT CBNEW.*
FROM CallbackNewID CBNEW
INNER JOIN (SELECT IDNEW, MAX(CallbackDate) AS MaxDate
FROM CallbackNewID
GROUP BY IDNEW) AS groupedCBNEW
ON (CBNEW.CallbackDate = groupedCBNEW.MaxDate) AND (CBNEW.IDNEW = groupedCBNEW.IDNEW);
My result set looks like the below
ID RecID Comp Rem Date_ IDNEW IDOLD CB? CallbackDate
138618 83209 1 0 2012-03-16 12:40:00 83209 83209 2 16-Mar-12
138619 83209 1 0 2012-03-16 12:40:00 83209 83209 2 16-Mar-12
110470 83799 1 0 2011-07-27 11:46:00 83799 83799 10 27-Jul-11
110471 83799 1 0 2011-07-27 11:46:00 83799 83799 10 27-Jul-11
This however gives me duplicate values in the CallBackDate and IDNEW Column because in the table there are some different Primary Keys with the same IDNEW and CallbackDate values.
If I dump this result into Excel, I can just use remove duplicates on the first ID column, and the problem's solved.
But what I want to do is make sure my result only includes the FIRST instance of the ID column, where IDNEW and CallbackDate are duplicated.
I'm sure I just need to append a tiny piece of SQL, but I'm stuck if I can find the answer so far.
Your help is very much appreciated.
Try adding MIN(ID) to the inner query and then adding it also on the ON clause:
SELECT CBNEW.*
FROM CallbackNewID CBNEW
INNER JOIN (SELECT IDNEW, MIN(ID) AS MinId, MAX(CallbackDate) AS MaxDate
FROM CallbackNewID
GROUP BY IDNEW) AS groupedCBNEW
ON (CBNEW.CallbackDate = groupedCBNEW.MaxDate)
AND (CBNEW.IDNEW = groupedCBNEW.IDNEW)
AND (CBNEW.ID = groupedCBNEW.MinId) ;
sqlfiddle demo
Here is a rather "brute force" approach. It just takes the results of your original query and does Min() on [ID], Max() on [Comp] and [Rem], and GROUP BY on everything else:
SELECT
Min(t.ID) AS MinOfID,
t.RecID,
Max(t.Comp) AS MaxOfComp,
Max(t.Rem) AS MaxOfRem,
t.Date_,
t.IDNEW,
t.IDOLD,
t.[CB?],
t.CallbackDate
FROM
(
SELECT CBNEW.*
FROM
CallbackNewID CBNEW
INNER JOIN
(
SELECT IDNEW, MAX(CallbackDate) AS MaxDate
FROM CallbackNewID
GROUP BY IDNEW
) AS groupedCBNEW
ON (CBNEW.CallbackDate = groupedCBNEW.MaxDate)
AND (CBNEW.IDNEW = groupedCBNEW.IDNEW)
) t
GROUP BY
t.RecID,
t.Date_,
t.IDNEW,
t.IDOLD,
t.[CB?],
t.CallbackDate;
It might not be terribly elegant, but if it works....
In MS SQL Server, I think you are looking for the ROW_NUMBER() function.
Something like this should help you get what you are looking for:
SELECT
X.*
FROM
(
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY DBNEW.IDNEW, DBNEW.MaxDate) [row_num]
FROM
CallbackNewID CBNEW
INNER JOIN
(
SELECT
IDNEW,
MAX(CallbackDate) AS MaxDate
FROM
CallbackNewID
GROUP BY
IDNEW
) AS groupedCBNEW ON (CBNEW.CallbackDate = groupedCBNEW.MaxDate) AND (CBNEW.IDNEW = groupedCBNEW.IDNEW)
) X
WHERE
X.row_num = 1
SELECT
A.*
FROM
(SELECT
*,
ROW_NUMBER() OVER (PARTITION BY IDNEW ORDER BY CallbackDate DESC)
AS [row_num]
FROM CallbackNewID
) A
WHERE
A.row_num = 1

SQL query for finding first missing sequence string (prefix+no)

T-SQL query for finding first missing sequence string (prefix+no)
Sequence can have a prefix + a continuing no.
ex sequence will be
ID
-------
AUTO_500
AUTO_501
AUTO_502
AUTO_504
AUTO_505
AUTO_506
AUTO_507
AUTO_508
So above the missing sequence is AUTO_503 or if there is no missing sequence then it must return next sequence.
Also starting no is to specified ex. 500 in this case and prefix can be null i.e. no prefix only numbers as sequence.
You could LEFT JOIN the id numbers on shifted(+1) values to find gaps in sequential order:
SELECT
MIN(a.offsetnum) AS first_missing_num
FROM
(
SELECT 500 AS offsetnum
UNION
SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) + 1
FROM tbl
) a
LEFT JOIN
(SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) AS idnum FROM tbl) b ON a.offsetnum = b.idnum
WHERE
a.offsetnum >= 500 AND b.idnum IS NULL
SQLFiddle Demo
Using a recursive CTE to dynamically generate the sequence between the min and max of the ID Numbers maybe over complicated things a bit but it seems to work -
LIVE ON FIDDLE
CREATE TABLE tbl (
id VARCHAR(55)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
;WITH
data_cte(id)AS
(SELECT [id] = CAST(REPLACE(id, 'AUTO_', '') AS INT) FROM tbl)
,maxmin_cte(minId, maxId)AS
(SELECT [minId] = min(id),[maxId] = max(id) FROM data_cte)
,recursive_cte(n) AS
(
SELECT [minId] n from maxmin_cte
UNION ALL
SELECT (1 + n) n FROM recursive_cte WHERE n < (SELECT [maxId] from maxmin_cte)
)
SELECT x.n
FROM
recursive_cte x
LEFT OUTER JOIN data_cte y ON
x.n = y.id
WHERE y.id IS NULL
Check this solution.Here you just need to add identity column.
CREATE TABLE tbl (
id VARCHAR(55),
idn int identity(0,1)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
SELECT min(idn+500) FROM tbl where 'AUTO_'+cast((idn+500) as varchar)<>id
try this:
with cte as(
select cast(REPLACE(id,'AUTO_','') as int)-500+1 [diff],ROW_NUMBER()
over(order by cast(REPLACE(id,'AUTO_','') as int)) [rnk] from tbl)
select top 1 'AUTO_'+cast(500+rnk as varchar(50)) [ID] from cte
where [diff]=[rnk]
order by rnk desc
SQL FIddle Demo
Had a similar situation, where we have R_Cds that were like this R01005
;with Active_R_CD (R_CD)
As
(
Select Distinct Cast(Replace(R_CD,'R', ' ') as Int)
from table
where stat = 1)
select Arc.R_CD + 1 as 'Gaps in R Code'
from Active_R_CD as Arc
left outer join Active_R_CD as r on ARC.R_CD + 1 = R.R_CD
where R.R_CD is null
order by 1