SQL Column compare in the same table (self-join) - sql

I need a hint in order to solve this SQL (self-join) problem:
a table, with columns value and category
id || value || category || foo
------------------------------------
1 || 1 || a || 1
2 || 2 || a || 4
3 || 3 || a || 2
4 || 0 || b || 2
5 || 1 || b || 1
6 || 2 || b || 4
7 || 3 || b || 2
8 || 4 || b || 2
9 || 5 || b || 1
10 || 5 || b || 4
11 || 6 || b || 2
12 || 99 || z || 2
I would like to compare all values from category b and all values from category a and get all values that are in b and not in a or their id, so:
(0,1,2,3,4,5,5,6) "compare" (1,2,3) => (0,4,5,5,6)

ANSI SQL:
-- Rows of category 'b' whose value does not occur in any category 'a' row.
-- NOT EXISTS is used instead of NOT IN: if a single category 'a' row had a
-- NULL value, NOT IN would evaluate to UNKNOWN for every candidate row and
-- the query would silently return nothing.
-- Note: a category 'b' row whose own value is NULL is returned here, because
-- it has no equal value in category 'a'.
SELECT
    b.*
FROM
    tbl b
WHERE
    b.category = 'b'
    AND NOT EXISTS (
        SELECT 1
        FROM tbl a
        WHERE a.category = 'a'
          AND a.value = b.value
    )
See it live here.

Start analyzing your task: "get all values that are in b and not in a or their id"
get all values > SELECT value FROM mytable
that are in b > WHERE category = 'b'
and not in a > AND value NOT IN (SELECT value FROM mytable WHERE category = 'a')
or their id - what should this mean?

Related

how to use xmlagg function

here is the table for reference
-- Reference table for the question: one row per employee.
CREATE TABLE XX_EMPLOYEES
(
    EMP_ID NUMBER NOT NULL,
    EMP_FIRST_NAME VARCHAR2(250) NOT NULL,
    EMP_MIDDLE_NAME VARCHAR2(250) NOT NULL,
    EMP_LAST_NAME VARCHAR2(250) NOT NULL,
    Hired_Date DATE NOT NULL,
    Country VARCHAR2(250) NOT NULL,
    Salary NUMBER NOT NULL
);
-- Seed the four sample employees with one multi-row INSERT ALL.
-- Hire dates are ANSI DATE literals so the load does not depend on the
-- session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE; the 'DD-Mon-YYYY' strings
-- used before were converted implicitly and fail under other NLS settings.
INSERT ALL
INTO XX_EMPLOYEES (EMP_ID, EMP_FIRST_NAME, EMP_MIDDLE_NAME, EMP_LAST_NAME, Hired_Date, Country, Salary) VALUES (1,'Tomm','Jef','Adam',DATE '2016-01-01','JORDAN',1000)
INTO XX_EMPLOYEES (EMP_ID, EMP_FIRST_NAME, EMP_MIDDLE_NAME, EMP_LAST_NAME, Hired_Date, Country, Salary) VALUES (2,'Mohammed','Ahmed','Mahmoud',DATE '2009-07-15','UAE',900)
INTO XX_EMPLOYEES (EMP_ID, EMP_FIRST_NAME, EMP_MIDDLE_NAME, EMP_LAST_NAME, Hired_Date, Country, Salary) VALUES (4,'Ali','Ahmad','Mahmoud',DATE '2000-07-07','UK',1200)
INTO XX_EMPLOYEES (EMP_ID, EMP_FIRST_NAME, EMP_MIDDLE_NAME, EMP_LAST_NAME, Hired_Date, Country, Salary) VALUES (10,'Basel','Jamal','Saeed',DATE '2001-04-10','UAE',1000)
-- INSERT ALL requires a driving subquery; one row from dual fires each INTO once.
SELECT * FROM dual;
I want to use the XMLAGG function to return the full information of all employees in one line, with the employees concatenated by #$#
I have already done it with a loop and RTRIM, but I need to use XMLAGG. Is it possible?
Result should be like this:
1,Tomm,Jef,Adam,01-JAN-2016,JORDAN,1000 #$# 2,Mohammed,Ahmed,Mahmoud,15-JUL-2009,UAE,900 #$# 4,Ali,Ahmad,Mahmoud,07-JUL-2000,UK,1200 #$# 10,Basel,Jamal,Saeed,10-APR-2001,UAE,1000
Statement processed.
If the final result doesn't exceed 4000 characters, listagg is simpler to use:
-- SQL*Plus session, LISTAGG variant.
-- The one_emp CTE builds one comma-separated string per employee; the main
-- query then joins all of those strings with '#$#' into a single value.
-- hired_date is concatenated without TO_CHAR, so it is converted to text
-- implicitly and its format follows the session's date settings.
-- ORDER BY NULL requests no particular order for the concatenated rows.
SQL> WITH
2 one_emp
3 AS
4 (SELECT emp_id
5 || ','
6 || emp_first_name
7 || ','
8 || emp_middle_name
9 || ','
10 || emp_last_name
11 || ','
12 || hired_date
13 || ','
14 || country
15 || ','
16 || salary AS one_employee
17 FROM xx_employees)
18 SELECT LISTAGG (one_employee, '#$#') WITHIN GROUP (ORDER BY NULL) AS result
19 FROM one_emp;
RESULT
--------------------------------------------------------------------------------
1,Tomm,Jef,Adam,01.01.16,JORDAN,1000#$#10,Basel,Jamal,Saeed,10.04.01,UAE,1000#$#
2,Mohammed,Ahmed,Mahmoud,15.07.09,UAE,900#$#4,Ali,Ahmad,Mahmoud,07.07.00,UK,1200
If the result is longer than 4000 characters (which it might be, if there are many employees involved) or you just want to use xmlagg, then
-- SQL*Plus session, XMLAGG variant.
-- The one_emp CTE builds one comma-separated string per employee. Each string
-- gets the '#$#' separator appended and is wrapped in an XML element;
-- XMLAGG aggregates the elements and EXTRACT('//text()') pulls the text back out.
-- RTRIM (..., '#$#') then removes the separator left after the last employee.
-- NOTE(review): RTRIM treats its second argument as a set of characters, so it
-- would also strip a trailing '#' or '$' that belongs to the data -- confirm
-- this is acceptable for real values.
SQL> WITH
2 one_emp
3 AS
4 (SELECT emp_id
5 || ','
6 || emp_first_name
7 || ','
8 || emp_middle_name
9 || ','
10 || emp_last_name
11 || ','
12 || hired_date
13 || ','
14 || country
15 || ','
16 || salary AS one_employee
17 FROM xx_employees)
18 SELECT RTRIM (
19 XMLAGG (XMLELEMENT (e, one_employee || '#$#') ORDER BY NULL).EXTRACT (
20 '//text()'),
21 '#$#') AS result
22 FROM one_emp;
RESULT
--------------------------------------------------------------------------------
1,Tomm,Jef,Adam,01.01.16,JORDAN,1000#$#2,Mohammed,Ahmed,Mahmoud,15.07.09,UAE,900
#$#4,Ali,Ahmad,Mahmoud,07.07.00,UK,1200#$#10,Basel,Jamal,Saeed,10.04.01,UAE,1000
SQL>
It isn't too difficult to convert that code into a function:
-- SQL*Plus session creating f_test.
-- f_test takes no parameters and returns, as a CLOB, all employees from
-- xx_employees: one comma-separated string per employee (built in the one_emp
-- CTE), joined with '#$#' by LISTAGG and selected INTO retval.
-- NOTE(review): the value is still produced by LISTAGG, so the length limit
-- mentioned for LISTAGG in the surrounding text presumably applies here too,
-- despite the CLOB return type -- confirm before using with many employees.
SQL> CREATE OR REPLACE FUNCTION f_test
2 RETURN CLOB
3 IS
4 retval CLOB;
5 BEGIN
6 WITH
7 one_emp
8 AS
9 (SELECT emp_id
10 || ','
11 || emp_first_name
12 || ','
13 || emp_middle_name
14 || ','
15 || emp_last_name
16 || ','
17 || hired_date
18 || ','
19 || country
20 || ','
21 || salary AS one_employee
22 FROM xx_employees)
23 SELECT LISTAGG (one_employee, '#$#') WITHIN GROUP (ORDER BY NULL)
24 INTO retval
25 FROM one_emp;
26
27 RETURN retval;
28 END;
29 /
Function created.
Let's try it:
SQL> select f_test from dual;
F_TEST
--------------------------------------------------------------------------------
1,Tomm,Jef,Adam,01.01.16,JORDAN,1000#$#10,Basel,Jamal,Saeed,10.04.01,UAE,1000#$#
2,Mohammed,Ahmed,Mahmoud,15.07.09,UAE,900#$#4,Ali,Ahmad,Mahmoud,07.07.00,UK,1200
SQL>
Without a CTE and without RTRIM:
-- SQL*Plus session, XMLAGG without a CTE.
-- The per-employee string is built inline inside XMLELEMENT, with '#$#'
-- appended to every row; XMLAGG aggregates the elements and
-- EXTRACT ('//text()') returns their text.
-- There is no RTRIM here, so the separator after the last employee is kept.
SQL> SELECT XMLAGG (XMLELEMENT (
2 e,
3 emp_id
4 || ','
5 || emp_first_name
6 || ','
7 || emp_middle_name
8 || ','
9 || emp_last_name
10 || ','
11 || hired_date
12 || ','
13 || country
14 || ','
15 || salary
16 || '#$#')
17 ORDER BY NULL).EXTRACT ('//text()') AS result
18 FROM xx_employees;
RESULT
--------------------------------------------------------------------------------
1,Tomm,Jef,Adam,01.01.16,JORDAN,1000#$#2,Mohammed,Ahmed,Mahmoud,15.07.09,UAE,900
#$#4,Ali,Ahmad,Mahmoud,07.07.00,UK,1200#$#10,Basel,Jamal,Saeed,10.04.01,UAE,1000
#$#
SQL>

SQL Concatenate based on string inclusion

I have the following SQL table:
COL_A || COL_B ||
=========================
aa || 1 ||
aa || 2 ||
aa.bb || 3 ||
aa.bb.cc || 4 ||
aa.bb.cc || 5 ||
dd || 6 ||
dd.ee || 7 ||
As part of a SELECT query, I'd like to group by the values of Col_A and concatenate the values in Col_B based on the values in Col_A being a subset of one another. Meaning, if a value of Col_A is contained by/is equal to another value of Col_A, the corresponding Col_B of the superset/same Col_A value should be concatenated together.
Desired result:
COL_A || COL_B ||
======================================
aa || [1, 2, 3, 4, 5] ||
aa.bb || [3, 4, 5] ||
aa.bb.cc || [4, 5] ||
dd || [6, 7] ||
dd.ee || [7] ||
You can use a self join with array_agg:
-- For every col_a value, collect the col_b values of all rows whose col_a
-- contains that value (equal values included).
-- strpos() performs a literal substring test. The regex operator (~) would
-- interpret the '.' inside values such as 'aa.bb' as "any character" and so
-- could match unrelated rows.
-- ORDER BY inside array_agg makes the element order deterministic.
select t1.col_a, array_agg(distinct t2.col_b order by t2.col_b)
from vals t1
join vals t2 on strpos(t2.col_a, t1.col_a) > 0
group by t1.col_a
order by t1.col_a
You can do this using a lateral join
-- For every cola value, build a bracketed, comma-separated list of the colb
-- values of all rows whose cola contains that value (equal values included).
select t.cola, Concat('[',x.colB,']') ColB
from t
left join lateral (
    -- Cast to text, not "character": a bare character type is character(1),
    -- which would truncate multi-digit values to their first digit.
    -- strpos() is a literal substring test; the regex operator (~) would read
    -- the '.' in values like 'aa.bb' as a wildcard.
    -- ORDER BY keeps the list stable, so duplicate cola rows produce the same
    -- string and collapse in the outer GROUP BY.
    select string_agg(t2.colb::text, ',' order by t2.colb) colB
    from t t2
    where strpos(t2.cola, t.cola) > 0
) x on true
group by t.cola, x.colb;
Working fiddle

Select all rows with equal values in 2 columns within each group

Consider the following table
ID || YEAR || TERM || NAME || UNIT
----------------------------------------
1 || 1985 || 1 || MARIE || 01VS
1 || 1986 || 2 || MARIE || 01VS
1 || 1986 || 2 || MARIE || 07GB
1 || 1986 || 3 || MARIE || 07GB
2 || 1992 || 1 || AVALON || 01VS
2 || 1992 || 2 || AVALON || 01VS
2 || 1992 || 3 || AVALON || 01VS
3 || 2001 || 1 || DENIS || 08HK
3 || 2001 || 1 || DENIS || 07GB
3 || 2001 || 2 || DENIS || 08HK
3 || 2002 || 1 || DENIS || 08HK
I wanted to write a sql query in H2 which would return all rows for each ID in which YEAR and TERM have equal values. So for the table above the result should be like below:
ID || YEAR || TERM || NAME || UNIT
----------------------------------------
1 || 1986 || 2 || MARIE || 01VS
1 || 1986 || 2 || MARIE || 07GB
3 || 2001 || 1 || DENIS || 08HK
3 || 2001 || 1 || DENIS || 07GB
You can use exists :
-- Keep each row for which a sibling row exists with the same id, year and
-- term but a different unit.
SELECT cur.*
FROM table cur
WHERE EXISTS (
    SELECT 1
    FROM table other
    WHERE other.id = cur.id
      AND other.year = cur.year
      AND other.term = cur.term
      AND other.unit <> cur.unit
);
Something like the below would work I think
-- Keep each row whose (id, year, term) combination occurs more than once.
-- year must be part of both the correlation and the grouping: matching on
-- id and term alone would also count rows from other years, e.g. it would
-- return (3, 2002, 1) because id 3 has term 1 rows in 2001.
select *
from table t
where exists (select t2.id, t2.year, t2.term
              from table t2
              where t2.id = t.id
                and t2.year = t.year
                and t2.term = t.term
              group by t2.id, t2.year, t2.term
              having count(*) > 1)
However it would be easier if the table had a primary key of some sort.
How about joining the table to a subquery with GROUP BY and a HAVING ?
-- Find the (ID, YEAR, TERM) combinations that occur more than once, then
-- join back to the table to return every row belonging to them.
SELECT src.*
FROM yourtable src
INNER JOIN (
    SELECT ID, YEAR, TERM
    FROM yourtable
    GROUP BY ID, YEAR, TERM
    HAVING COUNT(*) > 1
) dup
    ON dup.ID = src.ID
   AND dup.YEAR = src.YEAR
   AND dup.TERM = src.TERM;

How to query the latest date from each duplicated name

I have a question about how to query the tuple(s) that have the latest date for each name.
This is my example table.
ID || NAME || DATE || INFOA || INFOB || INFOC
1 || Alice || 2015-08-20 12:0:0 || Y || N || Y
2 || Bob || 2015-08-20 12:0:0 || Y || N || Y
3 || Cheschire || 2015-08-20 12:0:0 || N || Y || Y
4 || Alice || 2015-08-25 12:0:0 || N || Y || N
5 || Bob || 2015-08-15 12:0:0 || Y || Y || N
Query I used
-- Asker's attempt at "latest DATE per NAME".
-- INFOA, INFOB and INFOC are part of the GROUP BY, so every distinct
-- (NAME, INFOA, INFOB, INFOC) combination forms its own group and MAX(DATE)
-- is computed per combination rather than per name.
SELECT NAME, MAX(DATE), INFOA, INFOB, INFOC
FROM EXAMPLE_TABLE
GROUP BY NAME,INFOA,INFOB,INFOC
Result is...
Alice || 2015-08-20 12:0:0 || Y || N || Y
Bob || 2015-08-20 12:0:0 || Y || N || Y
Cheschire || 2015-08-20 12:0:0 || N || Y || Y
Alice || 2015-08-25 12:0:0 || N || Y || N
Bob || 2015-08-15 12:0:0 || Y || Y || N
But my expected result is...
Bob || 2015-08-20 12:0:0 || Y || N || Y
Cheschire || 2015-08-20 12:0:0 || N || Y || Y
Alice || 2015-08-25 12:0:0 || N || Y || N
What should I do?
Use NOT EXISTS to return a row only if there is no other row with the same name but a later date:
-- A row is kept only when no other row with the same name carries a later
-- date, i.e. it is the latest row for that name.
SELECT *
FROM tablename cur
WHERE NOT EXISTS (
    SELECT 1
    FROM tablename newer
    WHERE newer.name = cur.name
      AND newer.date > cur.date
)
I tried below:
-- Scratch table mirroring the question: aa = name, bb = timestamp,
-- cc/dd/ee = the three Y/N flags.
CREATE TABLE T1 (
    AA VARCHAR2(10),
    BB TIMESTAMP(6),
    CC VARCHAR2(1),
    DD VARCHAR2(1),
    EE VARCHAR2(1)
);
-- Sample rows; the timestamps are offsets in days from the current time.
INSERT INTO T1 (AA, BB, CC, DD, EE) VALUES ('a', systimestamp - 5, 'Y', 'N', 'Y');
INSERT INTO T1 (AA, BB, CC, DD, EE) VALUES ('b', systimestamp - 5, 'N', 'N', 'Y');
INSERT INTO T1 (AA, BB, CC, DD, EE) VALUES ('c', systimestamp - 5, 'N', 'Y', 'Y');
INSERT INTO T1 (AA, BB, CC, DD, EE) VALUES ('a', systimestamp - 1, 'N', 'Y', 'N');
INSERT INTO T1 (AA, BB, CC, DD, EE) VALUES ('b', systimestamp - 11, 'Y', 'Y', 'N');
Now, below is the query I used to get output you wanted:
-- Keep the rows whose (aa, bb) pair equals the latest bb found for that aa.
SELECT *
FROM T1 cur
WHERE (cur.aa, cur.bb) IN (
    SELECT grp.aa, MAX(grp.bb)
    FROM T1 grp
    GROUP BY grp.aa
);
Output:
b 21-AUG-15 02.51.47.000000000 AM N N Y
c 21-AUG-15 02.51.47.000000000 AM N Y Y
a 25-AUG-15 02.51.48.000000000 AM N Y N
Note: as per your question, you required latest date for each name (no matter what other values would be)
use below query to get the results as you expected
-- Number the rows of each name from newest to oldest date1, keep only the
-- newest one per name, and list the survivors by date1.
SELECT id, name, date1, infoa, infob, infoc
FROM (
    SELECT id, name, date1, infoa, infob, infoc,
           ROW_NUMBER() OVER (PARTITION BY name ORDER BY date1 DESC) AS rn
    FROM testpart
)
WHERE rn = 1
ORDER BY date1
Please try with the below code snippet.
-- Table variable holding the sample rows from the question.
-- Table variables use the @ sigil; '#userData' is not valid in DECLARE ... TABLE.
DECLARE @userData TABLE(
    ID INT NOT NULL,
    Name VARCHAR(MAX) NOT NULL,
    [Date] DATETIME NOT NULL,
    INFOA VARCHAR(MAX) NOT NULL,
    INFOB VARCHAR(MAX) NOT NULL,
    INFOC VARCHAR(MAX) NOT NULL
);
-- Explicit column lists keep the inserts independent of column order;
-- ID values are integer literals to match the INT column.
INSERT INTO @userData (ID, Name, [Date], INFOA, INFOB, INFOC) VALUES (1,'Alice','2015-08-20 12:0:0','Y','N','Y')
INSERT INTO @userData (ID, Name, [Date], INFOA, INFOB, INFOC) VALUES (2,'Bob','2015-08-20 12:0:0','Y','N','Y')
INSERT INTO @userData (ID, Name, [Date], INFOA, INFOB, INFOC) VALUES (3,'Cheschire','2015-08-20 12:0:0','N','Y','Y')
INSERT INTO @userData (ID, Name, [Date], INFOA, INFOB, INFOC) VALUES (4,'Alice','2015-08-25 12:0:0','N','Y','N')
INSERT INTO @userData (ID, Name, [Date], INFOA, INFOB, INFOC) VALUES (5,'Bob','2015-08-15 12:0:0','Y','Y','N')
-- Rank each name's rows from newest to oldest date and keep rank 1.
-- RANK() gives tied rows the same rank, so several rows sharing the latest
-- date for a name are all returned.
SELECT a.ID, a.Name, a.[Date], a.INFOA, a.INFOB, a.INFOC
FROM (
    SELECT *, RANK() OVER (PARTITION BY [Name] ORDER BY [Date] DESC) AS [Rank]
    FROM @userData
) a
WHERE a.[Rank] = 1
ORDER BY a.ID

Pareto chart in SQL

I have a table and I want to take only the columns until >80, according to U1.
T1:
User || C1 || C2 || C3 || C4 || C5 || Total
U1 || 10 || 20 || 15 || 40 || 15 || 100
U2 || 27 || 17 || 8 || 23 || 25 || 100
The answer that I look for is a table like this:
P1:
User || C2 || C3 || C4 || C5
U1 || 20 || 15 || 40 || 15
U2 || 17 || 8 || 23 || 25
-- Return every t1 row whose C1..C5 sum exceeds 80, exposing that sum as total.
SELECT
    *,
    (C1 + C2 + C3 + C4 + C5) AS total
FROM t1
WHERE (C1 + C2 + C3 + C4 + C5) > 80