Picking a value from a zip range based on priority in SQL - sql

There are three cases (in all of them the value must be picked based on priority):
case 1 : zip exist between range
case 2: zip does not exist between range
case 3 : overlap range
Table
Temp1
state
zip_start
zip_end
Priority
Value
NY
100
200
1
A
NY
150
250
3
c
NY
null
null
2
B
Data
state
zip
NY
201
NY
400
OUTPUT :
state
zip_start
zip_end
Priority
Value
zip
NY
null
null
2
B
201
NY
null
null
2
B
400
I am trying the code below, but it is not picking the data based on priority:
-- Asker's attempt: keep, per (STATE, ZIP), the TEMP1 row with the smallest PRIORITY (IX = 1).
SELECT ZIP,ZIP_START,ZIP_END,VALUE,PRIORITY,STATE,IX FROM
(
SELECT TMP1.*,
ROW_NUMBER () OVER (PARTITION BY STATE,ZIP ORDER BY PRIORITY ) IX
FROM
(
-- CASE_1: (range, zip) pairs where the zip lies inside a bounded range.
-- The WHERE on DATA.ZIP rejects unmatched rows, so this LEFT JOIN behaves as an inner join.
WITH CASE_1 AS
( SELECT
temp1.*
,DATA.ZIP
FROM TEMP1
LEFT JOIN
"DATA" ON DATA.STATE = temp1.STATE
WHERE DATA.ZIP BETWEEN TEMP1.ZIP_START AND TEMP1 .ZIP_END
),
-- CASE_2: open-ended (NULL/empty) ranges, but only for zips that have NO CASE_1 row.
-- Because of the NOT EXISTS, an open-ended range never competes on priority with a
-- bounded range that matched the same zip.
CASE_2 AS
(
SELECT
temp1.*
,DATA.ZIP
FROM "DATA"
LEFT JOIN
TEMP1 ON DATA.STATE = temp1.STATE
WHERE (ZIP_START IS NULL OR ZIP_START = '')
AND (ZIP_END IS NULL OR ZIP_END = '')
AND Not EXISTS
(SELECT 1 FROM CASE_1 WHERE CASE_1.zip=DATA.zip
AND CASE_1.STATE=DATA.STATE)
)
SELECT * FROM CASE_1
UNION
SELECT * FROM CASE_2
)TMP1
) TMP2
WHERE TMP2.IX = 1;

From Oracle 12, you can use a LATERAL join and filter for rows where the zip is within the range (treating a NULL at either end of the range as open-ended), then ORDER BY priority and FETCH the FIRST matched ROW ONLY:
-- For every DATA row, fetch the one TEMP1 row with the smallest priority
-- whose range contains the zip; a NULL bound is treated as open-ended.
SELECT best.*, src.zip
FROM data src
CROSS JOIN LATERAL (
    SELECT *
    FROM temp1 rng
    WHERE rng.state = src.state
      AND (rng.zip_start IS NULL OR rng.zip_start <= src.zip)
      AND (rng.zip_end IS NULL OR rng.zip_end >= src.zip)
    ORDER BY rng.priority
    FETCH FIRST ROW ONLY
) best
In earlier versions, you can join the two tables and then use the ROW_NUMBER analytic function to find the best match:
-- Rank every matching range per DATA row (identified by its ROWID) by priority,
-- then keep only the top-ranked match.
WITH ranked AS (
    SELECT rng.*,
           src.zip,
           ROW_NUMBER() OVER (PARTITION BY src.ROWID ORDER BY rng.priority) AS rn
    FROM data src
    INNER JOIN temp1 rng
        ON rng.state = src.state
       AND (rng.zip_start IS NULL OR rng.zip_start <= src.zip)
       AND (rng.zip_end IS NULL OR rng.zip_end >= src.zip)
)
SELECT state, zip_start, zip_end, priority, value, zip
FROM ranked
WHERE rn = 1;
Which, for the sample data:
-- Sample ranges: two bounded ranges plus one open-ended (NULL/NULL) range.
CREATE TABLE Temp1 (state, zip_start, zip_end, Priority, Value) AS
    SELECT 'NY', 100, 200, 1, 'A' FROM DUAL
    UNION ALL
    SELECT 'NY', 150, 250, 3, 'c' FROM DUAL
    UNION ALL
    SELECT 'NY', null, null, 2, 'B' FROM DUAL;

-- Sample zips to look up.
CREATE TABLE Data (state, zip) AS
    SELECT 'NY', 201 FROM DUAL
    UNION ALL
    SELECT 'NY', 400 FROM DUAL;
Both output:
STATE
ZIP_START
ZIP_END
PRIORITY
VALUE
ZIP
NY
null
null
2
B
201
NY
null
null
2
B
400
db<>fiddle here

-- Range table: one row per (state, zip range) with its priority and value.
CREATE TABLE TEMP1 (
STATE VARCHAR(10),
ZIP_START NUMBER,
ZIP_END NUMBER,
PRIORITY NUMBER,
VAL VARCHAR(10));
-- Explicit column lists keep the inserts valid if the table's column order changes.
INSERT INTO TEMP1 (STATE, ZIP_START, ZIP_END, PRIORITY, VAL) VALUES ('NY', 100, 200, 1, 'A');
INSERT INTO TEMP1 (STATE, ZIP_START, ZIP_END, PRIORITY, VAL) VALUES ('NY', 150, 250, 3, 'C');
INSERT INTO TEMP1 (STATE, ZIP_START, ZIP_END, PRIORITY, VAL) VALUES ('NY', null, null, 2, 'B');
-- Zips to be matched against TEMP1.
CREATE TABLE DATATABLE (
STATE VARCHAR(10),
ZIP NUMBER
);
INSERT INTO DATATABLE (STATE, ZIP) VALUES ('NY', 201);
INSERT INTO DATATABLE (STATE, ZIP) VALUES ('NY', 400);
The main idea is to first work out how many times your condition (zip falls between zip_start and zip_end) is met for each row. This is what the COUNT_MATCH column is for.
Once you know whether a row maps to priority 1, 2 or 3, you join the data table back to the temp table to get the value associated with that priority.
-- Derive a priority from how many bounded ranges contain each zip
-- (more than one match -> 3, exactly one -> 1, none -> 2), then look up
-- the TEMP1 value stored under that priority for the same state.
SELECT
    m.STATE,
    m.ZIP_START,
    m.ZIP_END,
    m.PRIORITY,
    r.VAL,
    m.ZIP
FROM (
    SELECT
        d.STATE,
        d.ZIP,
        MIN(x.ZIP_START) AS ZIP_START,
        MAX(x.ZIP_END) AS ZIP_END,
        -- COUNT(x.STATE) counts only the rows that actually matched the outer join.
        CASE
            WHEN COUNT(x.STATE) > 1 THEN 3
            WHEN COUNT(x.STATE) = 1 THEN 1
            WHEN COUNT(x.STATE) = 0 THEN 2
        END AS PRIORITY
    FROM DATATABLE d
    LEFT JOIN TEMP1 x
        ON x.STATE = d.STATE
       AND d.ZIP BETWEEN x.ZIP_START AND x.ZIP_END
    GROUP BY d.STATE, d.ZIP
) m
LEFT JOIN TEMP1 r
    ON r.STATE = m.STATE
   AND r.PRIORITY = m.PRIORITY
;

Related

How can I show only the records that satisfy 2 different conditions under the same table in SQL

I have this table with over 1,000,000 rows, and I am trying to return ONLY the records with the same [name] and have both NY = 1 and KS = 1, even though those will be in separate rows. For this example:
I want the query to return both records for James, because between them they have NY = 1 and KS = 1; but if NY != 1 or KS != 1 for James, then I don't want the query to return them.
There's probably a more efficient way, but you could give this a shot.
-- Return every row whose number has at least one KS = 1 row and at least one NY = 1 row
-- (the two flags may sit on different rows).
-- SELECT 1 is used inside EXISTS because a bare TRUE literal is not accepted by every engine.
SELECT a.*
FROM
your_table AS a
WHERE
EXISTS(
SELECT 1
FROM your_table AS ks_row
WHERE
ks_row.number = a.number
AND ks_row.KS = 1
)
AND EXISTS(
SELECT 1
FROM your_table AS ny_row
WHERE
ny_row.number = a.number
AND ny_row.NY = 1
)
If you only want rows where one of these values is set:
-- Return only rows that carry one of the flags themselves, provided some row
-- with the same number carries the other flag.
-- The two branches are alternatives (OR): the flags live on separate rows, so
-- requiring both branches at once would demand NY = 1 and KS = 1 on one row.
SELECT t.*
FROM t
WHERE (t.NY = 1 AND
EXISTS (SELECT 1
FROM t t2
WHERE t2.number = t.number AND t2.KS = 1
)
) OR
(t.KS = 1 AND
EXISTS (SELECT 1
FROM t t2
WHERE t2.number = t.number AND t2.NY = 1
)
);
However, window functions might be a good bet:
-- Attach per-number totals of both flags to every row, then keep the rows
-- whose number has a positive total for each flag.
with flagged as (
    select src.*,
           sum(src.ny) over (partition by src.number) as num_ny,
           sum(src.ks) over (partition by src.number) as num_ks
    from t src
)
select flagged.*
from flagged
where flagged.num_ny > 0
  and flagged.num_ks > 0;
-- T-SQL table variables must be named with a leading @ (a leading # denotes a
-- temp table, which cannot be created with DECLARE).
declare @table1 as table
(
sname varchar(50),
permno int,
symbol int,
groupdate date
)
insert into @table1(sname,permno,symbol,groupdate)
values
('ashan',1,0,'2019-01-01'),
('re',2,1,'2019-01-01'),
('saman',1,0,'2019-01-01'),
('ashan',0,1,'2019-01-01'),
('saman',1,1,'2019-01-01')
-- Rows with permno = 1 and symbol = 1, restricted to names that occur more than once.
select * from @table1 a
inner join
(
select sname
from @table1
group by sname
having count(sname)>1
) b on
a.sname=b.sname
where a.permno=1 and a.symbol=1

Recursive/hierarchical query in BigQuery

I have a recursion/hierarchical problem that I'm trying to figure out in BigQuery.
I have a list of employees and each employee has a manager ID. I need to be able to enter a single Employee_ID and return an array of every person beneath them.
-- Employee hierarchy: each row points at its manager through Line_Manager_ID.
CREATE TABLE p_RLS.testHeirarchy
(
Employee_ID INT64,
Employee_Name STRING,
Position STRING,
Line_Manager_ID INT64
);
-- Sample data: workers (1-10), store managers (11-15), group managers (16-17),
-- ops managers (18-19) and a single root (20) whose Line_Manager_ID is NULL.
INSERT INTO p_RLS.testHeirarchy (Employee_ID, Employee_Name, Position, Line_Manager_ID)
VALUES(1,'Joe','Worker',11),
(2,'James','Worker',11),
(3,'Jack','Worker',11),
(4,'Jill','Worker',12),
(5,'Jan','Worker',12),
(6,'Jacquie','Worker',13),
(7,'Joaquin','Worker',14),
(8,'Jeremy','Worker',14),
(9,'Jade','Worker',15),
(10,'Jocelyn','Worker',15),
(11, 'Bob', 'Store Manager',16),
(12, 'Bill', 'Store Manager',16),
(13, 'Barb', 'Store Manager',16),
(14, 'Ben', 'Store Manager',17),
(15, 'Burt', 'Store Manager',17),
(16, 'Sally','Group Manager',18),
(17, 'Sam','Group Manager',19),
(18, 'Anna', 'Ops Manager',20),
(19, 'Amy', 'Ops Manager',20),
(20, 'Zoe', 'State Manager', NULL);
My desired output would resemble:
-- Illustrations of the desired result shape for three different input employees;
-- the arrays list every direct and indirect report.
SELECT 20 as Employee_ID, [19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] as Reports;
SELECT 11 as Employee_ID, [3,2,1] as Reports;
SELECT 1 as Employee_ID, [] as Reports;
I have got the following working but it seems very ugly/inconvenient and doesn't support unlimited levels:
-- Asker's fixed-depth approach: self-join the hierarchy seven times, one join per
-- level below the chosen employee, and collect the distinct IDs found at each level.
WITH test as (
SELECT L0.Employee_ID, L0.Employee_Name, L0.Position, L0.Line_Manager_ID,
ARRAY_AGG(DISTINCT L1.Employee_ID IGNORE NULLS) as Lvl1,
ARRAY_AGG(DISTINCT L2.Employee_ID IGNORE NULLS) as Lvl2,
ARRAY_AGG(DISTINCT L3.Employee_ID IGNORE NULLS) as Lvl3,
ARRAY_AGG(DISTINCT L4.Employee_ID IGNORE NULLS) as Lvl4,
ARRAY_AGG(DISTINCT L5.Employee_ID IGNORE NULLS) as Lvl5,
ARRAY_AGG(DISTINCT L6.Employee_ID IGNORE NULLS) as Lvl6,
ARRAY_AGG(DISTINCT L7.Employee_ID IGNORE NULLS) as Lvl7
FROM p_RLS.testHeirarchy as L0
-- Each Ln holds the employees whose manager is an employee found at level n-1.
LEFT OUTER JOIN p_RLS.testHeirarchy L1 ON L0.Employee_ID = L1.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L2 ON L1.Employee_ID = L2.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L3 ON L2.Employee_ID = L3.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L4 ON L3.Employee_ID = L4.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L5 ON L4.Employee_ID = L5.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L6 ON L5.Employee_ID = L6.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L7 ON L6.Employee_ID = L7.Line_Manager_ID
-- The employee whose reports are wanted is hard-coded here.
WHERE L0.Employee_ID = 16
GROUP BY 1,2,3,4)
-- Flatten the per-level arrays into one list; IFNULL substitutes an empty array
-- for any level array that is NULL.
SELECT
Employee_ID, ARRAY_CONCAT(
IFNULL(Lvl1,[]),
IFNULL(Lvl2,[]),
IFNULL(Lvl3,[]),
IFNULL(Lvl4,[]),
IFNULL(Lvl5,[]),
IFNULL(Lvl6,[]),
IFNULL(Lvl7,[])) as All_reports
FROM test
Is there a better way to do this? Is a recursive approach possible in BigQuery?
Recursive CTE was recently introduced !
This makes things so much easier
-- Build every (ancestor manager, descendant employee) pair together with the
-- depth at which the descendant sits, then list descendants per manager.
with recursive iterations as (
    -- anchor: each direct manager -> employee link, at depth 1
    select line_manager_id, employee_id, 1 as pos
    from your_table
    union all
    -- step: for a known (manager, employee) pair, attach that employee's own
    -- direct reports to the same manager, one level deeper
    select anc.line_manager_id, emp.employee_id, anc.pos + 1
    from your_table as emp
    inner join iterations as anc
        on anc.employee_id = emp.line_manager_id
)
select
    line_manager_id,
    string_agg('' || employee_id order by pos, employee_id desc) as reports_as_list
from iterations
where line_manager_id is not null
group by line_manager_id
order by line_manager_id desc
If applied to sample data in question - output is
Below is for BigQuery Standard SQL
-- Scripted (loop-based) transitive closure of the manager -> reports relation.
-- rows_count: row count of ttt captured at the start of each pass.
-- run_away_stop: pass counter, also used as a hard iteration cap.
DECLARE rows_count, run_away_stop INT64 DEFAULT 0;
-- Inline copy of the sample hierarchy.
CREATE TEMP TABLE initialData AS WITH input AS (
SELECT 1 Employee_ID,'Joe' Employee_Name,'Worker' Position,11 Line_Manager_ID UNION ALL
SELECT 2,'James','Worker',11 UNION ALL
SELECT 3,'Jack','Worker',11 UNION ALL
SELECT 4,'Jill','Worker',12 UNION ALL
SELECT 5,'Jan','Worker',12 UNION ALL
SELECT 6,'Jacquie','Worker',13 UNION ALL
SELECT 7,'Joaquin','Worker',14 UNION ALL
SELECT 8,'Jeremy','Worker',14 UNION ALL
SELECT 9,'Jade','Worker',15 UNION ALL
SELECT 10,'Jocelyn','Worker',15 UNION ALL
SELECT 11, 'Bob', 'Store Manager',16 UNION ALL
SELECT 12, 'Bill', 'Store Manager',16 UNION ALL
SELECT 13, 'Barb', 'Store Manager',16 UNION ALL
SELECT 14, 'Ben', 'Store Manager',17 UNION ALL
SELECT 15, 'Burt', 'Store Manager',17 UNION ALL
SELECT 16, 'Sally','Group Manager',18 UNION ALL
SELECT 17, 'Sam','Group Manager',19 UNION ALL
SELECT 18, 'Anna', 'Ops Manager',20 UNION ALL
SELECT 19, 'Amy', 'Ops Manager',20 UNION ALL
SELECT 20, 'Zoe', 'State Manager', NULL
)
SELECT * FROM input;
-- Seed: one row per manager holding the array of that manager's direct reports.
CREATE TEMP TABLE ttt AS
SELECT Line_Manager_ID, ARRAY_AGG(Employee_ID) Reports FROM initialData WHERE NOT Line_Manager_ID IS NULL GROUP BY Line_Manager_ID;
LOOP
-- Bump the pass counter and remember how many rows ttt has before this pass.
SET (run_away_stop, rows_count) = (SELECT AS STRUCT run_away_stop + 1, COUNT(1) FROM ttt);
-- ttt1: for each manager t2 whose Reports contain another manager t1, merge
-- t1's Reports into t2's, then de-duplicate and sort the merged array descending.
CREATE OR REPLACE TEMP TABLE ttt1 AS
SELECT Line_Manager_ID, ARRAY(SELECT DISTINCT Employee_ID FROM UNNEST(Reports) Employee_ID ORDER BY Employee_ID DESC) Reports
FROM (
SELECT Line_Manager_ID, ARRAY_CONCAT_AGG(Reports) Reports
FROM (
SELECT t2.Line_Manager_ID, ARRAY_CONCAT(t1.Reports, t2.Reports) Reports
FROM ttt t1, ttt t2
WHERE (SELECT COUNTIF(t1.Line_Manager_ID = Employee_ID) FROM UNNEST(t2.Reports) Employee_ID) > 0
) GROUP BY Line_Manager_ID
);
-- New ttt = the merged rows plus the untouched rows for managers absent from ttt1.
CREATE OR REPLACE TEMP TABLE ttt AS
SELECT * FROM ttt1 UNION ALL
SELECT * FROM ttt WHERE NOT Line_Manager_ID IN (SELECT Line_Manager_ID FROM ttt1);
-- Stop when the row count equals the one captured at the start of this pass
-- (after the first pass), or unconditionally once more than 10 passes have run.
IF (rows_count = (SELECT COUNT(1) FROM ttt) AND run_away_stop > 1) OR run_away_stop > 10 THEN BREAK; END IF;
END LOOP;
-- One output row per employee; employees that manage nobody have no ttt row
-- and therefore get a NULL list from the LEFT JOIN.
SELECT Employee_ID,
(
SELECT STRING_AGG(CAST(Employee_ID AS STRING), ',' ORDER BY Employee_ID DESC)
FROM ttt.Reports Employee_ID
) Reports_as_list
FROM (SELECT DISTINCT Employee_ID FROM initialData) d
LEFT JOIN ttt ON Employee_ID = Line_Manager_ID
ORDER BY Employee_ID DESC;
with result
Row Employee_ID Reports_as_list
1 20 19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
2 19 17,15,14,10,9,8,7
3 18 16,13,12,11,6,5,4,3,2,1
4 17 15,14,10,9,8,7
5 16 13,12,11,6,5,4,3,2,1
6 15 10,9
7 14 8,7
8 13 6
9 12 5,4
10 11 3,2,1
11 10 null
12 9 null
13 8 null
14 7 null
15 6 null
16 5 null
17 4 null
18 3 null
19 2 null
20 1 null
If you need Reports as an array, replace the last statement in the above script with the one below:
-- Same final step as the list version, but returns the accumulated array as-is.
SELECT
    d.Employee_ID,
    ttt.Reports AS Reports_as_array
FROM (SELECT DISTINCT Employee_ID FROM initialData) AS d
LEFT JOIN ttt
    ON d.Employee_ID = ttt.Line_Manager_ID
ORDER BY d.Employee_ID DESC;
Note: depending on the level of nesting in your hierarchy, you might need to adjust the 10 in OR run_away_stop > 10
To the question: "Is a recursive approach possible in BigQuery?"
Yes!
Now that BigQuery supports scripting and loops I solved some recursive problems from the Advent of Code with BigQuery:
https://towardsdatascience.com/advent-of-code-sql-bigquery-31e6a04964d4
-- Expanding search over the links in t1, starting from 'YOU' and stopping once 'SAN' is reached.
-- NOTE(review): `steps` and table `t1` (columns origin, dest) are not declared in this
-- excerpt; presumably they are set up earlier in the linked article's script - confirm.
CREATE TEMP TABLE planets AS SELECT 'YOU' planet;
LOOP
SET steps = steps+1
;
-- Replace the current set with every planet one link away from it, following links in both directions.
CREATE OR REPLACE TEMP TABLE planets AS
SELECT DISTINCT planet
FROM (
SELECT origin planet FROM t1 WHERE dest IN (SELECT planet FROM planets)
UNION ALL
SELECT dest planet FROM t1 WHERE origin IN (SELECT planet FROM planets)
)
;
-- Exit as soon as the target is in the current set.
IF 'SAN' IN (SELECT * FROM planets )
THEN LEAVE;
END IF;
END LOOP
;
SELECT steps-2
I would use a similar approach to navigate the graph and annotate all parent relationships.
Soon: I'll write a blog post on the specifics of tree traversal to get everyone under x. But this code will help you in the meantime.

ORACLE MAX GROUP BY

I am using Oracle (SQL Developer). Below is an example and the outcome I would like to get (the purpose of the select is to find people who have submitted project A and have not yet done any activities in project B):
Data table:
-- One row per (person, project, version) with the status of that version.
CREATE TABLE "XXX"."TABLE1"
( "STATUS" VARCHAR2(20 BYTE),
"PROJECT_NAME" VARCHAR2(20 BYTE),
"VERSION_NUMBER" NUMBER,
"PERSON" VARCHAR2(20 BYTE)
);
-- VERSION_NUMBER is a NUMBER column, so numeric literals are used instead of
-- quoted strings to avoid relying on implicit conversion.
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','A',0,'PETER');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','A',0,'JOHN');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','A',1,'JOHN');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('NEW','A',2,'JOHN');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','A',0,'MARY');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','B',0,'PETER');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('NEW','B',1,'PETER');
Insert into XXX.TABLE1 (STATUS,PROJECT_NAME,VERSION_NUMBER,PERSON) values ('SUBMITTED','B',0,'JOHN');
Created table should look like this:
TABLE1:
TABLE1.STATUS TABLE1.PROJECT_NAME TABLE1.VERSION_NUMBER TABLE1.PERSON
SUBMITTED A 0 PETER
SUBMITTED A 0 JOHN
SUBMITTED A 1 JOHN
NEW A 2 JOHN
SUBMITTED A 0 MARY
SUBMITTED B 0 PETER
NEW B 1 PETER
SUBMITTED B 0 JOHN
Result what I want get is this:
STATUS PROJECT_NAME VERSION_NUMBER PERSON STATUS_1 PROJECT_NAME_1 VERSION_NUMBER_1 PERSON_1
SUBMITTED A 0 PETER NEW B 1 PETER
SUBMITTED A 1 JOHN SUBMITTED B 0 JOHN
SUBMITTED A 0 MARY
Select which I am using now is:
-- Asker's query: the latest SUBMITTED project-A row per person (t), optionally
-- paired with that person's latest project-B row (v).
select t.*,v.*
from TABLE1 t
-- v: project-B rows whose VERSION_NUMBER equals the maximum for that person and project.
left outer join ( select u.*
from TABLE1 u
where exists (select max(z.VERSION_NUMBER)
,z.PERSON
,z.PROJECT_NAME
from TABLE1 z
where z.PROJECT_NAME = 'B'
and u.PROJECT_NAME = z.PROJECT_NAME
and u.PERSON = z.PERSON
group by z.PERSON, z.PROJECT_NAME
having u.VERSION_NUMBER = max(z.VERSION_NUMBER))) v
on t.PERSON = v.PERSON
-- Keep only t rows that are the highest SUBMITTED version of project A for the person.
where exists (select max (w.VERSION_NUMBER)
,w.PERSON
,w.PROJECT_NAME
from TABLE1 w
where w.PROJECT_NAME = 'A'
and w.STATUS = 'SUBMITTED'
and t.PROJECT_NAME = w.PROJECT_NAME
and t.PERSON = w.PERSON
group by w.PERSON, w.PROJECT_NAME
having t.VERSION_NUMBER = max (w.VERSION_NUMBER))
QUESTION: What would be the best (correct) way to write such a select? As a matter of best practice, should I use analytic functions, or something else, instead of EXISTS?
I think you've over-complicated this...
-- Inline sample data, then: people with a SUBMITTED row for project A and no
-- row at all for project B.
WITH
project_status (status, project_name, version_number, person)
AS
(SELECT 'SUBMITTED','A','0','PETER' FROM dual UNION ALL
SELECT 'SUBMITTED','A','0','JOHN' FROM dual UNION ALL
SELECT 'SUBMITTED','A','1','JOHN' FROM dual UNION ALL
SELECT 'NEW','A','2','JOHN' FROM dual UNION ALL
SELECT 'SUBMITTED','A','0','MARY' FROM dual UNION ALL
SELECT 'SUBMITTED','B','0','PETER' FROM dual UNION ALL
SELECT 'NEW','B','1','PETER' FROM dual UNION ALL
SELECT 'SUBMITTED','B','0','JOHN' FROM dual
)
SELECT DISTINCT
    submitted_a.person,
    submitted_a.project_name,
    submitted_a.status
FROM project_status submitted_a
WHERE submitted_a.project_name = 'A'
  AND submitted_a.status = 'SUBMITTED'
  -- exclude anyone who has any project-B activity
  AND NOT EXISTS (
        SELECT 1
        FROM project_status b_activity
        WHERE b_activity.person = submitted_a.person
          AND b_activity.project_name = 'B'
      )
;
purpose of select is to find out people who submitted project A and
have not done any activities in project B yet
If your purpose is just to get the people, then you don't need the complete rows. One method to answer this is to use group by and having:
-- People with at least one SUBMITTED row for project A and no rows for project B.
-- The status literal must match the stored value exactly ('SUBMITTED', upper case).
select t1.person
from "XXX"."TABLE1" t1
group by t1.person
having sum(case when t1.project_name = 'A' and t1.status = 'SUBMITTED' then 1 else 0 end) > 0 and
sum(case when t1.project_name = 'B' then 1 else 0 end) = 0;
If you need the complete rows, then Christian has a reasonable solution.

Combining rows to create two columns of data

I'm a bit confused about how to write this query properly. I have a table that looks like this, where district 0 represents a value that should be applied to all districts (global).
[ district ] [ code ] [ value ]
1 A 11
1 C 12
2 A 13
2 B 14
0 B 15
I have built a query (below) to combine the "global value" on each district.
[ district ] [ code ] [ district value ] [ global value ]
1 A 11 null -> row 1
1 B null 15 -> row 2
1 C 12 null -> row 3
2 A 13 null -> row 4
2 B 14 15 -> row 5
2 C null null -> row 6 (optional)
I did it by joining on the list of all possible district/code.
-- Asker's query: build every (non-zero district, code) combination, then attach
-- the district's own value and the global (district 0) value for that code.
select all_code.district, all_code.code, table_d.value, table_g.value
-- all_code: every code in the table paired with every district other than 0
-- (the "on 1 = 1" join is a cross join).
from (select distinct b.district, a.code
from temp_table a
inner join (select distinct district
from temp_table
where district <> 0) b
on 1 = 1) all_code
-- table_d: the value stored for this exact district and code, if any.
left join temp_table table_d
on table_d.code = all_code.code
and table_d.district = all_code.district
-- table_g: the district-0 value for this code, if any.
left join temp_table table_g
on table_g.code = all_code.code
and table_g.district = 0
This query works great but seems pretty ugly. Is there a better way of doing this? (note that I don't care if row #6 is there or not).
Here's a script if needed.
-- Values per (district, code); district '0' holds the global value for a code.
CREATE TABLE temp_table
(
    district VARCHAR2(5) NOT NULL,
    code     VARCHAR2(5) NOT NULL,
    value    VARCHAR2(5) NOT NULL
);

INSERT INTO temp_table (district, code, value) VALUES ('1', 'A', '11');
INSERT INTO temp_table (district, code, value) VALUES ('1', 'C', '12');
INSERT INTO temp_table (district, code, value) VALUES ('2', 'A', '13');
INSERT INTO temp_table (district, code, value) VALUES ('2', 'B', '14');
-- global row
INSERT INTO temp_table (district, code, value) VALUES ('0', 'B', '15');
Here is one option. Since you are on 10g, you can use a partitioned outer join (the partition by() clause) to fill the gaps:
-- DCodes: the fixed list of codes every district should show.
with DCodes(code) as(
select 'A' from dual union all
select 'B' from dual union all
select 'C' from dual
),
-- DGlobal: the global (district 0) value per code.
DGlobal(code, value1) as(
select code
, value
from temp_table
where district = 0
)
select tt.district
, dc.code
, tt.value
, dg.value1 as global_value
from temp_table tt
-- Partitioned outer join: the right join to DCodes is applied per district.
partition by(tt.district)
right join DCodes dc
on (dc.code = tt.code)
left join DGlobal dg
on (dg.code = dc.code)
-- Drop the district-0 partition itself from the output.
where tt.district != 0
order by 1, 2
Result:
DISTRICT CODE VALUE GLOBAL_VALUE
-------- ---- ----- ------------
1 A 11
1 B 15
1 C 12
2 A 13
2 B 14 15
2 C
I would argue that a lot of the "ugliness" comes from a lack of lookup tables for district and code. Without an authoritative source for those, you have to fabricate one from the values that are in use (hence the sub-queries with distinct).
In terms of cleaning up the query you have, the best I can come up with is to remove an unnecessary sub-query and use the proper syntax for the cross join:
-- Every (non-zero district, code) combination with the district's own value and
-- the global (district 0) value for that code.
-- temp_table's column is named "value"; the two copies are aliased so the output
-- columns are distinguishable, and ORDER BY is qualified to avoid ambiguity.
SELECT a.district,
b.code,
c.value AS district_value,
d.value AS global_value
FROM (SELECT DISTINCT district FROM temp_table WHERE district <> 0) a
CROSS JOIN (SELECT DISTINCT code FROM temp_table) b
LEFT JOIN temp_table c
ON b.code = c.code AND a.district = c.district
LEFT JOIN temp_table d
ON b.code = d.code AND d.district = 0
ORDER BY a.district, b.code

Stuck on this union / except

Trying to find the best way to proceed with this, for some reason it is really tripping me up.
I have data like this:
transaction_id(pk) decision_id(pk) accepted_ind
A 1 NULL
A 2 <blank>
A 4 Y
B 1 <blank>
B 2 Y
C 1 Y
D 1 N
D 2 O
D 3 Y
Each transaction is guaranteed to have decision 1
There can be multiple decision possibilities (what-if's) type of scenarios
Accepted can have multiple values or be blank or NULL but only one can be accepted_ind = Y
I am trying to write a query to:
Return one row for each transaction_id
Return the decision_id where the accepted_ind = Y or if the transaction has no rows accepted_ind = Y, then return the row with decision_id = 1 (regardless of value in the accepted_ind)
I have tried:
1. Using logical "or" to pull the records, kept getting duplicates.
2. Using a union and except but can not quite get the logic down correctly.
Any assistance is appreciated. I am not sure why this is tripping me up so much!
Adam
Try this. Basically the WHERE clause says:
Where Accepted = 'Y'
OR
There is no accepted row for this transaction and the decision_id = 1
-- One row per transaction: the accepted ('Y') decision, or decision 1 when the
-- transaction has no accepted row. The indicator column is Accepted_ind.
SELECT t.Transaction_id, t.Decision_ID, t.Accepted_ind
FROM MyTable t
WHERE t.Accepted_ind = 'Y'
OR (NOT EXISTS (SELECT 1 FROM MyTable t2
WHERE t2.Accepted_ind = 'Y'
and t2.Transaction_id = t.transaction_id)
AND t.Decision_id = 1)
This approach uses ROW_NUMBER() and therefore will only work on SQL Server 2005 or later
I have modified your sample data because, as it stands, every transaction_id has a Y indicator!
-- Table variables are named with a leading @ and are scoped to the batch, so the
-- declaration, the insert and the query below must run together.
DECLARE @t TABLE (
transaction_id NCHAR(1),
decision_id INT,
accepted_ind NCHAR(1) NULL
)
INSERT INTO @t (transaction_id, decision_id, accepted_ind) VALUES
( 'A' , 1 , NULL ),
( 'A' , 2 , '' ),
( 'A' , 4 , 'Y' ),
( 'B' , 1 , '' ),
( 'B' , 2 , 'N' ), -- change from your sample data
( 'C' , 1 , 'Y' ),
( 'D' , 1 , 'N' ),
( 'D' , 2 , 'O' ),
( 'D' , 3 , 'Y' )
-- And here is the query itself:
-- rank each transaction's rows (accepted row first, then decision 1, then the
-- rest) and keep the top-ranked row.
SELECT transaction_id, decision_id, accepted_ind FROM (
SELECT transaction_id, decision_id, accepted_ind,
ROW_NUMBER() OVER (
PARTITION BY transaction_id
ORDER BY
CASE
WHEN accepted_ind = 'Y' THEN 1
WHEN decision_id = 1 THEN 2
ELSE 3
END,
decision_id -- tie-breaker so the numbering is deterministic
) rn
FROM @t
) ranked
WHERE rn = 1
Results:
transaction_id decision_id accepted_ind
-------------- ----------- ------------
A 4 Y
B 1
C 1 Y
D 3 Y
The ROW_NUMBER() clause gives a 'priority' to each criterion you mention; we then ORDER BY to pick the best, and take the first row.
There's probably a neater/more efficient query, but I think this will get the job done. It assumes the table name is Decision:
-- One row per transaction: take each column from the accepted ('Y') decision
-- when one exists, otherwise from decision 1.
SELECT COALESCE(accepteddecision.transaction_id, firstdecision.transaction_id) AS transaction_id,
       COALESCE(accepteddecision.decision_id, firstdecision.decision_id) AS decision_id,
       COALESCE(accepteddecision.accepted_ind, firstdecision.accepted_ind) AS accepted_ind
FROM decision
-- the accepted row for the transaction, if any
LEFT OUTER JOIN decision AS accepteddecision
    ON accepteddecision.transaction_id = decision.transaction_id
   AND accepteddecision.accepted_ind = 'Y'
-- the transaction's decision 1
LEFT OUTER JOIN decision AS firstdecision
    ON firstdecision.transaction_id = decision.transaction_id
   AND firstdecision.decision_id = 1
-- collapse the one-row-per-source-decision fan-out to a single row per transaction
GROUP BY accepteddecision.transaction_id,
         firstdecision.transaction_id,
         accepteddecision.decision_id,
         firstdecision.decision_id,
         accepteddecision.accepted_ind,
         firstdecision.accepted_ind
Out of interest, the following uses UNION and EXCEPT (plus a JOIN) as specified in the question title:
-- T: inline sample data. no_accepted: transactions that have a decision 1 but
-- no accepted ('Y') row.
WITH T AS (
    SELECT *
    FROM (
        VALUES ('A', 1, NULL),
               ('A', 2, ''),
               ('A', 4, 'Y'),
               ('B', 1, ''),
               ('B', 2, 'Y'),
               ('C', 1, 'Y'),
               ('D', 1, 'N'),
               ('D', 2, 'O'),
               ('D', 3, 'Y'),
               ('E', 2, 'O'), -- sample data extended
               ('E', 1, 'N')  -- sample data extended
    ) AS T (transaction_id, decision_id, accepted_ind)
),
no_accepted AS (
    SELECT transaction_id
    FROM T
    WHERE decision_id = 1
    EXCEPT
    SELECT transaction_id
    FROM T
    WHERE accepted_ind = 'Y'
)
-- accepted rows ...
SELECT *
FROM T
WHERE accepted_ind = 'Y'
UNION
-- ... plus decision 1 for every transaction without an accepted row
SELECT T.*
FROM no_accepted AS D
INNER JOIN T
    ON T.transaction_id = D.transaction_id
   AND T.decision_id = 1;