Consider a table Address with fields Country, State, and other data fields. I want to get all the records except those whose (Country, State) combination is (US, IL), (US, LA), or (IND, DEL).
The query goes like
-- Attempt to return every Address row whose (Country, State) pair is not in the
-- literal exclusion list built by the UNION below.
Select * from Address a
where not exists
(
-- NOTE(review): there is no FROM keyword between this select list and the
-- derived table that follows, so the statement does not parse as written.
select Country,State
(select 'US' as Country, 'IL' as State
union
select 'US' as Country, 'LA' as State
union
select 'IND' as Country, 'DEL' as State
) e
-- NOTE(review): this predicate is satisfied by any listed pair that differs from
-- the row in BOTH columns, so NOT EXISTS does not remove the listed pairs;
-- matching a listed pair requires = on both columns.
where e.Country != a.Country and e.State != a.state
)
How can this be achieved more simply (i.e., by replacing the union of Country/State combinations with a simple subquery)? As the total data is not very large, I am not concerned about performance for now.
I know I can create a table variable, add all the literal combinations to it using INSERT INTO, and use the table variable in the NOT EXISTS, but I feel that is overkill for such a small requirement (NOT EXISTS on two columns).
Looks like your query tried to do this:
-- Return the Address rows whose (Country, State) pair does not appear in the
-- inline exclusion list.
select a.*
from Address as a
where not exists (
    select 1
    from (
        select 'US' as Country, 'IL' as State
        union all
        select 'US', 'LA'
        union all
        select 'IND', 'DEL'
    ) as excluded
    -- A row is dropped only when both columns match the same excluded pair.
    where excluded.Country = a.Country
      and excluded.State = a.State
)
Or you could not use a derived table and still get the same result
-- Same exclusion expressed without a derived table: a row is kept only when it
-- is not any one of the three listed (Country, State) pairs.
select a.*
from Address as a
where not (a.Country = 'US' and a.State = 'IL')
  and not (a.Country = 'US' and a.State = 'LA')
  and not (a.Country = 'IND' and a.State = 'DEL')
Simply use the values directly in the query:
-- Sample data, held in a table variable (T-SQL variable names start with @).
declare @Table as Table ( Country VarChar(6), State VarChar(6), Foo VarChar(6) );
insert into @Table ( Country, State, Foo ) values
  ( 'US', 'IL', 'one' ), ( 'XX', 'LA', 'two' ), ( 'IND', 'XXX', 'three' ), ( 'IND', 'DEL', 'four' );
select * from @Table;

-- Demonstrate excluding specific combinations.
-- The VALUES constructor supplies the excluded pairs inline; the left join keeps
-- every sample row, and the IS NULL filter then retains only rows that found no
-- matching excluded pair.
select T.*
  from @Table as T left outer join
    ( values ( 'US', 'IL' ), ( 'US', 'LA' ), ( 'IND', 'DEL' ) ) as Exclude( Country, State )
    on T.Country = Exclude.Country and T.State = Exclude.State
  where Exclude.Country is NULL;
or
-- Anti-join against an inline list of excluded (Country, State) pairs.
-- Only Address columns are returned: the joined columns from n are NULL for
-- every surviving row, so selecting them adds nothing and duplicates the
-- Country/State column names.
select a.*
from Address as a
left outer join (
    -- The literal pairs are already distinct, so UNION ALL avoids a needless
    -- de-duplication step.
    select 'US' as Country, 'IL' as State
    union all select 'US', 'LA'
    union all select 'IND', 'DEL'
) as n
    on a.Country = n.Country and a.State = n.State
where n.Country is NULL;
Related
I have a query that is doing conditional matching on records for the sake of creating a "golden record" on a people table. In doing so, another requirement is to rank attributes within the matches by the most common occurrence so that the golden record has the best values. This is an over simplification of the process I am working on but it does show what I'm trying to do.
I believe what I have is working, but I think there are probably better ways to do it. I have separated each logical step into a temp table to better demonstrate what I'm doing.
Step1: Self Join to match on multiple rules. This or that or that...
Step2: Rank attributes within matching records by Count (occurrence)
Step3: Create a Golden Record for the matches and choose which Attributes win based on previous steps' counts
Step4: Assign Golden Records to original People records
Here's some example data and my existing queries to show the logical steps:
-- People table used by the matching example; ID is an auto-incrementing
-- identity starting at 1, and every attribute is stored as text.
CREATE TABLE Persons
(
    ID        INT IDENTITY(1, 1),
    FirstName VARCHAR(255),
    LastName  VARCHAR(255),
    Address1  VARCHAR(255),
    City      VARCHAR(255),
    State     VARCHAR(255),
    BDay      VARCHAR(255),
    Email     VARCHAR(255)
);
-- Sample rows for the matching example. The column list is explicit so the
-- insert does not depend on the table's column order; ID is omitted because it
-- is an identity column.
INSERT INTO Persons (FirstName, LastName, Address1, City, State, BDay, Email)
VALUES
    ('RICK',     'ALLEN', '44 Street',  'Minneapolis', 'MN', '1/2/1970', 'help@test.com'),
    ('JENNIFER', 'ALLEN', '123 Street', 'Minneapolis', 'MN', '4/8/1980', 'test@test.com'),
    ('JENNIFER', 'ALLEN', '123 Street', 'Minneapolis', 'MN', '4/8/1981', 'test@test.com'),
    ('JENNIFER', 'ALLEN', '42 Street',  'Minneapolis', 'MN', '4/8/1980', 'test@test.com'),
    ('JENNIFER', 'ALLEN', '123 Street', 'Minneapolis', 'MN', '4/8/1980', 'test2@test.com'),
    ('STEVEN',   'ALLEN', '555 Street', 'Minneapolis', 'MN', '2/8/1980', 'help@test.com');

-- Show the loaded sample data.
SELECT * FROM Persons;
-- Step 1: self-join Persons on first and last name plus at least one of
-- address, birthday or email. For every DupeID, count how many records matched
-- in total and how many of them share each individual attribute value.
SELECT
    src.FirstName,
    src.LastName,
    src.Address1,
    src.BDay,
    src.Email,
    COUNT(*) OVER (PARTITION BY dup.ID)                AS [MatchCount],
    COUNT(*) OVER (PARTITION BY dup.ID, src.FirstName) AS [MatchCount_FirstName],
    COUNT(*) OVER (PARTITION BY dup.ID, src.Address1)  AS [MatchCount_Address1],
    COUNT(*) OVER (PARTITION BY dup.ID, src.BDay)      AS [MatchCount_BDay],
    COUNT(*) OVER (PARTITION BY dup.ID, src.Email)     AS [MatchCount_Email],
    src.ID AS OriginalID,
    dup.ID AS DupeID
INTO #tmp_dups
FROM Persons AS src
INNER JOIN Persons AS dup
    ON  src.FirstName = dup.FirstName
    AND src.LastName  = dup.LastName
    AND (   src.Address1 = dup.Address1
         OR src.BDay     = dup.BDay
         OR src.Email    = dup.Email)
-- For each DupeID keep the smallest OriginalID it was matched to.
SELECT
    MIN(d.OriginalID) AS OriginalID,
    d.DupeID
INTO #tmp_matches
FROM #tmp_dups AS d
GROUP BY
    d.DupeID
-- Step 2: within each DupeID, rank the matched rows per attribute by how often
-- that attribute's value occurs (most common first).
-- Ordering by the count alone leaves ties to be broken arbitrarily, so the
-- attribute value and then OriginalID are added as tiebreakers; the row ranked 1
-- is therefore the same on every run.
SELECT
    d.*
    , ROW_NUMBER() OVER (PARTITION BY d.[DupeID] ORDER BY d.[MatchCount_FirstName] DESC, d.[FirstName], d.[OriginalID]) AS [Match_RankByCount_FirstName]
    , ROW_NUMBER() OVER (PARTITION BY d.[DupeID] ORDER BY d.[MatchCount_Address1] DESC, d.[Address1], d.[OriginalID]) AS [Match_RankByCount_Address1]
    , ROW_NUMBER() OVER (PARTITION BY d.[DupeID] ORDER BY d.[MatchCount_BDay] DESC, d.[BDay], d.[OriginalID]) AS [Match_RankByCount_BDay]
    , ROW_NUMBER() OVER (PARTITION BY d.[DupeID] ORDER BY d.[MatchCount_Email] DESC, d.[Email], d.[OriginalID]) AS [Match_RankByCount_Email]
INTO #tmp_rankdups
FROM #tmp_dups AS d
-- Step 3: build one golden record per DupeID/LastName group, taking each
-- attribute from the row ranked 1 for that attribute. GoldenRecordID is a
-- running number over DupeID offset by 100000.
SELECT
    ROW_NUMBER() OVER (ORDER BY r.DupeID) + 100000 AS GoldenRecordID,
    r.DupeID,
    MAX(CASE WHEN r.[Match_RankByCount_FirstName] = 1 THEN r.[FirstName] END) AS [FirstName],
    r.LastName,
    MAX(CASE WHEN r.[Match_RankByCount_Address1] = 1 THEN r.[address1] END) AS [address1],
    MAX(CASE WHEN r.[Match_RankByCount_BDay] = 1 THEN r.[BDay] END) AS [BDay],
    MIN(CASE WHEN r.[Match_RankByCount_Email] = 1 THEN r.[Email] END) AS [Email]
INTO #tmp_goldenrecords
FROM #tmp_rankdups AS r
GROUP BY
    r.DupeID,
    r.LastName
-- Keep only the groups whose DupeID equals the smallest OriginalID matched to it.
HAVING MIN(r.OriginalID) = r.DupeID
-- Step 4: attach a golden record to every person by going through the match
-- table (person -> smallest matched OriginalID -> golden record).
SELECT *
FROM Persons AS person
INNER JOIN #tmp_matches AS matched
    ON person.ID = matched.DupeID
INNER JOIN #tmp_goldenrecords AS golden
    ON golden.DupeID = matched.OriginalID
Is there a way to delete/update a nested field in BigQuery?
Let's say I have this data
wives.age wives.name name
21 angel adam
20 kale
21 victoria rossi
20 jessica
or in json:
{"name":"adam","wives":[{"name":"angel","age":21},{"name":"kale","age":20}]}
{"name":"rossi","wives":[{"name":"victoria","age":21},{"name":"jessica","age":20}]}
As you can see from the data above.
Adam has 2 wives, named angel and kale. How to:
Delete kale record.
Update jessica to dessica
I tried to google this, but can't find it. I also tried to unnest, etc but no luck.
The reason we want to do this is that we inserted the array into the wrong records and want to remove/update the array data based on some condition.
Below is for BigQuery Standard SQL
#standardSQL
-- Rebuilds each row's wives array: entries listed in divorces are dropped and
-- entries listed in updates are renamed. The rebuilt array is returned as waves.
WITH updates AS (
  SELECT 'rossi' AS name, 'jessica' AS oldname, 'dessica' AS newname
  UNION ALL SELECT 'rossi', 'victoria', 'polly'
  UNION ALL SELECT 'adam', 'angel', 'jen'
),
divorces AS (
  SELECT 'adam' AS name, 'kale' AS wifename
  UNION ALL SELECT 'adam', 'milly'
  UNION ALL SELECT 'rossi', 'linda'
)
SELECT
  t.name,
  ARRAY(
    SELECT AS STRUCT
      age,
      -- A matched update supplies the new name; otherwise the name is kept.
      IF(oldname IS NOT NULL, newname, name) AS name
    FROM UNNEST(wives)
    LEFT JOIN UNNEST(updates) ON t.name = u.name AND name = oldname
    LEFT JOIN UNNEST(divorces) AS wifename ON t.name = d.name AND name = wifename
    -- Entries that matched a divorce are filtered out.
    WHERE wifename IS NULL
  ) AS waves
FROM `project.dataset.table` AS t
LEFT JOIN (
  -- One row per person carrying all of that person's renames as an array.
  SELECT name, ARRAY_AGG(STRUCT(oldname, newname)) AS updates
  FROM updates
  GROUP BY name
) AS u
  ON t.name = u.name
LEFT JOIN (
  -- One row per person carrying all of that person's removed names as an array.
  SELECT name, ARRAY_AGG(wifename) AS divorces
  FROM divorces
  GROUP BY name
) AS d
  ON t.name = d.name
You can test / play with above using dummy data as below
#standardSQL
-- Self-contained demo: the first CTE stands in for the real table, then the
-- wives array is rebuilt with divorced entries dropped and updated entries
-- renamed. The rebuilt array is returned as waves.
WITH `project.dataset.table` AS (
  SELECT
    'adam' AS name,
    [STRUCT<age INT64, name STRING>(21, 'angel'), (20, 'kale'), (22, 'milly')] AS wives
  UNION ALL
  SELECT
    'rossi',
    [STRUCT<age INT64, name STRING>(21, 'victoria'), (20, 'jessica'), (23, 'linda')]
),
updates AS (
  SELECT 'rossi' AS name, 'jessica' AS oldname, 'dessica' AS newname
  UNION ALL SELECT 'rossi', 'victoria', 'polly'
  UNION ALL SELECT 'adam', 'angel', 'jen'
),
divorces AS (
  SELECT 'adam' AS name, 'kale' AS wifename
  UNION ALL SELECT 'adam', 'milly'
  UNION ALL SELECT 'rossi', 'linda'
)
SELECT
  t.name,
  ARRAY(
    SELECT AS STRUCT
      age,
      -- A matched update supplies the new name; otherwise the name is kept.
      IF(oldname IS NOT NULL, newname, name) AS name
    FROM UNNEST(wives)
    LEFT JOIN UNNEST(updates) ON t.name = u.name AND name = oldname
    LEFT JOIN UNNEST(divorces) AS wifename ON t.name = d.name AND name = wifename
    -- Entries that matched a divorce are filtered out.
    WHERE wifename IS NULL
  ) AS waves
FROM `project.dataset.table` AS t
LEFT JOIN (
  -- One row per person carrying all of that person's renames as an array.
  SELECT name, ARRAY_AGG(STRUCT(oldname, newname)) AS updates
  FROM updates
  GROUP BY name
) AS u
  ON t.name = u.name
LEFT JOIN (
  -- One row per person carrying all of that person's removed names as an array.
  SELECT name, ARRAY_AGG(wifename) AS divorces
  FROM divorces
  GROUP BY name
) AS d
  ON t.name = d.name
result is as expected
name waves.age waves.name
adam 21 jen
rossi 21 polly
20 dessica
I hope you will be able to apply above to your real case :o)
I have a query where I build a table of values and I union it with another query. I wish to return a set of results where one value between the 2 tables does not match. (query example below). Every time I try to execute, I get the dreaded invalid identifier error, but I have no idea why. Any suggestions would be appreciated!
-- Attempt to list second_table rows whose status name differs from the
-- hand-built list of (empno, status, st_name) values.
-- NOTE(review): the alias ft names the inline view in the first branch of the
-- UNION only.
select * from (
select '1234567' as empno, 'A' as status, 'Active' as st_name from dual union all
select '89012345' as empno, 'DA' as status, 'Inactive' as st_name from dual) ft
union
-- NOTE(review): this branch's FROM clause contains only second_table st, yet its
-- WHERE clause references ft.empno and ft.st_name.
select id,status,statusnm from second_table st
where st.id = ft.empno
and st.statusnm <> ft.st_name;
Any thoughts/advice?
You don't want union, you want join . . . and it should be explicit:
-- Return the second_table rows whose id appears in the hand-built list but whose
-- status name differs from the one listed there.
with ft as (
    select '1234567' as empno, 'A' as status, 'Active' as st_name from dual
    union all
    select '89012345', 'DA', 'Inactive' from dual
)
select st.*
from ft
inner join second_table st
    on  st.id = ft.empno
    and st.statusnm <> ft.st_name
I need help with a query that derives a result from multiple rows based on a condition. For example, we have a table with [Roll No] and [subjects]. The table can have multiple records for the same [Roll No]. My requirement is: if the student opted for only 'English', the result should be 'E'; if only 'Maths', then 'M'; and if both, then 'B'.
-- I think this is what you want.
-- Sample enrolments: one English-only student, one Math-only student, and one
-- student taking both subjects.
INSERT INTO dbo.rolls (name, subject)
VALUES
    ('Jones', 'English'),
    ('Smith', 'Math'),
    ('Adams', 'English'),
    ('Adams', 'Math')
GO
-- One row per student with a code for the subjects taken:
--   'B' when the student has both English and Math,
--   otherwise the first letter of the single subject ('E' or 'M').
-- Only English/Math rows are considered, so other subjects cannot leak in as
-- extra codes, and COUNT(DISTINCT ...) stops a repeated enrolment row for the
-- same subject from being mistaken for "both".
SELECT
    r.name,
    CASE
        WHEN COUNT(DISTINCT r.subject) > 1 THEN 'B'
        -- Exactly one distinct subject remains here, so MIN simply returns it.
        ELSE SUBSTRING(MIN(r.subject), 1, 1)
    END AS code
FROM dbo.rolls AS r
WHERE r.subject IN ('English', 'Math')
GROUP BY r.name;
I have two tables:
tbl1: schoolID schoolname
tbl2 StudentID, schoolID ,Stu_Name, address, city, state, status
Status field contains value A for acceptance or R for rejected
I need to list cities in NY state where more than half of student applications were accepted.
SAMPLE TABLES
-- Sample schools. Each school gets its own schoolID: repeating an ID would make
-- every student row match twice when #tbl1 is joined on schoolID.
SELECT * INTO #tbl1 FROM
(
    SELECT 1 schoolID, 'SchoolA' schoolname
    UNION ALL
    SELECT 2, 'SchoolB'
) TAB
-- Sample student applications; STATUS is 'A' (accepted) or 'R' (rejected).
SELECT * INTO #tbl2 FROM
(
    VALUES
        (1,  1, 'A', 'XXX', 'CITYA', 'NY', 'A'),
        (2,  1, 'A', 'XXX', 'CITYA', 'NY', 'A'),
        (3,  1, 'A', 'XXX', 'CITYA', 'NY', 'A'),
        (4,  1, 'A', 'XXX', 'CITYA', 'NY', 'A'),
        (14, 1, 'A', 'XXX', 'CITYA', 'NY', 'R'),
        (5,  1, 'A', 'XXX', 'CITYA', 'NY', 'R'),
        (6,  1, 'A', 'XXX', 'CITYA', 'NY', 'R'),
        (7,  1, 'A', 'XXX', 'CITYB', 'NY', 'A'),
        (8,  1, 'A', 'XXX', 'CITYB', 'NY', 'A'),
        (9,  1, 'A', 'XXX', 'CITYC', 'NY', 'A'),
        (10, 1, 'A', 'XXX', 'CITYC', 'NY', 'R')
) TAB (StudentID, schoolID, Stu_Name, address, [CITY], [STATE], [STATUS])
QUERY
If you need to find out the cities where more than half of student applications were accepted irrespective of school, you can follow the below query.
-- Cities in the given state where more than half of the applications were
-- accepted, irrespective of school.
-- The parameter is a state code, so it is named @STATE and compared against
-- [STATE]; comparing it against [CITY] would match no rows for 'NY'.
DECLARE @STATE VARCHAR(30) = 'NY';

SELECT [CITY]
FROM #tbl2
WHERE [STATE] = @STATE
GROUP BY [CITY]
-- "More than half" written without division: twice the accepted count must
-- exceed the total count, which avoids integer-division rounding.
HAVING 2 * COUNT(CASE WHEN [STATUS] = 'A' THEN 1 END) > COUNT([STATUS]);
If you want to filter this condition by school, you can use the query below.
-- Cities in the given state where more than half of the applications to one
-- school were accepted.
-- The state parameter is named @STATE and compared against [STATE]; comparing
-- it against [CITY] would match no rows for 'NY'.
DECLARE @SCHOOLID INT = 1;
DECLARE @STATE VARCHAR(30) = 'NY';

SELECT T2.[CITY]
FROM #tbl1 AS T1
INNER JOIN #tbl2 AS T2
    ON T1.schoolID = T2.schoolID
WHERE T1.schoolID = @SCHOOLID
  AND T2.[STATE] = @STATE
GROUP BY T2.[CITY]
-- "More than half" written without division: twice the accepted count must
-- exceed the total count, which avoids integer-division rounding.
HAVING 2 * COUNT(CASE WHEN T2.[STATUS] = 'A' THEN 1 END) > COUNT(T2.[STATUS]);