WITH clause in HIVE

WITH clause in HIVE - hive

In below code temp1 is used 2 times, so hive will be execute the select query in temp1 twice? or just once?
with temp1(
select distinct b as b, f, t1.id as id,
from
table1 t1
join
table2 t2 on (t1.id=t2.id)
),
agg_tbl as
(
select
max(abc) as maxabc,
tbl.t
from(
select
count(*) as cnt, b, f
from
temp1
group by
b, f
) tbl group by tbl.t
),
class_tbl as
(select
case
when bp = 1 then 'abc'
when bp = 2 then 'xyz'
end as class,
bp
from
( select
count(b) as bcount, bp
from
temp1
group by bp
)tbl
)
If that is the case, then using temporary tables will make sense.
Thanks.

Related

How do I count three different distinct values and group on an ID in MS-Access?

So I know MS-Access does not allow SELECT COUNT(DISTINCT....) FROM ..., but I am trying to find a more viable alternative to the usual standard of
SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1)
My problem is I am trying to do three separate Count functions and group them on ID. If I use the method above, it is giving me the total unique value count for the whole table instead of the total count for only the value of ID. I tried doing
(SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1 as T2
WHERE T2.ColumnA = T1.ColumnA)) As MyVal
FROM table1 as T1
but it tells me I need to specify a value for T1.ColumnA.
The SQL query I am trying to accomplish is this:
SELECT ID
COUNT(DISTINCT ColumnA) as CA,
COUNT(DISTINCT ColumnB) as CB,
COUNT(DISTINCT ColumnC) as CC
FROM table1
GROUP BY ID
Any ideas?

You can use subqueries. Assuming you have a table where each id occurs once:
select (select count(*)
from (select columnA
from table1 t1
where t1.id = t.id
group by columnA
) as a
) as num_a,
(select count(*)
from (select columnB
from table1 t1
where t1.id = t.id
group by columnB
) as b
) as num_b,
(select count(*)
from (select columnC
from table1 t1
where t1.id = t.id
group by columnC
) as c
) as num_c
from <table with ids> as t;
I'm not sure if you'll think this is "viable".
EDIT:
This makes it even more complicated . . . it suggests that MS Access doesn't support correlation clauses more than one level deep (might you consider switching to another database?).
In any case, the brute force way:
select a.id, a.numA, b.numB, c.numC
from ((select id, count(*) as numA
from (select id, columnA
from table1 t1
group by id, columnA
) as a
) as a inner join
(select id, count(*) as numB
from (select id, columnB
from table1 t1
group by id, columnB
) as b
) as b
on a.id = b.id
) inner join
(select id, count(*) as numC
from (select id, columnC
from table1 t1
group by id, columnC
) as c
) c
on c.id = a.id;

how to union the tables in big query

select a,b,c from(SELECT max(modifiedtime) a FROM tabl1 ),
(SELECT max(modifiedtime) b FROM tabl2 ),
(SELECT max(modifiedtime) c FROM tabl3 ) ;
working fine but...output is like..
Row
a b c
1 1421906942814 null null
2 null 1421906942814 null
3 null null 1421906942817
but I want to print data like
a b c
1421906942814 1421906942814 1421906942817
how to do it...

As far I know you need to use a dummykey (even if it's a constant) to be able to do the JOIN on it.
This works:
SELECT t1.value,
t2.value,
t3.value
FROM
(SELECT 1 AS dummykey,
'max1' AS value) t1
JOIN
(SELECT 1 AS dummykey,
'max2' AS value) t2 ON t1.dummykey=t2.dummykey
JOIN
(SELECT 1 AS dummykey,
'max3' AS value) t3 ON t1.dummykey=t3.dummykey

try
SELECT
(SELECT max(modifiedtime)
FROM tabl1) a,
(SELECT max(modifiedtime)
FROM tabl2) b,
(SELECT max(modifiedtime)
FROM tabl3) c;

I would do CROSS JOIN between them:
SELECT * FROM
(SELECT max(modifiedtime) FROM tabl1) a
CROSS JOIN
(SELECT max(modifiedtime) FROM tabl2) b
CROSS JOIN
(SELECT max(modifiedtime) FROM tabl3) c;

compare two tables on SQL server with result to show which column from its associated table

I need to compare two tables on SQL server.
table1
id value
3 5
table2
id value
8 6
SELECT *
FROM
(
SELECT a.id AS a_id, a.value AS a_value
FROM [table1] as a
UNION ALL
SELECT b.id AS b_id, b.value AS b_value
FROM [table2] as b
) tmp
GROUP BY tmp.a_id, tmp.a_value
HAVING COUNT(*) = 1
ORDER BY tmp.a_id
I get result:
a_id a_value
3 5
8 6
I need to know which column is from which table, e.g.
a_id a_value b_id b_value
3 5 8 6
Any help would be appreciated !
Thanks !

What about:
SELECT *
FROM (
SELECT a.id, a.value, 'Table1' AS TableName
FROM [table1] as a
UNION ALL
SELECT b.id, b.value, 'Table2' AS TableName
FROM [table2] as b
) tmp
GROUP BY tmp.id, tmp.value, tmp.TableName
HAVING COUNT(*) = 1
ORDER BY tmp.id

You could add a column for tbl. Something like:
SELECT
m.id
,m.value
INTO #tmp
FROM
(
SELECT a.id AS id, a.value AS value
FROM [table1] as a
UNION ALL
SELECT b.id AS id, b.value AS value
FROM [table2] as b
) m
GROUP BY
m.id
,m.value
HAVING COUNT(*) = 1
CREATE CLUSTERED INDEX ix_tmpidvalue ON #tmp
(
id ASC
,value ASC
)
SELECT *
FROM
(
SELECT a.id AS id, a.value AS value, 'a' tbl
FROM [table1] as a
UNION ALL
SELECT b.id AS id, b.value AS value, 'b' tbl
FROM [table2] as b
) tmp
WHERE EXISTS
(
SELECT 1
FROM #tmp n
WHERE tmp.id = n.id
AND tmp.value = n.value
)
ORDER BY tmp.id

SELECT Row Values WHERE MAX() is Column Value In GROUP BY Query

How can I select like this? Can I create a User defined Aggregate Function
SELECT Max(A),(SELECT TOP 1 FROM TheGroup Where B=Max(A)) FROM MyTable
where MyTable as Shown Below
A B C
--------------
1 2 S
3 4 S
4 5 T
6 7 T
I want a Query Like this
SELECT MAX(A),(B Where A=Max(A)),C FROM MYTable GROUP BY C
I'm Expecting the result as below
MAX(A) Condition C
-----------------------
3 4 S
6 7 T

SELECT A,B,C FROM
(SELECT *, ROW_NUMBER() OVER (PARTITION BY C ORDER BY A DESC) RN FROM MyTable)
WHERE RN = 1
(this query will always return only one row per C value)
OR
WITH CTE_Group AS
(
SELECT C, MAX(A) AS MaxA
FROM MyTable
GROUP BY C
)
SELECT g.MaxA, t.B, g.C
FROM MyTable t
INNER JOIN CTE_Group g ON t.A = g.MaxA AND t.C = g.C
(if there are multiple rows that have same Max(A) value - this query will return all of them)

Try Following Query :
SELECT TABLE1.A , TABLE2.B , TABLE1.C
FROM
(
SELECT MAX(A) AS A,C
FROM MYTable
GROUP BY C
) AS TABLE1
INNER JOIN
(
SELECT *
FROM MYTable
) AS TABLE2 ON TABLE1.A = TABLE2.A
SQLFIDDLE
you can do it by simple join query . join query always run faster then In query . Join query run only one time at the time of execution of the query . we can archive same result by using IN query .

SELECT t1.*
FROM YourTable t1
Left Outer Join YourTable t2 on t1.C=t2.C AND t1.A < t2.A
WHERE t2.A is null

how about this:
SELECT *
FROM MyTable
WHERE A IN (SELECT MAX(A) FROM MyTable GROUP BY C)

SELECT Max(A)
FROM MyTable
Where B=(SELECT Max(A) FROM MyTable)
update:
SELECT *
FROM MyTable
Where B=(SELECT Max(A) FROM MyTable)
update 2:
SELECT DISTINCT A, B
FROM MyTable
Where A=(SELECT Max(A) FROM MyTable GROUP BY C)
update 3:
ok, I think I understand what you're looking for now.. How about this:
SELECT *
FROM MyTable
Where A in (SELECT Max(A) FROM MyTable GROUP BY C)

WITH
cte AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY cola desc) AS Rno,
*
FROM
tbl
)
SELECT top 1
cola,colb
FROM
cte
order by Rno
Then try it:
WITH
cte AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1 desc) AS Rno,
*
FROM
tbl
)
SELECT
col1,col2,col3
FROM
cte
WHERE Rno=1

Join Table1 with Table2 using Table1 column values in Table2's conditionals

I have the following select statement...
SELECT ROW_NUMBER() OVER(order by cola) as [id], cola
FROM Table1
That makes up my table of all values I'm wanting to insert as #var, right now it works but I have to specify #var each time...
SELECT #var AS [Cola], (
SELECT COUNT(*)
FROM vwTableA AS Z
WHERE Cola = COALESCE(#var,Cola)
AND NOT EXISTS (
SELECT *
FROM TableB
WHERE Colb = Z.Colb
)
) AS [NewCol1],
(
SELECT COUNT(*)
FROM vwTableB AS Y
INNER JOIN TableC AS C
ON Y.Colc = C.Colc
WHERE Y.Cola = #var
) AS [NewCol2],
(
SELECT COUNT(*)
FROM vwTableC AS X
INNER JOIN TableD AS D
ON X.Colc = D.Colc
WHERE X.Cola = #var
) AS [NewCol3]
So I'm wanting to run this second select through all the values of "cola" from the first Select/Table I showed, instead of having to specify the #var and it only return one row each time. How can I do this?

If you use a Common Table Expression CTE you can use it to join to your other statement
with var as
( SELECT ROW_NUMBER() OVER(order by cola) as [id], cola
FROM Table1)
SELECT var.id AS [Cola], (
SELECT COUNT(*)
FROM vwTableA AS Z
WHERE Cola = COALESCE(var.id,Cola)
AND NOT EXISTS (
SELECT *
FROM TableB
WHERE Colb = Z.Colb
)
) AS [NewCol1],
(
SELECT COUNT(*)
FROM vwTableB AS Y
INNER JOIN TableC AS C
ON Y.Colc = C.Colc
WHERE Y.Cola = var.id
) AS [NewCol2],
(
SELECT COUNT(*)
FROM vwTableC AS X
INNER JOIN TableD AS D
ON X.Colc = D.Colc
WHERE X.Cola = var.id
) AS [NewCol3]
FROM var

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

WITH clause in HIVE - hive

Related

How do I count three different distinct values and group on an ID in MS-Access?

how to union the tables in big query

compare two tables on SQL server with result to show which column from its associated table

SELECT Row Values WHERE MAX() is Column Value In GROUP BY Query

Join Table1 with Table2 using Table1 column values in Table2's conditionals

Categories

Resources