WITH clause in HIVE - hive

In the code below, temp1 is used twice. Will Hive execute the SELECT query in temp1 twice, or just once?
-- temp1 is read by both agg_tbl and class_tbl below.
with temp1 as (
    select distinct
        b as b,
        f,
        t1.id as id
    from table1 t1
    join table2 t2 on (t1.id = t2.id)
),
-- NOTE(review): abc and tbl.t are not produced by the inner select
-- (it exposes only cnt, b, f) -- confirm the intended column names.
agg_tbl as (
    select
        max(abc) as maxabc,
        tbl.t
    from (
        select
            count(*) as cnt,
            b,
            f
        from temp1
        group by b, f
    ) tbl
    group by tbl.t
),
-- NOTE(review): bp is not among the columns temp1 selects (b, f, id) -- confirm.
class_tbl as (
    select
        case
            when bp = 1 then 'abc'
            when bp = 2 then 'xyz'
        end as class,
        bp
    from (
        select
            count(b) as bcount,
            bp
        from temp1
        group by bp
    ) tbl
)
If it is executed twice, then using temporary tables would make sense.
Thanks.

Related

How do I count three different distinct values and group on an ID in MS-Access?

So I know MS-Access does not allow SELECT COUNT(DISTINCT....) FROM ..., but I am trying to find a more viable alternative to the usual standard of
SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1)
My problem is I am trying to do three separate Count functions and group them on ID. If I use the method above, it is giving me the total unique value count for the whole table instead of the total count for only the value of ID. I tried doing
(SELECT COUNT(*) FROM (SELECT DISTINCT Name FROM table1 as T2
WHERE T2.ColumnA = T1.ColumnA)) As MyVal
FROM table1 as T1
but it tells me I need to specify a value for T1.ColumnA.
The SQL query I am trying to accomplish is this:
-- Target result: one row per ID with the number of distinct values in each column.
SELECT ID,
COUNT(DISTINCT ColumnA) as CA,
COUNT(DISTINCT ColumnB) as CB,
COUNT(DISTINCT ColumnC) as CC
FROM table1
GROUP BY ID
Any ideas?
You can use subqueries. Assuming you have a table where each id occurs once:
-- For every id in the driving table, count the distinct values of
-- columnA, columnB and columnC that table1 holds for that id.
select (select count(*)
        from (select distinct src.columnA
              from table1 as src
              where src.id = t.id
             ) as a
       ) as num_a,
       (select count(*)
        from (select distinct src.columnB
              from table1 as src
              where src.id = t.id
             ) as b
       ) as num_b,
       (select count(*)
        from (select distinct src.columnC
              from table1 as src
              where src.id = t.id
             ) as c
       ) as num_c
from <table with ids> as t;
I'm not sure if you'll think this is "viable".
EDIT:
This makes it even more complicated . . . it suggests that MS Access doesn't support correlation clauses more than one level deep (might you consider switching to another database?).
In any case, the brute force way:
-- One derived table per column: the inner query de-duplicates (id, column)
-- pairs, the outer query counts the remaining rows per id. The outer
-- "group by id" is required because id is selected next to count(*).
select a.id, a.numA, b.numB, c.numC
from ((select id, count(*) as numA
       from (select id, columnA
             from table1 t1
             group by id, columnA
            ) as a
       group by id
      ) as a inner join
      (select id, count(*) as numB
       from (select id, columnB
             from table1 t1
             group by id, columnB
            ) as b
       group by id
      ) as b
      on a.id = b.id
     ) inner join
     (select id, count(*) as numC
      from (select id, columnC
            from table1 t1
            group by id, columnC
           ) as c
      group by id
     ) c
     on c.id = a.id;

how to union the tables in big query

-- Selects the latest modifiedtime of each of the three tables as columns a, b, c.
-- The three subqueries are separated by commas; per the output shown below this
-- yields one row per subquery (other columns null), not a single combined row.
select a,b,c from(SELECT max(modifiedtime) a FROM tabl1 ),
(SELECT max(modifiedtime) b FROM tabl2 ),
(SELECT max(modifiedtime) c FROM tabl3 ) ;
This works fine, but the output looks like this:
Row
a b c
1 1421906942814 null null
2 null 1421906942814 null
3 null null 1421906942817
But I want the output to look like this:
a b c
1421906942814 1421906942814 1421906942817
how to do it...
As far as I know, you need to use a dummy key (even if it's a constant) to be able to do the JOIN on it.
This works:
-- Each single-row subquery carries the same constant dummykey, so joining on
-- it places the three values side by side in one row.
SELECT
    t1.value,
    t2.value,
    t3.value
FROM (SELECT 1 AS dummykey, 'max1' AS value) t1
JOIN (SELECT 1 AS dummykey, 'max2' AS value) t2
    ON t1.dummykey = t2.dummykey
JOIN (SELECT 1 AS dummykey, 'max3' AS value) t3
    ON t1.dummykey = t3.dummykey
try
-- One scalar subquery per table; each becomes a column of the single result row.
SELECT
    (SELECT max(modifiedtime) FROM tabl1) AS a,
    (SELECT max(modifiedtime) FROM tabl2) AS b,
    (SELECT max(modifiedtime) FROM tabl3) AS c;
I would do CROSS JOIN between them:
-- Each subquery returns exactly one row, so the cross join yields one row.
-- The aggregates are aliased explicitly so the result columns are a, b, c.
SELECT t1.a AS a, t2.b AS b, t3.c AS c FROM
(SELECT max(modifiedtime) AS a FROM tabl1) t1
CROSS JOIN
(SELECT max(modifiedtime) AS b FROM tabl2) t2
CROSS JOIN
(SELECT max(modifiedtime) AS c FROM tabl3) t3;

compare two tables on SQL server with result to show which column from its associated table

I need to compare two tables on SQL server.
table1
id value
3 5
table2
id value
8 6
-- Stacks the rows of table1 and table2 and keeps the (id, value) pairs that
-- occur exactly once in the combined set.
SELECT *
FROM
(
SELECT a.id AS a_id, a.value AS a_value
FROM [table1] as a
UNION ALL
-- Only the first branch's aliases (a_id, a_value) are referenced outside the
-- union; b_id / b_value do not appear in the result shown below.
SELECT b.id AS b_id, b.value AS b_value
FROM [table2] as b
) tmp
GROUP BY tmp.a_id, tmp.a_value
HAVING COUNT(*) = 1
ORDER BY tmp.a_id
I get result:
a_id a_value
3 5
8 6
I need to know which column is from which table, e.g.
a_id a_value b_id b_value
3 5 8 6
Any help would be appreciated !
Thanks !
What about:
-- Rows that appear in only one of the two tables, labelled with their source.
-- TableName must stay out of the GROUP BY: grouping by it would put a row
-- shared by both tables into two separate groups of one, and both would then
-- pass the HAVING COUNT(*) = 1 filter. With a single row per group,
-- MIN(TableName) is simply that row's source table.
SELECT tmp.id, tmp.value, MIN(tmp.TableName) AS TableName
FROM (
    SELECT a.id, a.value, 'Table1' AS TableName
    FROM [table1] as a
    UNION ALL
    SELECT b.id, b.value, 'Table2' AS TableName
    FROM [table2] as b
) tmp
GROUP BY tmp.id, tmp.value
HAVING COUNT(*) = 1
ORDER BY tmp.id
You could add a column for tbl. Something like:
-- Step 1: store in #tmp every (id, value) pair that occurs exactly once
-- across table1 and table2 combined.
SELECT
m.id
,m.value
INTO #tmp
FROM
(
SELECT a.id AS id, a.value AS value
FROM [table1] as a
UNION ALL
SELECT b.id AS id, b.value AS value
FROM [table2] as b
) m
GROUP BY
m.id
,m.value
HAVING COUNT(*) = 1
-- Step 2: index #tmp on the two columns used by the EXISTS lookup below.
CREATE CLUSTERED INDEX ix_tmpidvalue ON #tmp
(
id ASC
,value ASC
)
-- Step 3: re-read both tables, tagging each row with its source ('a' or 'b'),
-- and keep only the rows whose (id, value) pair was stored in #tmp.
SELECT *
FROM
(
SELECT a.id AS id, a.value AS value, 'a' tbl
FROM [table1] as a
UNION ALL
SELECT b.id AS id, b.value AS value, 'b' tbl
FROM [table2] as b
) tmp
WHERE EXISTS
(
SELECT 1
FROM #tmp n
WHERE tmp.id = n.id
AND tmp.value = n.value
)
ORDER BY tmp.id

SELECT Row Values WHERE MAX() is Column Value In GROUP BY Query

How can I select like this? Can I create a User defined Aggregate Function
SELECT Max(A),(SELECT TOP 1 FROM TheGroup Where B=Max(A)) FROM MyTable
where MyTable as Shown Below
A B C
--------------
1 2 S
3 4 S
4 5 T
6 7 T
I want a Query Like this
SELECT MAX(A),(B Where A=Max(A)),C FROM MYTable GROUP BY C
I'm Expecting the result as below
MAX(A) Condition C
-----------------------
3 4 S
6 7 T
-- Rank rows within each C by A descending and keep the top-ranked row.
-- The derived table needs an alias; SQL Server rejects it otherwise.
SELECT A,B,C FROM
(SELECT *, ROW_NUMBER() OVER (PARTITION BY C ORDER BY A DESC) RN FROM MyTable) AS ranked
WHERE RN = 1
(this query will always return only one row per C value)
OR
-- Compute the maximum A per C, then join back to MyTable to pick up the B
-- value of every row that carries that maximum.
WITH max_per_c AS (
    SELECT C, MAX(A) AS MaxA
    FROM MyTable
    GROUP BY C
)
SELECT m.MaxA, src.B, m.C
FROM max_per_c AS m
INNER JOIN MyTable AS src
    ON src.A = m.MaxA
   AND src.C = m.C
(if there are multiple rows that have same Max(A) value - this query will return all of them)
Try Following Query :
-- TABLE1 holds the maximum A per C; TABLE2 supplies the B of the matching row.
-- The join must match on C as well as A, otherwise a row from a different
-- group that happens to share the same A value would also be returned.
SELECT TABLE1.A , TABLE2.B , TABLE1.C
FROM
(
    SELECT MAX(A) AS A, C
    FROM MYTable
    GROUP BY C
) AS TABLE1
INNER JOIN MYTable AS TABLE2
    ON TABLE1.A = TABLE2.A
   AND TABLE1.C = TABLE2.C
SQLFIDDLE
You can do it with a simple join query. A join query always runs faster than an IN query: the join runs only once when the query is executed. We can achieve the same result by using an IN query.
-- Anti-join: keep a row only when no other row with the same C has a larger A.
SELECT cur.*
FROM YourTable AS cur
LEFT JOIN YourTable AS bigger
    ON bigger.C = cur.C
   AND bigger.A > cur.A
WHERE bigger.A IS NULL
how about this:
-- Return the rows holding the largest A within their own C group.
-- The subquery is correlated on C: comparing against the maxima of all groups
-- would also return a row whose A merely equals another group's maximum.
SELECT *
FROM MyTable t
WHERE t.A = (SELECT MAX(m.A) FROM MyTable m WHERE m.C = t.C)
-- Largest A among the rows whose B equals the table-wide maximum of A.
SELECT MAX(t.A)
FROM MyTable AS t
WHERE t.B = (SELECT MAX(m.A) FROM MyTable AS m)
update:
-- All rows whose B equals the table-wide maximum of A.
SELECT *
FROM MyTable AS t
WHERE t.B = (SELECT MAX(m.A) FROM MyTable AS m)
update 2:
-- A scalar "=" comparison needs a subquery that returns a single value.
-- Grouping by C returns one row per group, so the subquery is correlated on
-- the outer row's C instead.
SELECT DISTINCT t.A, t.B
FROM MyTable t
Where t.A=(SELECT Max(m.A) FROM MyTable m WHERE m.C = t.C)
update 3:
ok, I think I understand what you're looking for now.. How about this:
-- Return the rows holding the largest A within their own C group.
-- The subquery is correlated on C so that a row is compared only against its
-- own group's maximum, not against the maxima of every group.
SELECT *
FROM MyTable t
Where t.A = (SELECT Max(m.A) FROM MyTable m WHERE m.C = t.C)
-- Number all rows of tbl by cola descending and return the first one.
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY cola DESC) AS Rno,
        *
    FROM tbl
)
SELECT TOP 1
    cola,
    colb
FROM numbered
ORDER BY Rno
Then try it:
-- Number the rows of each col3 group by col1 descending and keep row 1 of
-- every group.
WITH numbered AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY col3 ORDER BY col1 DESC) AS Rno,
        *
    FROM tbl
)
SELECT
    col1,
    col2,
    col3
FROM numbered
WHERE Rno = 1

Join Table1 with Table2 using Table1 column values in Table2's conditionals

I have the following select statement...
-- Every cola value of Table1 with a sequential id assigned in cola order.
SELECT
    ROW_NUMBER() OVER (ORDER BY cola) AS [id],
    cola
FROM Table1
That makes up my table of all values I'm wanting to insert as #var, right now it works but I have to specify #var each time...
-- Returns one row for the value held in #var, with three counts next to it.
-- NOTE(review): #var is presumably a T-SQL variable (@var) whose sigil was
-- altered when this text was captured -- confirm.
SELECT #var AS [Cola], (
-- NewCol1: rows of vwTableA matching the value (or every row when it is NULL,
-- via COALESCE) that have no TableB row with the same Colb.
SELECT COUNT(*)
FROM vwTableA AS Z
WHERE Cola = COALESCE(#var,Cola)
AND NOT EXISTS (
SELECT *
FROM TableB
WHERE Colb = Z.Colb
)
) AS [NewCol1],
(
-- NewCol2: vwTableB rows for the value, joined to TableC on Colc.
SELECT COUNT(*)
FROM vwTableB AS Y
INNER JOIN TableC AS C
ON Y.Colc = C.Colc
WHERE Y.Cola = #var
) AS [NewCol2],
(
-- NewCol3: vwTableC rows for the value, joined to TableD on Colc.
SELECT COUNT(*)
FROM vwTableC AS X
INNER JOIN TableD AS D
ON X.Colc = D.Colc
WHERE X.Cola = #var
) AS [NewCol3]
So I'm wanting to run this second select through all the values of "cola" from the first Select/Table I showed, instead of having to specify the #var and it only return one row each time. How can I do this?
If you use a Common Table Expression (CTE), you can join it to your other statement:
-- var lists every row of Table1 with its cola value (plus a sequential id).
-- The outer query computes the three counts once per row of var, using the
-- cola value where the single-value version of the query used its parameter.
-- The generated id is only a row number, so it is not used for filtering.
with var as
( SELECT ROW_NUMBER() OVER(order by cola) as [id], cola
FROM Table1)
SELECT var.cola AS [Cola], (
    -- Rows of vwTableA for this cola that have no TableB row with the same Colb.
    SELECT COUNT(*)
    FROM vwTableA AS Z
    WHERE Z.Cola = COALESCE(var.cola, Z.Cola)
    AND NOT EXISTS (
        SELECT *
        FROM TableB
        WHERE Colb = Z.Colb
    )
) AS [NewCol1],
(
    -- vwTableB rows for this cola, joined to TableC on Colc.
    SELECT COUNT(*)
    FROM vwTableB AS Y
    INNER JOIN TableC AS C
    ON Y.Colc = C.Colc
    WHERE Y.Cola = var.cola
) AS [NewCol2],
(
    -- vwTableC rows for this cola, joined to TableD on Colc.
    SELECT COUNT(*)
    FROM vwTableC AS X
    INNER JOIN TableD AS D
    ON X.Colc = D.Colc
    WHERE X.Cola = var.cola
) AS [NewCol3]
FROM var