SELECT Hive SQL columns where all values are < 5000 - sql

I'm learning about Hive and I have come across a question I cannot seem to find a workable answer for. I have to extract all of the numeric columns that contain ONLY integer values < 5000 from a table and create a space-separated text file. I am familiar with creating text files and selecting rows, but selecting columns that meet a specific condition I am not familiar with; any help or guidance will be appreciated. Below I've listed the structure of the table. There is also an image attached showing the data in table format. For the OUTPUT I need to go through ALL the COLUMNS and RETURN ONLY the COLUMNS whose integer values are all LESS THAN 5000.
create table lineorder (
lo_orderkey int,
lo_linenumber int,
lo_custkey int,
lo_partkey int,
lo_suppkey int,
lo_orderdate int,
lo_orderpriority varchar(15),
lo_shippriority varchar(1),
lo_quantity int,
lo_extendedprice int,
lo_ordertotalprice int,
lo_discount int,
lo_revenue int,
lo_supplycost int,
lo_tax int,
lo_commitdate int,
lo_shipmode varchar(10)
)
Data in tbl format

Conditional column selection is a terrible, horrible, no good, very bad idea.
That being said, here is a demo.
-- Demo data: 3 rows x 5 columns built with stack()
with t as
(
    select stack
    (
        3
        ,10 ,100  ,1000 ,'X' ,null
        ,20 ,null ,2000 ,'Y' ,200000
        ,30 ,300  ,3000 ,'Z' ,300000
    ) as (c1,c2,c3,c4,c5)
)
-- Print every row as a \x01-delimited record, then use regexp_replace to keep
-- only the back-references of the qualifying columns (included_columns)
select regexp_replace
       (
           printf(concat('%s',repeat(concat(unhex(1),'%s'),field(unhex(1),t.*,unhex(1))-2)),*)
          ,concat('([^\\x01]*)',repeat('\\x01([^\\x01]*)',field(unhex(1),t.*,unhex(1))-2))
          ,c.included_columns
       ) as record
from t
-- The cross-joined subquery computes, once, the replacement string listing
-- only the columns whose values are all integers below 5000
cross join (select ltrim
                   (
                       regexp_replace
                       (
                           concat_ws(' ',sort_array(collect_set(printf('$%010d',pos+1))))   -- back-references for all columns
                          ,concat
                           (
                               '( ?('
                              ,concat_ws
                               (
                                   '|'
                                  ,collect_set
                                   (
                                       case                                   -- columns to drop:
                                           when cast(pe.val as int) >= 5000   --   a value >= 5000
                                             or cast(pe.val as int) is null   --   or a null / non-integer value
                                           then printf('\\$%010d',pos+1)
                                       end
                                   )
                               )
                              ,'))|(?<=\\$)0+'
                           )
                          ,''
                       )
                   ) as included_columns
            from t
            lateral view posexplode(split(printf(concat('%s',repeat(concat(unhex(1),'%s'),field(unhex(1),*,unhex(1))-2)),*),'\\x01')) pe
           ) c
+---------+
| record |
+---------+
| 10 1000 |
| 20 2000 |
| 30 3000 |
+---------+

I don't think Hive supports variable substitution inside a function, so you would have to write a shell script that executes a first query returning the required columns. You can then assign the result to a shell variable, build a second query that writes the files to a local directory, and run it via hive -e from bash.
create table t1(x int, y int); -- table used for the query below
Sample bash script:
cols=$(hive -e "select concat_ws(',', case when max(x) < 5000 then 'x' end, case when max(y) < 5000 then 'y' end) from t1")
query="INSERT OVERWRITE LOCAL DIRECTORY '<directory name>' ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' select $cols from t1"
hive -e "$query"
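For reference, once $cols is filled in, the generated $query is just a plain Hive export statement of this shape (path and column list are illustrative only):
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t1_under5000'   -- example path only
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '          -- space-separated text output
select x, y from t1;                                   -- $cols expanded to the qualifying columns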

Related

How to get which range a value falls into using a SQL query?

Code:
IF OBJECT_ID('tempdb..#TempMaster','U') IS NOT NULL
DROP TABLE #TempMaster
IF OBJECT_ID('tempdb..#TempTransaction','U') IS NOT NULL
DROP TABLE #TempTransaction
CREATE TABLE #TempMaster
(
Sno INT IDENTITY(1,1),
RangeDesc VARCHAR(100),
RangeFromValue INT,
RangeToValue INT
)
CREATE TABLE #TempTransaction
(
Sno INT IDENTITY(1,1),
[Values] INT
)
INSERT INTO #TempMaster(RangeDesc,RangeFromValue,RangeToValue)
SELECT * FROM (VALUES('Type A',1,10),('Type B',11,20),('Type C',21,30)) AS T(RangeDesc,RangeFromValue,RangeToValue)
INSERT INTO #TempTransaction([Values])
SELECT 1
UNION ALL
SELECT 15
UNION ALL
SELECT 5
SELECT * FROM #TempMaster
SELECT * FROM #TempTransaction
Can anyone please help me get this kind of solution, based on which range each value falls into?
I want to get the range description; the query should produce the output below. Thanks in advance.
-------------------------------
sno Values RangeDesc
-------------------------------
1 1 Type A
2 15 Type B
3 5 Type A
-------------------------------
You can do a left join based on the range values:
select t1.Sno, t1.[Values], t2.RangeDesc
from #TempTransaction t1
left join #TempMaster t2 on t1.[Values] between t2.RangeFromValue and t2.RangeToValue

SQL Server table to nested xml file

I have a table like this:
Action Action2 Name Action3 Batch
--------------------------------------
add PL Steve add 1
add PL Steve add 3
add PL Steve add 4
add PL Steve add 5
add PL Steve add 1
add PL Steve add 3
add PL Steve add 4
add PL Steve add 5
and I need to turn it into a nested XML document.
Thanks @Isaac for the data script!
For <List> elements inside a <Branch> node, like <Branch><List/><List/></Branch>:
CREATE TABLE #mytable
(
Action VARCHAR(10),
Action2 VARCHAR(10),
Name VARCHAR(50),
Action3 VARCHAR(10),
Batch INT
);
INSERT INTO #mytable(Action,Action2,Name,Action3,Batch)
VALUES
('add','PL','Steve','add',1),
('add','PL','Steve','add',3),
('add','PL','Steve','add',4),
('add','PL','Steve','add',5),
('add','PL','Steve','add',1),
('add','PL','Steve','add',3),
('add','PL','Steve','add',4),
('add','PL','Steve','add',5);
INSERT INTO #mytable(Action,Action2,Name,Action3,Batch)
VALUES
('update','PL','John','insert',5),
('update','PL','Paul','insert',1),
('update','PL','Chris','delete',3),
('update','PL','Mary','update',4),
('update','PL','Jane','delete',5);
select a1.Action as '#Action', s.brancexml as '*'
from
(
select distinct Action
from #mytable
) as a1
cross apply
(
select
(
select a2.Action2 AS '#Action', a2.Name as '#Name', x.listxml as '*'
from
(
select distinct Action2, Name
from #mytable AS b
where b.Action = a1.Action
) AS a2
cross apply
(
select
(
select distinct c.Action3 as '#Action', c.Batch as '#Batch'
from #mytable AS c
where c.Action = a1.Action AND c.Action2 = a2.Action2 AND c.Name = a2.Name
for xml path('List'), type
) AS listxml
) as x
for xml path('Brance'), type
) as brancexml
) as s
for xml path('Start'), root('Entries'), type
You've got a solution already, but this might be done a bit more simply:
CREATE TABLE #mytable
(
Action VARCHAR(10),
Action2 VARCHAR(10),
Name VARCHAR(50),
Action3 VARCHAR(10),
Batch INT
);
INSERT INTO #mytable(Action,Action2,Name,Action3,Batch)
VALUES
('add','PL','Steve','add',1),
('add','PL','Steve','add',3),
('add','PL','Steve','add',4),
('add','PL','Steve','add',5),
('update','PL','John','insert',5),
('update','PL','Paul','insert',1),
('update','PL','Chris','delete',3),
('update','PL','Mary','update',4),
('update','PL','Jane','delete',5);
SELECT mt1.[Action] AS [#Action]
,(
SELECT mt2.Action2 AS [#Action]
,mt2.[Name] AS [#Name]
,(
SELECT mt3.Action3 AS [#Action]
,mt3.Batch AS [#Batch]
FROM #mytable mt3
WHERE mt3.[Action]=mt1.[Action]
AND mt3.Action2=mt2.Action2
AND mt3.[Name]=mt2.[Name]
FOR XML PATH('List'),TYPE
)
FROM #mytable mt2
WHERE mt2.[Action]=mt1.[Action]
GROUP BY mt2.Action2,mt2.[Name]
FOR XML PATH('Brance'),TYPE
)
FROM #mytable mt1
GROUP BY mt1.[Action]
FOR XML PATH('Start'),ROOT('Entries');
The idea in short:
We use a cascade of correlated sub-queries, each returning one fragment of the nested structure.
Using ,TYPE will return this as XML, otherwise you would get escaped text.
Using GROUP BY allows us to return the nesting data just once.
This won't be very fast... Indexes on the columns used in WHERE will help you.
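As a rough illustration of the kind of index that helps (assuming the data lives in a permanent table named mytable with the same columns as #mytable; the index name is arbitrary):
CREATE NONCLUSTERED INDEX IX_mytable_Action_Action2_Name
    ON mytable ([Action], Action2, [Name])   -- the columns used in the correlated WHERE clauses
    INCLUDE (Action3, Batch);                -- covers the innermost SELECT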

Create one json per one table row

I would like to create JSON documents from the data in the table.
The table looks like this:
|code |
+------+
|D5ABX0|
|MKT536|
|WAEX44|
I am using FOR JSON PATH which is nice:
SELECT [code]
FROM feature
FOR JSON PATH
but the return value of this query is three concatenated JSON objects in one row:
|JSON_F52E2B61-18A1-11d1-B105-00805F49916B |
+----------------------------------------------------------+
1 |[{"code":"D5ABX0"},{"code":"MKT536"},{"code":"WAEX44"}]|
I need to have each row to be a separate json, like that:
|JSON_return |
+---------------------+
1 |{"code":"D5ABX0"} |
2 |{"code":"MKT536"} |
3 |{"code":"WAEX44"} |
I tried using a splitting function (CROSS APPLY) that takes a separator as a parameter, but this is not a robust solution: the JSON could be longer or nested, and the split could then break apart a JSON object inside another JSON object rather than splitting whole documents:
;WITH split AS (
SELECT [json] = (SELECT code FROM feature FOR JSON PATH)
)
SELECT
T.StringElement
FROM split S
CROSS APPLY dbo.fnSplitDelimitedList([json], '},{') T
The output is:
|StringElement |
+---------------------+
1 |[{"code":"D5ABX0" |
2 |"code":"MKT536" |
3 |"code":"WAEX44"}] |
Is there a way to force sqlserver to create one json per row?
You'll need to use a subquery to achieve this; FOR JSON creates one JSON string for the entire returned dataset. This should get you what you're after:
CREATE TABLE #Sample (code varchar(6));
INSERT INTO #Sample
VALUES ('D5ABX0'),
('MKT536'),
('WAEX44');
SELECT (SELECT Code
FROM #Sample sq
WHERE sq.code = S.code
FOR JSON PATH)
FROM #Sample S;
DROP TABLE #Sample;
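Note that the correlated FOR JSON PATH subquery still wraps each row's object in array brackets ([{...}]). If you want the bare object per row, as shown in the question, SQL Server 2016 and later also accept WITHOUT_ARRAY_WRAPPER; a minimal variant of the query above (same #Sample setup):
SELECT (SELECT code
        FROM #Sample sq
        WHERE sq.code = S.code
        FOR JSON PATH, WITHOUT_ARRAY_WRAPPER) AS JSON_return
FROM #Sample S;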
CREATE TABLE #Temp
(
ID INT IDENTITY(1, 1) ,
StringValue NVARCHAR(100)
);
INSERT INTO #Temp
( StringValue )
VALUES ( N'D5ABX0' -- StringValue - nvarchar(100)
),
( 'MKT536' ),
( 'WAEX44' );
SELECT ID, '{"code":"' + StringValue + '"}' AS JSON_return FROM #Temp
DROP TABLE #Temp
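That SELECT yields one JSON object per row (row order is not guaranteed without an ORDER BY):
ID  JSON_return
1   {"code":"D5ABX0"}
2   {"code":"MKT536"}
3   {"code":"WAEX44"}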

Insert into select Subquery returned more than 1 value

I have the following code and it gives me an error when the table #ListaDeProducto has more than one row. Any ideas?
insert into Solicitud_Plastico_Interna_Detalle(
IDSolicitud_Plastico_Interna
,IDTipo_Producto
,Cantidad_Solicitada
,Create_User
,Create_Date
,Contingencia
,Total
)
select
#IdSolicitud
,IDTipo_Producto
,Cantidad_Requerida
,#USUARIO
,getdate()
,Contingencia
,Total
from #ListaDeProducto
Table schema
CREATE TYPE [ListaProductoTableType2] AS TABLE
(
IDTipo_Producto int,
Tipo_Producto varchar(1000),
Cantidad_Requerida int,
Contingencia int ,
Total int,
IdSolicitud_batch varchar(100)
)
GO
I would still bet there is a trigger on the table.
To prove that the query itself works with multiple rows, why don't you try writing the result into a new table:
select
    #IdSolicitud as IDSolicitud_Plastico_Interna
   ,IDTipo_Producto
   ,Cantidad_Requerida
   ,#USUARIO as Create_User
   ,getdate() as Create_Date
   ,Contingencia
   ,Total
into Solicitud_Plastico_Temporal   -- SELECT ... INTO creates the new table in SQL Server
from #ListaDeProducto
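To confirm or rule out the trigger suspicion directly, you can also list the triggers defined on the target table (a minimal sketch using SQL Server's catalog views):
SELECT tr.name, tr.is_disabled, tr.is_instead_of_trigger
FROM sys.triggers AS tr
WHERE tr.parent_id = OBJECT_ID('Solicitud_Plastico_Interna_Detalle');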

Inserting an auto generated value into a column with specific pattern

I have a table named tblSample which has columns ID, PID etc. I want to auto generate those two columns with a specific pattern.
For example:
ID PID
------ ------
ABC001 PAB001
ABC002 PAB002
ABC003 PAB003
ABC004 PAB004
| |
| |
ABC999 PAB999
As you can see, the prefix 'ABC' in ID and 'PAB' in PID stays the same. How can I insert those records into the table automatically, with the three digits after 'ABC' or 'PAB' ranging from 001 to 999?
My suggestion is to create the table structure as below, with one identity column testID and the ID and PID columns computed from it:
CREATE TABLE #tmpOne(testID INT IDENTITY (1,1),
    ID  AS ('ABC' + (CASE WHEN len(testID) <= 3 THEN CAST(RIGHT(0.001*testID, 3) AS VARCHAR) ELSE CAST(testID AS VARCHAR) END)),
    PID AS ('PAB' + (CASE WHEN len(testID) <= 3 THEN CAST(RIGHT(0.001*testID, 3) AS VARCHAR) ELSE CAST(testID AS VARCHAR) END)),
    Ename VARCHAR(20))
INSERT INTO #tmpOne(Ename)
SELECT 'Test'
SELECT * FROM #tmpOne
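With that single row inserted, the computed columns come out as:
testID  ID      PID     Ename
1       ABC001  PAB001  Test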
CREATE TABLE #tt(ID VARCHAR(100), PID VARCHAR(100))
GO
INSERT INTO #tt(ID, PID)
SELECT 'ABC' + RIGHT('000' + LTRIM(a.ID), 3),
       'PAB' + RIGHT('000' + LTRIM(a.ID), 3)
FROM (
    SELECT ISNULL(MAX(CASE WHEN SUBSTRING(t.ID, 4, LEN(ID)) > SUBSTRING(t.ID, 4, LEN(PID))
                           THEN SUBSTRING(t.ID, 4, LEN(ID))
                           ELSE SUBSTRING(t.ID, 4, LEN(PID)) END) + 1, 1) AS ID
    FROM #tt AS t
) AS a
GO 999   -- GO <n> re-runs the batch n times, producing ABC001/PAB001 through ABC999/PAB999
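If you just need the full ABC001/PAB001 through ABC999/PAB999 range in a single statement rather than re-running the batch 999 times, here is a minimal set-based sketch (the row source sys.all_objects and the padding below are illustrative, not part of either answer above):
;WITH n AS (
    SELECT TOP (999) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS i
    FROM sys.all_objects                                -- any object with at least 999 rows works here
)
SELECT 'ABC' + RIGHT('000' + CAST(i AS VARCHAR(3)), 3) AS ID,
       'PAB' + RIGHT('000' + CAST(i AS VARCHAR(3)), 3) AS PID
FROM n;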