I have data in a table. I want to split the array data into separate rows.
create table test1 (
_id serial,
questionId character varying (50),
answer character varying (50),
subquestionId character varying (50),
subquestionAnswer character varying (50),
isActive character varying (1)
);
INSERT INTO test1 (questionid,answer,subquestionid,subquestionanswer,isactive)
values
('question 1','true','[100,101,102]','[[true],[false],[true]]','1'),
('question 2','false','[101,106]','[[false],[true]]','1');
_id|questionid|answer|subquestionid|subquestionanswer      |isactive|
---+----------+------+-------------+-----------------------+--------+
1  |question 1|true  |[100,101,102]|[[true],[false],[true]]|1       |
2  |question 2|false |[101,106]    |[[false],[true]]       |1       |
The required output is one row per array element.
As explained in this answer, you may convert the string representation of the arrays to real arrays with translate and a cast. (As commented, you should not store arrays as strings.)
select
_id,
answer,
translate(subquestionid, '[]', '{}')::int[] subquestionid,
translate(subquestionanswer, '[]', '{}')::boolean[] subquestionanswer,
isactive
from test1;
Result:
_id|answer|subquestionid|subquestionanswer |isactive|
---+------+-------------+-----------------------+--------+
1|true |{100,101,102}|{{true},{false},{true}}|1 |
2|false |{101,106} |{{false},{true}} |1 |
To split the arrays, use unnest and keep the element order using WITH ORDINALITY.
Finally, limit the result to rows with identical ordinality, which I assume is the expected result, though it differs from your output:
with t as (
select
_id,
answer,
translate(subquestionid, '[]', '{}')::int[] subquestionid,
translate(subquestionanswer, '[]', '{}')::boolean[] subquestionanswer,
isactive
from test1
)
select
t._id,
t.answer,
a.subquestionid,
b.subquestionanswer,
t.isactive
from t
cross join lateral unnest(subquestionid) WITH ORDINALITY AS a(subquestionid, nr)
cross join lateral unnest(subquestionanswer) WITH ORDINALITY AS b(subquestionanswer, nr)
where a.nr = b.nr;
_id|answer|subquestionid|subquestionanswer|isactive|
---+------+-------------+-----------------+--------+
1|true | 100|true |1 |
1|true | 101|false |1 |
1|true | 102|true |1 |
2|false | 101|false |1 |
2|false | 106|true |1 |
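Side note: PostgreSQL's multi-argument unnest can zip the two arrays in a single call (this form is only allowed in the FROM clause), which makes the ordinality join unnecessary. A sketch built on the same CTE as above:
with t as (
select
_id,
answer,
translate(subquestionid, '[]', '{}')::int[] subquestionid,
translate(subquestionanswer, '[]', '{}')::boolean[] subquestionanswer,
isactive
from test1
)
select t._id, t.answer, u.subquestionid, u.subquestionanswer, t.isactive
from t
cross join lateral unnest(t.subquestionid, t.subquestionanswer)
as u(subquestionid, subquestionanswer);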
You can do it using string_to_array to convert the string to an array, and generate multiple rows from a single one as follows:
SELECT *
FROM (
SELECT t._id, t.questionid, t.answer, REGEXP_REPLACE(theAnswer, '\[|\]', '', 'g') as subquestionanswer, t.isactive
FROM test1 t,
unnest(string_to_array(subquestionanswer, '],[')) theAnswer
) S
This is how it should be:
SELECT t._id, t.questionid, t.answer, s.subquestionid, t.subquestionanswer, t.isactive
FROM (
SELECT _id, questionid, answer, REGEXP_REPLACE(subquestionans, '\[|\]', '', 'g') as subquestionanswer, isactive,
ROW_NUMBER () OVER (
ORDER BY _id
) AS rn
FROM test1,
unnest(string_to_array(subquestionanswer, '],[')) subquestionans
) t
inner join (
SELECT _id, REGEXP_REPLACE(subquestion, '\[|\]', '', 'g') as subquestionid,
ROW_NUMBER () OVER (
ORDER BY _id
) AS rn
FROM test1 ,
unnest(string_to_array(subquestionid, ',')) subquestion
) s on s.rn = t.rn;
Demo here: https://dbfiddle.uk/b1w3RyCJ
Thank you for the reply!
The table data below needs to be converted into the screenshot format. Earlier there was one value per bracket; now one bracket holds two values (subquestionid-153). I have extracted this data from Talend.
create table test1 (_id serial, questionId character varying (50), answer character varying (50), subquestionId character varying (50), subquestionAnswer character varying (225), isActive character varying (1));
INSERT INTO test1 (questionid,answer,subquestionid,subquestionanswer,isactive) values ('question 1','true','[100,101,102]','[[true],[false],[true]]','1'),('question 2','false','[101,106]','[[false],[true]]','1');
I have a table like this:
campaign             |code|
---------------------+----+
AL2330GH_HDKASL_QCLKP|NULL|
JPDJK34_QPKSLL_QKPAL |NULL|
QCK32_SDSKDS_TLLKA   |NULL|
I want to update the above table by populating the column 'code' with the substring in column 'campaign' which starts with 'AL', 'QC', or 'QP'. All the column values have 3 substrings separated by '_'. If none of the substrings matches the provided values, then keep the 'code' column value as NULL. And if multiple matches happen, take the first substring.
Desired Output:
campaign             |code    |
---------------------+--------+
AL2330GH_HDKASL_QCLKP|AL2330GH|
JPDJK34_QPKSLL_QKPAL |QPKSLL  |
QCK32_SDSKDS_TLLKA   |QCK32   |
Link to try out the problem: https://www.db-fiddle.com/f/8qoFDL1RmjwpwFNP3LP4eK/1
Here's a method using OPENJSON():
;WITH src AS
(
SELECT campaign, value, code,
rn = ROW_NUMBER() OVER (PARTITION BY campaign ORDER BY [key])
FROM
(
SELECT campaign, [key], value, code
FROM dbo.SomeTable
CROSS APPLY OPENJSON(CONCAT('["',
REPLACE(STRING_ESCAPE(campaign,'JSON'),'_','","'),'"]')) AS j
) AS x WHERE LEFT(value,2) IN ('AL','QC','QP')
)
UPDATE src SET code = value WHERE rn = 1;
Example db<>fiddle
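To see what OPENJSON is being fed, note that the STRING_ESCAPE/REPLACE/CONCAT combination rewrites each campaign value as a JSON array literal. A standalone illustration of just that step:
SELECT CONCAT('["',
REPLACE(STRING_ESCAPE('AL2330GH_HDKASL_QCLKP','JSON'),'_','","'),'"]');
-- returns ["AL2330GH","HDKASL","QCLKP"]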
You can try to use STRING_SPLIT with CROSS APPLY and the ROW_NUMBER window function to do it.
CHARINDEX returns the position of the first match of the value passed as its first parameter, so ordering by it tells us which split value appears first in the string.
SELECT campaign,value
FROM (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY campaign ORDER BY CHARINDEX(v.value,t1.campaign)) rn
FROM mainTable t1
CROSS APPLY STRING_SPLIT(t1.campaign,'_') v
WHERE (value LIKE 'AL%'
OR value LIKE 'QC%'
OR value LIKE 'QP%')
) t1
WHERE rn = 1
If you want to UPDATE values, you can try an UPDATE like this.
UPDATE t1
SET
code = value
FROM (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY campaign ORDER BY CHARINDEX(v.value,t1.campaign)) rn
FROM mainTable t1
CROSS APPLY STRING_SPLIT(t1.campaign,'_') v
WHERE (value LIKE 'AL%'
OR value LIKE 'QC%'
OR value LIKE 'QP%')
) t1
WHERE rn = 1
sqlfiddle
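Side note: STRING_SPLIT does not guarantee output order, which is why CHARINDEX is used for ordering above. If you are on SQL Server 2022 or Azure SQL, the optional enable_ordinal argument returns a guaranteed ordinal column you could order by instead; a sketch of that variant:
SELECT campaign,value
FROM (
SELECT t1.campaign, v.value,
ROW_NUMBER() OVER(PARTITION BY t1.campaign ORDER BY v.ordinal) rn
FROM mainTable t1
CROSS APPLY STRING_SPLIT(t1.campaign,'_',1) v
WHERE (v.value LIKE 'AL%'
OR v.value LIKE 'QC%'
OR v.value LIKE 'QP%')
) t1
WHERE rn = 1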
Please try the following solution.
It is using XML and XQuery for tokenization. The XML/XQuery data model is based on ordered sequences, which is exactly what we need for this scenario.
SQL
-- DDL and sample data population, start
DECLARE @tbl TABLE (id INT IDENTITY PRIMARY KEY, campaign varchar(50), code varchar(20));
INSERT INTO @tbl (campaign, code) VALUES
('AL2330GH_HDKASL_QCLKP', NULL),
('JPDJK34_QPKSLL_QKPAL', NULL),
('QCK32_SDSKDS_TLLKA', NULL);
-- DDL and sample data population, end
DECLARE @separator CHAR(1) = '_';
UPDATE t
SET code = c.query('
for $x in /root/r[substring(text()[1],1,2)=("AL","QC","QP")]
return $x').value('(/r/text())[1]', 'VARCHAR(20)')
FROM @tbl AS t
CROSS APPLY (SELECT TRY_CAST('<root><r><![CDATA[' +
REPLACE(campaign, @separator, ']]></r><r><![CDATA[') +
']]></r></root>' AS XML)) AS t1(c);
Output
+----+-----------------------+----------+
| id | campaign | Code |
+----+-----------------------+----------+
| 1 | AL2330GH_HDKASL_QCLKP | AL2330GH |
| 2 | JPDJK34_QPKSLL_QKPAL | QPKSLL |
| 3 | QCK32_SDSKDS_TLLKA | QCK32 |
+----+-----------------------+----------+
I want to use the LIKE clause to search all columns in a row, something like
SELECT * FROM test WHERE '%something%' IN *
Also, I don't know the exact columns that I have, which is why I need a wildcard (*).
Is there a way to do that with Snowflake / SQL?
You may consider using ARRAY_CONSTRUCT and ARRAY_CONTAINS:
CREATE or REPLACE TABLE test ( id number, v varchar, z varchar )
as SELECT * FROM VALUES
(1, 'Gokhan', 'Aylin'),
(2, 'Joe', 'Black');
SELECT *, ARRAY_CONSTRUCT( * ) combined
FROM test where ARRAY_CONTAINS( 'Gokhan'::variant, combined );
You can also convert this array to varchar to search for partially matching strings:
SELECT *, ARRAY_CONSTRUCT( * ) combined
FROM test
WHERE combined::VARCHAR LIKE '%Go%';
+----+--------+-------+--------------------------+
| ID | V      | Z     | COMBINED                 |
+----+--------+-------+--------------------------+
| 1  | Gokhan | Aylin | [ 1, "Gokhan", "Aylin" ] |
+----+--------+-------+--------------------------+
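A related option: ARRAY_TO_STRING joins the array elements with a separator of your choosing, which avoids matching the brackets, quotes, and commas that appear in the VARCHAR cast of the array. A sketch (ILIKE makes the match case-insensitive):
SELECT *
FROM test
WHERE ARRAY_TO_STRING(ARRAY_CONSTRUCT(*), '|') ILIKE '%go%';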
If you want to search for 'something' in all columns, you can try to concatenate all columns in the WHERE clause:
SELECT * from TABLE where CONCAT(colum1,column2,column3) LIKE '%something%'
Remember to cast any non-string column to a string type.
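For example, with the test table from the answer above, the numeric id column would need a cast; a minimal sketch:
SELECT *
FROM test
WHERE CONCAT(id::varchar, v, z) LIKE '%something%';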
You have to tweak the SQL a little; CONCAT takes all data types:
select a.* from (
select *, concat(*) as all_col_data from snowflake.schema.table_name
) as a
where a.all_col_data like '%something%'
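One caveat: in Snowflake, CONCAT returns NULL as soon as any argument is NULL, so a row with a NULL in any column would never match. A sketch that guards against this with COALESCE (assuming the test table from above):
select a.* from (
select *, concat(coalesce(to_varchar(id), ''), coalesce(v, ''), coalesce(z, '')) as all_col_data from test
) as a
where a.all_col_data like '%something%';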
My CLOB field in a table contains JSON and looks like the following:
{"a":"value1", "b":"value2", "c":"value3"}
And I'm trying to write a SQL query to return a table with key and value fields like the following:
key|value
---|------
a |value1
b |value2
c |value3
Any help would be hugely appreciated!
Use JSON_TABLE and then UNPIVOT if you want the values in rows instead of columns:
SELECT *
FROM (
SELECT p.*
FROM table_name t
CROSS JOIN
JSON_TABLE(
t.value,
'$'
COLUMNS (
a PATH '$.a',
b PATH '$.b',
c PATH '$.c'
)
) p
)
UNPIVOT ( value FOR key IN ( a, b, c ) );
So for some sample data:
CREATE TABLE table_name (
value CLOB CONSTRAINT ensure_json CHECK (value IS JSON)
);
INSERT INTO table_name ( value ) VALUES ( '{"a":"value1", "b":"value2", "c":"value3"}' );
This outputs:
KEY | VALUE
:-- | :-----
A | value1
B | value2
C | value3
db<>fiddle here
If you want to do it dynamically, then you can parse the JSON in PL/SQL and use GET_KEYS to get a collection of key names, then access the correct one by its position and correlate that to the value using FOR ORDINALITY:
CREATE FUNCTION get_key(
pos IN PLS_INTEGER,
json IN CLOB
) RETURN VARCHAR2
AS
doc_keys JSON_KEY_LIST;
BEGIN
doc_keys := JSON_OBJECT_T.PARSE ( json ).GET_KEYS;
RETURN doc_keys( pos );
END get_key;
/
Then:
SELECT get_key( j.pos, t.value ) AS key,
j.value
FROM table_name t
CROSS APPLY JSON_TABLE(
t.value,
'$.*'
COLUMNS (
pos FOR ORDINALITY,
value PATH '$'
)
) j;
Outputs:
KEY | VALUE
:-- | :-----
a | value1
b | value2
c | value3
db<>fiddle here
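As a quick sanity check, the helper can also be called with a literal; positions are 1-based. A hypothetical one-off call:
SELECT get_key( 2, '{"a":"value1", "b":"value2", "c":"value3"}' ) AS key FROM DUAL;
-- returns 'b'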
I have data that currently looks like this (pipe indicates separate columns):
ID | Sex | Purchase | Type
1 | M | Apple, Apple | Food, Food
2 | F | Pear, Barbie, Soap | Food, Toys, Cleaning
As you can see, the Purchase and Type columns feature multiple values that are comma delimited (some of the cells in these columns actually have up to 50+ values recorded within). I want the data to look like this:
ID | Sex | Purchase | Type
1 | M | Apple | Food
1 | M | Apple | Food
2 | F | Pear | Food
2 | F | Barbie | Toys
2 | F | Soap | Cleaning
Any ideas on how would I be able to do this with SQL? Thanks for your help everyone.
Edit: Just to show that this is different from some of the other questions. The key here is that the data for each unique row is contained across two separate columns, i.e. the second word in "Purchase" should be linked with the second word in "Type" for ID #1. The other questions I've seen were where the multiple values were contained in just one column.
Basically you will require a delimited splitter function. There are many around. Here I am using DelimitedSplit8K from Jeff Moden: http://www.sqlservercentral.com/articles/Tally+Table/72993/
-- create the sample table
create table #sample
(
ID int,
Sex char,
Purchase varchar(20),
Type varchar(20)
)
-- insert the sample data
insert into #sample (ID, Sex, Purchase, Type) select 1, 'M', 'Apple,Apple', 'Food,Food'
insert into #sample (ID, Sex, Purchase, Type) select 2, 'F', 'Pear,Barbie,Soap', 'Food,Toys,Cleaning'
select s.ID, s.Sex, Purchase = p.Item, Type = t.Item
from #sample s
cross apply DelimitedSplit8K(Purchase, ',') p
cross apply DelimitedSplit8K(Type, ',') t
where p.ItemNumber = t.ItemNumber
drop table #sample
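If you are on SQL Server 2022 or Azure SQL, you can get the same pairing without installing a splitter function: STRING_SPLIT's optional enable_ordinal argument exposes an ordinal column that plays the role of ItemNumber. A sketch:
select s.ID, s.Sex, Purchase = p.value, Type = t.value
from #sample s
cross apply string_split(s.Purchase, ',', 1) p
cross apply string_split(s.Type, ',', 1) t
where p.ordinal = t.ordinal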
EDIT: The original question as posted had the data as strings, with pipe characters as column delimiters and commas within the columns. The below solution works for that.
The question has since been edited to show that the input data is actually in columns, not as a single string.
I've left the solution here as an interesting version of the original question.
This is an interesting problem. I have a solution that works for a single row of your data. I don't know from the question if you are going to process it row by row, but I assume you will.
If so, this will work. I suspect there might be a better way using XML or without the temp tables, but in any case this is one solution.
declare @row varchar(1000); set @row='2 | F | Pear, Barbie, Soap | Food, Toys, Cleaning'
declare @v table(i int identity, val varchar(1000), subval varchar(100))
insert @v select value as val, subval from STRING_SPLIT(@row,'|')
cross apply (select value as subval from STRING_SPLIT(value,',') s) subval
declare @v2 table(col_num int, subval varchar(100), correlation int)
insert @v2
select col_num, subval,
DENSE_RANK() over (partition by v.val order by i) as correlation
from @v v
join (
select val, row_number() over (order by fst) as Col_Num
from (select val, min(i) as fst from @v group by val) colnum
) c on c.val=v.val
order by i
select col1.subval as ID, col2.subval as Sex, col3.subval as Purchase, col4.subval as Type
from @v2 col1
join @v2 col2 on col2.col_num=2
join @v2 col3 on col3.col_num=3
join @v2 col4 on col4.col_num=4 and col4.correlation=col3.correlation
where col1.col_num=1
Result is:
ID Sex Purchase Type
2 F Pear Food
2 F Barbie Toys
2 F Soap Cleaning
I am trying to unpivot / coalesce multiple columns into one value, based on values in the target columns.
Given the following sample data:
CREATE TABLE SourceData (
Id INT
,Column1Text VARCHAR(10)
,Column1Value INT
,Column2Text VARCHAR(10)
,Column2Value INT
,Column3Text VARCHAR(10)
,Column3Value INT
)
INSERT INTO SourceData
SELECT 1, NULL, NULL, NULL, NULL, NULL, NULL UNION
SELECT 2, 'Text', 1, NULL, NULL, NULL, NULL UNION
SELECT 3, 'Text', 2, 'Text 2', 1, NULL, NULL UNION
SELECT 4, NULL, NULL, NULL, 1, NULL, NULL
I am trying to produce the following result set:
Id ColumnText
----------- ----------
1 NULL
2 Text
3 Text 2
4 NULL
Where ColumnXText column values become one "ColumnText" value per row, based on the following criteria:
If all ColumnX columns are NULL, then ColumnText = NULL
If a ColumnXValue value is "1" and the ColumnXText IS NULL, then
ColumnText = NULL
If a ColumnXValue value is "1" and the ColumnXText IS NOT NULL, then
ColumnText = ColumnXText.
There are no records with more than one ColumnXValue of "1".
What I'd tried is in this SQL Fiddle: http://sqlfiddle.com/#!6/f2e18/2
I'd tried (shown in SQL fiddle):
Unpivoting with CROSS / OUTER APPLY. I fell down on this approach because I was not able to get the WHERE conditions to produce the expected results.
I'd also tried using UNPIVOT, but had no luck.
I was thinking of a brute-force approach that did not seem to be correct. The real source table has 44MM rows. I do not control the schema of the source table.
Please let me know if there's a simpler approach than a brute-force tangle of CASE WHENs. Thank you.
I don't think there is much mileage in trying to be too clever with this:
SELECT
Id,
CASE
WHEN Column1Value = 1 THEN Column1Text
WHEN Column2Value = 1 THEN Column2Text
WHEN Column3Value = 1 THEN Column3Text
END AS ColumnText
FROM
SourceData
I did have them in 3-2-1 order, but considered that the right answer might be hit sooner if the checking is done in 1-2-3 order instead (fewer checks might be significant if there are 44 million rows).
Considering you have 44 million rows, you really don't want to experiment too much with joining the table on itself with APPLY or something like that. You just need to go through it once, and that's best done with a simple CASE, what you call the "brute-force" approach:
SELECT
Id
, CASE WHEN Column1Value = 1 THEN Column1Text
WHEN Column2Value = 1 THEN Column2Text
WHEN Column3Value = 1 THEN Column3Text
END AS ColumnText
FROM SourceData
But if you really want to get fancy and write something without CASE, you could use UNION to merge the different columns into one, and then join on it:
WITH CTE_Union AS
(
SELECT Id, Column1Text AS ColumnText, Column1Value AS ColumnValue
FROM SourceData
UNION ALL
SELECT Id, Column2Text, Column2Value FROM SourceData
UNION ALL
SELECT Id, Column3Text, Column3Value FROM SourceData
)
SELECT s.Id, u.ColumnText
FROM SourceData s
LEFT JOIN CTE_Union u ON s.Id = u.id and u.ColumnValue = 1
But I guarantee the first approach will outperform this by a margin of 4 to 1.
If you do not want to use a case expression, you can use another outer apply() over a common table expression (or subquery/derived table) containing your original unpivot with outer apply():
;with cte as (
select s.Id, oa.ColumnText, oa.ColumnValue
from sourcedata s
outer apply (values
(s.Column1Text, s.Column1Value)
, (s.Column2Text, s.Column2Value)
, (s.Column3Text, s.Column3Value)
) oa (ColumnText, ColumnValue)
)
select s.Id, x.ColumnText
from sourcedata s
outer apply (
select top 1 cte.ColumnText
from cte
where cte.Id = s.Id
and cte.ColumnValue = 1
) x
rextester demo: http://rextester.com/TMBR41346
returns:
+----+------------+
| Id | ColumnText |
+----+------------+
| 1 | NULL |
| 2 | Text |
| 3 | Text 2 |
| 4 | NULL |
+----+------------+
This will give you the first non-null text value in order. It seems as if this is what you are trying to accomplish.
select ID, Coalesce(Column3Text,Column2Text,Column1Text) ColumnText
from SourceData