tSQL UNPIVOT of comma concatenated column into multiple rows - sql

I have a table that has a value column. The value could be one value or it could be multiple values separated with a comma:
id | assess_id | question_key | item_value
---+-----------+--------------+-----------
1 | 859 | Cust_A_1 | 1,5
2 | 859 | Cust_B_1 | 2
I need to unpivot the data based on the item_value to look like this:
id | assess_id | question_key | item_value
---+-----------+--------------+-----------
1 | 859 | Cust_A_1 | 1
1 | 859 | Cust_A_1 | 5
2 | 859 | Cust_B_1 | 2
How does one do that in tSQL on SQL Server 2012?

We have a user defined function that we use for stuff like this that we called "split_delimiter":
CREATE FUNCTION [dbo].[split_delimiter](#delimited_string VARCHAR(8000), #delimiter_type CHAR(1))
RETURNS TABLE AS
RETURN
WITH cte10(num) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,cte100(num) AS
(
SELECT 1
FROM cte10 t1, cte10 t2
)
,cte10000(num) AS
(
SELECT 1
FROM cte100 t1, cte100 t2
)
,cte1(num) AS
(
SELECT TOP (ISNULL(DATALENGTH(#delimited_string),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM cte10000
)
,cte2(num) AS
(
SELECT 1
UNION ALL
SELECT t.num+1
FROM cte1 t
WHERE SUBSTRING(#delimited_string,t.num,1) = #delimiter_type
)
,cte3(num,[len]) AS
(
SELECT t.num
,ISNULL(NULLIF(CHARINDEX(#delimiter_type,#delimited_string,t.num),0)-t.num,8000)
FROM cte2 t
)
SELECT delimited_item_num = ROW_NUMBER() OVER(ORDER BY t.num)
,delimited_value = SUBSTRING(#delimited_string, t.num, t.[len])
FROM cte3 t;
GO
It will take a varchar value up to 8000 characters and will return a table with the delimited elements broken into rows. In your example, you'll want to use an outer apply to turn those delimited values into separate rows:
SELECT my_table.id, my_table.assess_id, question_key, my_table.delimited_items.item_value
FROM my_table
OUTER APPLY(
SELECT delimited_value AS item_value
FROM my_database.dbo.split_delimiter(my_table.item_value, ',')
) AS delimited_items

Related

How can I count the total no of rows which is not containing some value in array field? It should include null values as well

| names |
| -----------------------------------|
| null |
| null |
| [{name:'test'},{name:'test1'}] |
| [{name:'test'},{name:'test1'}] |
| [{name:'test1'},{name:'test2'}] |
I want to count the no of rows which does not have the value 'test' in the name key.
Here it should give answer as 3 (Row no 1, 2 and 5th row) because all these row do not contain the value 'test'.
Use below approach
select count(*)
from your_table
where 0 = ifnull(array_length(regexp_extract_all(names, r"\b(name:'test')")), 0)
you can test it with below data (that resemble whatever you presented in your question)
with your_table as (
select null names union all
select null union all
select "[{name:'test'},{name:'test1'}]" union all
select "[{name:'test'},{name:'test1'}]" union all
select "[{name:'test1'},{name:'test2'}]"
)
with output
Below approach will work,
with your_table as (
select null names union all
select null union all
select [('name','test'),('name','test1')] union all
select [('name','test'),('name','test1')] union all
select [('name','test1'),('name','test2')]
)
SELECT COUNT(*) as count FROM your_table
WHERE NOT EXISTS (
SELECT 1
FROM UNNEST(your_table.names) AS names
WHERE names IN (('name','test'))
)

Redshift, count items in column seperated with comma

I have data that a column saved a group of number
| user | col |
| ------- | ------- |
| 1 | 3,7,11,25,44,56,77,32,34,55 |
| 2 | 3,7,25,44,37,89,56,99,103,13 |
| 1 | 3,10,11,25,44,56,33,32,34,55 |
I know I can split part the columns and count but do we have any different way to count the numbers?
|user| new-col | count|
| ------- | ------- |
| 1 | 3 | 2 |
| 1 | 7 | 1 |
| 1 | 11 | 2 |
| 1 | 25 | 2 |
| 1 | 44 |2 |
| 1 | 56 |1 |
| 1 | 77 | 1 |
| 1 | 32 | 2 |
You could use a union query along with SPLIT_PART:
WITH cte AS (
SELECT user, SPLIT_PART(col, ',', 1) AS val FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 2) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 3) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 4) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 5) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 6) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 7) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 8) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 9) FROM yourTable UNION ALL
SELECT user, SPLIT_PART(col, ',', 10) FROM yourTable
)
SELECT
user,
val,
COUNT(*) AS cnt
FROM cte
GROUP BY
user,
val;
But note that all we are doing above in the CTE is really just normalizing your data so that each user-value relationship occupies a separate record. Ideally you should change your table design and move away from storing CSV.
If you instead want just the count of numbers per user, then use:
SELECT
user,
COUNT(*) AS cnt
FROM cte
GROUP BY
user;
Query.
with t as (
select 1 as user, '3,7,11,25,44,56,77,32,34,55' as col
union all
select 2 as user, '3,7,25,44,37,89,56,99,103,13' as col
union all
select 1 as user, '3,10,11,25,44,56,33,32,34,55' as col
)
select a.user, a.val, count(*) as cnt
from (
select a.user
, SPLIT_PART(a.col, ',', b.no) as val
from t a
cross join (
select * from generate_series(1,10) as no
) b
) a
group by a.user, a.val
order by a.user, a.val
Count the number of commas in the string using REGEXP_COUNT and add 1.
CREATE TEMP TABLE examples (
user_id INT
, value_list VARCHAR
);
INSERT INTO examples
SELECT 1 , '3,7,11,25,44,56,77,32,34,55'
UNION ALL SELECT 2 , '3,7,25,44,37,89,56,99,103,13'
UNION ALL SELECT 1 , '3,10,11,25,44,56,33,32,34,55'
;
SELECT user_id
, SUM(REGEXP_COUNT(value_list,',')+1) value_count
FROM examples
GROUP BY 1
;
Output
user_id | value_count
---------+-------------
1 | 20
2 | 10
This answers the original version of the question.
You can count the number of comma-delimited values with:
select (case when col = '' then 0
else length(col) - length(replace(col, ',', '')) + 1
end) as values_count
from t;
That said, you should fix your data model so you are not storing multiple values in a column. It is particularly irksome that you are storing numbers as strings, as well. You want a junction/association table.

SQL splitfunction reverse

I am trying to make an SQL splitfunction which works reverse. Means
Input: 'Test1,Test2,Test3,Test4'
And the output should be:
| ID | Value |
| 1 | Test4 |
| 2 | Test3 |
| 3 | Test2 |
| 4 | Test1 |
I found a forwardworking function but I don't know what to change to make it work reversed.
I have tried some stuff but it doesn't work.
Here is the original
CREATE FUNCTION [dbo].[SplitString]
(
#InputString VARCHAR(8000),
#Delimiter CHAR(1)
)
RETURNS TABLE
AS
RETURN
(
WITH Split(StartPos,Endpos)
AS(
SELECT 0 AS StartPos, CHARINDEX(#Delimiter,#InputString) AS Endpos
UNION ALL
SELECT Endpos+1, CHARINDEX(#Delimiter,#InputString,Endpos+1)
FROM Split
WHERE Endpos > 0
)
SELECT 'Id' = ROW_NUMBER() OVER (ORDER BY (SELECT 1)),
'Value' = SUBSTRING(#InputString,StartPos
,COALESCE(NULLIF(Endpos,0)
,LEN(#InputString)+1)-StartPos)
FROM Split
)
GO
with
SELECT ID, Value
FROM dbo.SplitString('Test1,Test2,Test3,Test4', ',');
you will get the output.
Select * from [dbo].[udf-Str-Parse-8K-Rev]('Test1,Test2,Test3,Test4',',')
Returns
RetSeq RetVal
1 Test4
2 Test3
3 Test2
4 Test1
The UDF
CREATE FUNCTION [dbo].[udf-Str-Parse-8K-Rev] (#String varchar(max),#Delimiter varchar(25))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(#String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 a,cte1 b,cte1 c,cte1 d) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(#Delimiter) From cte2 t Where Substring(#String,t.N,DataLength(#Delimiter)) = #Delimiter),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(#Delimiter,#String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By A.N Desc)
,RetVal = LTrim(RTrim(Substring(#String, A.N, A.L)))
From cte4 A
);
--Orginal Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Select * from [dbo].[udf-Str-Parse-8K-Rev]('Dog,Cat,House,Car',',')
Notice the Order By in the Final Select
you can it also do it with a query like this:
SELECT
cnt.n AS ID
, SUBSTRING_INDEX(SUBSTRING_INDEX('Test1,Test2,Test3,Test4', ',', cnt.n),',',-1) AS `VALUE` FROM
( SELECT 1 AS n UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9
) cnt
WHERE
cnt.n <= CHARACTER_LENGTH('Test1,Test2,Test3,Test4')
- CHARACTER_LENGTH(REPLACE('Test1,Test2,Test3,Test4',',',''))+1;
sample
MariaDB [(none)]> SELECT
-> cnt.n AS ID
-> , SUBSTRING_INDEX(SUBSTRING_INDEX('Test1,Test2,Test3,Test4', ',', cnt.n),',',-1) AS `VALUE` FROM
-> ( SELECT 1 AS n UNION SELECT 2 UNION SELECT 3 UNION SELECT 4
-> UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9
-> ) cnt
-> WHERE
-> cnt.n <= CHARACTER_LENGTH('Test1,Test2,Test3,Test4')
-> - CHARACTER_LENGTH(REPLACE('Test1,Test2,Test3,Test4',',',''))+1;
+----+-------+
| ID | VALUE |
+----+-------+
| 1 | Test1 |
| 2 | Test2 |
| 3 | Test3 |
| 4 | Test4 |
+----+-------+
4 rows in set (0.00 sec)
MariaDB [(none)]>

Using Case in a select statement

Consider the following table
create table temp (id int, attribute varchar(25), value varchar(25))
And values into the table
insert into temp select 100, 'First', 234
insert into temp select 100, 'Second', 512
insert into temp select 100, 'Third', 320
insert into temp select 101, 'Second', 512
insert into temp select 101, 'Third', 320
I have to deduce a column EndResult which is dependent on 'attribute' column. For each id, I have to parse through attribute values in the order
First, Second, Third and choose the very 1st value which is available i.e. for id = 100, EndResult should be 234 for the 1st three records.
Expected result:
| id | EndResult |
|-----|-----------|
| 100 | 234 |
| 100 | 234 |
| 100 | 234 |
| 101 | 512 |
| 101 | 512 |
I tried with the following query in vain:
select id, case when isnull(attribute,'') = 'First'
then value
when isnull(attribute,'') = 'Second'
then value
when isnull(attribute,'') = 'Third'
then value
else '' end as EndResult
from
temp
Result
| id | EndResult |
|-----|-----------|
| 100 | 234 |
| 100 | 512 |
| 100 | 320 |
| 101 | 512 |
| 101 | 320 |
Please suggest if there's a way to get the expected result.
You can use analytical function like dense_rank to generate a numbering, and then select those rows that have the number '1':
select
x.id,
x.attribute,
x.value
from
(select
t.id,
t.attribute,
t.value,
dense_rank() over (partition by t.id order by t.attribute) as priority
from
Temp t) x
where
x.priority = 1
In your case, you can conveniently order by t.attribute, since their alphabetical order happens to be the right order. In other situations you could convert the attribute to a number using a case, like:
order by
case t.attribute
when 'One' then 1
when 'Two' then 2
when 'Three' then 3
end
In case the attribute column have different values which are not in alphabetical order as is the case above you can write as:
with cte as
(
select id,
attribute,
value,
case attribute when 'First' then 1
when 'Second' then 2
when 'Third' then 3 end as seq_no
from temp
)
, cte2 as
(
select id,
attribute,
value,
row_number() over ( partition by id order by seq_no asc) as rownum
from cte
)
select T.id,C.value as EndResult
from temp T
join cte2 C on T.id = C.id and C.rownum = 1
DEMO
Here is how you can achieve this using ROW_NUMBER():
WITH t
AS (
SELECT *
,ROW_NUMBER() OVER (
PARTITION BY id ORDER BY (CASE attribute WHEN 'First' THEN 1
WHEN 'Second' THEN 2
WHEN 'Third' THEN 3
ELSE 0 END)
) rownum
FROM TEMP
)
SELECT id
,(
SELECT value
FROM t t1
WHERE t1.id = t.id
AND rownum = 1
) end_result
FROM t;
For testing purpose, please see SQL Fiddle demo here:
SQL Fiddle Example
keep it simple
;with cte as
(
select row_number() over (partition by id order by (select 1)) row_num, id, value
from temp
)
select t1.id, t2.value
from temp t1
left join cte t2
on t1.Id = t2.id
where t2.row_num = 1
Result
id value
100 234
100 234
100 234
101 512
101 512

How to comapre two columns of a table in sql?

In a table there are two columns:
-----------
| A | B |
-----------
| 1 | 5 |
| 2 | 1 |
| 3 | 2 |
| 4 | 1 |
-----------
Want a table where if A=B then
-------------------
|Match | notMatch|
-------------------
| 1 | 5 |
| 2 | 3 |
| Null | 4 |
-------------------
How can i do this?
I tried something which shows the Matched part
select distinct C.A as A from Table c inner join Table d on c.A=d.B
Try this:
;WITH TempTable(A, B) AS(
SELECT 1, 5 UNION ALL
SELECT 2, 1 UNION ALL
SELECT 3, 2 UNION ALL
SELECT 4, 1
)
,CTE(Val) AS(
SELECT A FROM TempTable UNION ALL
SELECT B FROM TempTable
)
,Match AS(
SELECT
Rn = ROW_NUMBER() OVER(ORDER BY Val),
Val
FROM CTE c
GROUP BY Val
HAVING COUNT(Val) > 1
)
,NotMatch AS(
SELECT
Rn = ROW_NUMBER() OVER(ORDER BY Val),
Val
FROM CTE c
GROUP BY Val
HAVING COUNT(Val) = 1
)
SELECT
Match = m.Val,
NotMatch= n.Val
FROM Match m
FULL JOIN NotMatch n
ON n.Rn = m.Rn
Try with EXCEPT, MINUS and INTERSECT Statements.
like this:
SELECT A FROM TABLE1 INTERSECT SELECT B FROM TABLE1;
You might want this:
SELECT DISTINCT
C.A as A
FROM
Table c
LEFT OUTER JOIN
Table d
ON
c.A=d.B
WHERE
d.ID IS NULL
Please Note that I use d.ID as an example because I don't see your schema. An alternate is to explicitly state all d.columns IS NULL in WHERE clause.
Your requirement is kind of - let's call it - interesting. Here is a way to solve it using pivot. Personally I would have chosen a different table structure and another way to select data:
Test data:
DECLARE #t table(A TINYINT, B TINYINT)
INSERT #t values
(1,5),(2,1),
(3,2),(4,1)
Query:
;WITH B AS
(
( SELECT A FROM #t
EXCEPT
SELECT B FROM #t)
UNION ALL
( SELECT B FROM #t
EXCEPT
SELECT A FROM #t)
), A AS
(
SELECT A val
FROM #t
INTERSECT
SELECT B
FROM #t
), combine as
(
SELECT val, 'A' col, row_number() over (order by (select 1)) rn FROM A
UNION ALL
SELECT A, 'B' col, row_number() over (order by (select 1)) rn
FROM B
)
SELECT [A], [B]
FROM combine
PIVOT (MAX(val) FOR [col] IN ([A], [B])) AS pvt
Result:
A B
1 3
2 4
NULL 5