Getting Number of Common Values from 2 comma-separated strings - sql

I have a table in Postgres that contains comma-separated values in a column.
ID PRODS
--------------------------------------
1 ,142,10,75,
2 ,142,87,63,
3 ,75,73,2,58,
4 ,142,2,
Now I want a query where I can give a comma-separated string and it will tell me the number of matches between the input string and the string present in the row.
For instance, for the input value ',142,87,', I want output like this:
ID PRODS No. of Match
------------------------------------------------------------------------
1 ,142,10,75, 1
2 ,142,87,63, 2
3 ,75,73,2,58, 0
4 ,142,2, 1

Try this:
SELECT
  *,
  ARRAY(
    SELECT *
    FROM unnest(string_to_array(trim(both ',' from prods), ','))
    WHERE unnest = ANY(string_to_array(',142,87,', ','))
  )
FROM prods_table;
Output is:
1 ,142,10,75, {142}
2 ,142,87,63, {142,87}
3 ,75,73,2,58, {}
4 ,142,2, {142}
Wrap the last column in the cardinality(anyarray) function to get just the number of matches.
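For example, a minimal sketch of that change (same table and input string as above; the no_of_match alias is just illustrative):
SELECT
  *,
  cardinality(ARRAY(
    SELECT *
    FROM unnest(string_to_array(trim(both ',' from prods), ','))
    WHERE unnest = ANY(string_to_array(',142,87,', ','))
  )) AS no_of_match
FROM prods_table;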
And consider changing your database design.
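For instance, a hypothetical normalized layout (not from the question) would store one product per row, which turns the count into a plain aggregate:
CREATE TABLE prod_links (
    id   int NOT NULL,   -- same id as in prods_table
    prod int NOT NULL,
    PRIMARY KEY (id, prod)
);

-- number of matches per id for the input products 142 and 87
SELECT id, count(*) FILTER (WHERE prod IN (142, 87)) AS no_of_match
FROM prod_links
GROUP BY id;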

Check this:
select T.*,
       COALESCE(No_of_Match, '0') as No_of_Match
from TT T
left join
(
    select ID, count(ID) as No_of_Match
    from (
        select ID, unnest(string_to_array(trim(t.prods, ','), ',')) as A
        from TT t
    ) a
    where A in ('142', '87')
    group by ID
) B on T.Id = B.id

If you install the intarray extension, this gets quite easy:
select id, prods, cardinality(string_to_array(trim(prods, ','), ',')::int[] & array[142,87])
from bad_design;
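For reference, enabling the extension is a one-time step per database (it requires the appropriate privileges):
CREATE EXTENSION IF NOT EXISTS intarray;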
Otherwise it's a bit more complicated:
select bd.id, bd.prods, m.matches
from bad_design bd
  join lateral (
    select bd.id, count(v.p) as matches
    from unnest(string_to_array(trim(bd.prods, ','), ',')) as l(p)
      left join (
        values ('142'),('87') --<< these are your input values
      ) v(p) on l.p = v.p
    group by bd.id
  ) m on m.id = bd.id
order by bd.id;
Online example: http://rextester.com/ZIYS97736
But you should really fix your data model.

with data as
(
    select *,
           unnest(string_to_array(trim(both ',' from prods), ',')) as v
    from myTable
),
counts as
(
    select id, count(t) as c
    from data
    left join
        (select unnest(string_to_array(',142,87,', ',')) as t) tmp on tmp.t = data.v
    group by id
    order by id
)
select t1.id, t1.prods, t2.c as "No. of Match"
from myTable t1
inner join counts t2 on t1.id = t2.id;

Related

Comparing Column Values and returning ERROR or OK

I need to verify a source system against a destination system and ensure the values match between them. The problem is that the source system is a total mess and is proving hard to validate.
I've got the following sample data where they should all be OK, but they're showing as ERROR. Does anyone know a way of doing a comparison that would result in an OK for all of the below?
CREATE TABLE #testdata (
ID INT
,ValueSource VARCHAR(800)
,ValueDestination VARCHAR(800)
,Value_Varchar_Check AS (
CASE
WHEN coalesce(ValueSource, '0') = coalesce(ValueDestination, '0')
THEN 'OK'
ELSE 'ERROR'
END
)
)
INSERT INTO #testdata (
ID
,ValueSource
,ValueDestination
)
SELECT 1
,'hepatitis c,other (specify)'
,'hepatitis c, other (specify)'
UNION ALL
SELECT 2
,'lung problems / asthma,lung problems / asthma'
,'lung problems / asthma'
UNION ALL
SELECT 3
,'lung problems / asthma,diabetes'
,'diabetes, lung problems / asthma'
UNION ALL
SELECT 4
,'seizures/epilepsy,hepatitis c,seizures/epilepsy'
,'hepatitis c, seizures/epilepsy'
I don't think you can write this as a computed column, as it is quite a tricky thing to compute. If you are using SQL Server 2016 or later, you can use STRING_SPLIT to convert the ValueSource and ValueDestination values into tables and then sort them alphabetically using a query like this:
SELECT DISTINCT ID, TRIM(value) AS value,
DENSE_RANK() OVER (PARTITION BY ID ORDER BY TRIM(value)) AS rn
FROM testdata
CROSS APPLY STRING_SPLIT(ValueSource, ',')
For ValueSource, this produces:
ID value rn
1 hepatitis c 1
1 other (specify) 2
2 lung problems / asthma 1
3 diabetes 1
3 lung problems / asthma 2
4 hepatitis c 1
4 seizures/epilepsy 2
You can then FULL OUTER JOIN those two tables on ID, value and rn, and detect an error when there are null values from either side (since that implies that the values for a given ID and rn don't match):
WITH t1 AS (
SELECT DISTINCT ID, TRIM(value) AS value,
DENSE_RANK() OVER (PARTITION BY ID ORDER BY TRIM(value)) AS rn
FROM testdata
CROSS APPLY STRING_SPLIT(ValueSource, ',')
),
t2 AS (
SELECT DISTINCT ID, TRIM(value) AS value,
DENSE_RANK() OVER (PARTITION BY ID ORDER BY TRIM(value)) AS rn
FROM testdata
CROSS APPLY STRING_SPLIT(ValueDestination, ',')
)
SELECT COALESCE(t1.ID, t2.ID) AS ID,
CASE WHEN COUNT(CASE WHEN t1.value IS NULL OR t2.value IS NULL THEN 1 END) > 0 THEN 'Error'
ELSE 'OK'
END AS Status
FROM t1
FULL OUTER JOIN t2 ON t2.ID = t1.ID AND t2.rn = t1.rn AND t2.value = t1.value
GROUP BY COALESCE(t1.ID, t2.ID)
Output (for your sample data):
ID Status
1 OK
2 OK
3 OK
4 OK
You can then use the entire query above as a CTE (call it t3) to update your original table:
UPDATE t
SET t.Value_Varchar_Check = t3.Status
FROM testdata t
JOIN t3 ON t.ID = t3.ID
Output:
ID ValueSource ValueDestination Value_Varchar_Check
1 hepatitis c,other (specify) hepatitis c, other (specify) OK
2 lung problems / asthma,lung problems / asthma lung problems / asthma OK
3 lung problems / asthma,diabetes diabetes, lung problems / asthma OK
4 seizures/epilepsy,hepatitis c,seizures/epilepsy hepatitis c, seizures/epilepsy OK
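For completeness, here is a sketch of how those pieces could be wired together in a single statement; note this assumes Value_Varchar_Check is an ordinary column you can write to (the computed column from the original CREATE TABLE cannot be the target of an UPDATE):
WITH t1 AS (
    SELECT DISTINCT ID, TRIM(value) AS value,
           DENSE_RANK() OVER (PARTITION BY ID ORDER BY TRIM(value)) AS rn
    FROM testdata
    CROSS APPLY STRING_SPLIT(ValueSource, ',')
), t2 AS (
    SELECT DISTINCT ID, TRIM(value) AS value,
           DENSE_RANK() OVER (PARTITION BY ID ORDER BY TRIM(value)) AS rn
    FROM testdata
    CROSS APPLY STRING_SPLIT(ValueDestination, ',')
), t3 AS (
    SELECT COALESCE(t1.ID, t2.ID) AS ID,
           CASE WHEN COUNT(CASE WHEN t1.value IS NULL OR t2.value IS NULL THEN 1 END) > 0
                THEN 'Error' ELSE 'OK'
           END AS Status
    FROM t1
    FULL OUTER JOIN t2 ON t2.ID = t1.ID AND t2.rn = t1.rn AND t2.value = t1.value
    GROUP BY COALESCE(t1.ID, t2.ID)
)
UPDATE t
SET t.Value_Varchar_Check = t3.Status
FROM testdata t
JOIN t3 ON t.ID = t3.ID;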

How to avoid duplicates in the STRING_AGG function SQL Server

I was testing a query in which I need to concatenate values into a comma-separated list, and it works; I just have the problem of duplicate values.
This is the query:
SELECT t0.id_marcas AS CodMarca,
t0.nombremarcas AS NombreMarca,
t0.imagenmarcas,
(SELECT String_agg((t2.name), ', ')
FROM exlcartu_devcit.store_to_cuisine t1
INNER JOIN exlcartu_devcit.cuisine t2
ON t1.cuisine_id = t2.cuisine_id
WHERE store_id = (SELECT TOP 1 store_id
FROM exlcartu_devcit.store
WHERE id_marcas = t0.id_marcas
AND status = 1)) AS Descripcion,
t0.logo,
t0.imagen,
(SELECT TOP 1 preparing_time
FROM exlcartu_devcit.store
WHERE id_marcas = t0.id_marcas
AND status = 1) AS Tiempo,
t0.orden,
(SELECT TOP 1 Avg(minimum_amount)
FROM exlcartu_devcit.store_delivery_zone
WHERE id_marcas = t0.id_marcas) AS MontoMinimo
FROM exlcartu_devcit.[marcas] t0
I thought the solution could be just adding a DISTINCT to the query to avoid repeated values, like this:
(SELECT STRING_AGG(DISTINCT (t2.name), ', ') AS Descripcion
But apparently the STRING_AGG() function does not support it. Any idea how to avoid repeated values?
The simplest way is just to select from a select, i.e. put the DISTINCT in a derived table, like this:
with dups as (select 1 as one union all select 1 as one)
select string_agg(one, ', ') from (select distinct one from dups) q;
vs original
with dups as (select 1 as one union all select 1 as one)
select string_agg(one, ', ') from dups;
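Applied to the query in the question, the Descripcion expression could be rewritten along these lines (an untested sketch against the tables named above; the DISTINCT is pushed into a derived table so STRING_AGG only sees unique names):
(SELECT STRING_AGG(d.name, ', ')
 FROM (SELECT DISTINCT t2.name, t1.store_id
       FROM exlcartu_devcit.store_to_cuisine t1
       INNER JOIN exlcartu_devcit.cuisine t2
           ON t1.cuisine_id = t2.cuisine_id) d
 WHERE d.store_id = (SELECT TOP 1 store_id
                     FROM exlcartu_devcit.store
                     WHERE id_marcas = t0.id_marcas
                       AND status = 1)) AS Descripcion,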

For Pivot Columns need to apply group by

I want to apply the group by function based on columns generated by the PIVOT function.
In the query below, I wanted to apply Sum(TC.MB_Usage) so I could use GROUP BY at the end. However, I am unable to apply GROUP BY to columns that are generated by the PIVOT function (SITES).
Comments are marked in BOLD.
With TC as(
select /*+ NO_MERGE(T1) NO_MERGE(T2)
PARALLEL(T1,4) PARALLEL(T2,4) */
T1.* from (
select /*+
DRIVING_SITE(A) NO_MERGE(A) NO_MERGE(fips) NO_MERGE(fw) PARALLEL(A,4)
PARALLEL(fips,4) PARALLEL(fw,4) */
distinct
resource_value,
resource_type,
L3_IMSI as IMSI
,L9_Calling_Number as MDN
,L9_ECID as Curr_ECID
,trunc(START_TIME) as Usage_Date
,trim(TO_CHAR (TO_NUMBER (SUBSTR (L9_ECID,1,LENGTH(L9_ECID) - 2 ),'XXXXXXXXX'),'000000')) as enodeb
,substr(trim(TO_CHAR (TO_NUMBER (SUBSTR (L9_ECID,1,LENGTH(L9_ECID) - 2 ),'XXXXXXXXX'),'000000')),1,2) as fips_cd
,STATE_CODE
,STATE_NAME
,Sum(L3_ROUNDED_UNIT/1024) as MB_Usage
from RT_ET A
INNER JOIN
FIPS_STATE fips
ON trim(to_char(fips.FIPS_CODE,'00')) = substr(trim(TO_CHAR (TO_NUMBER (SUBSTR (A.L9_ECID,1,LENGTH(A.L9_ECID) - 2 ),'XXXXXXXXX'),'000000')),1,2)
INNER JOIN
DVC_ADDR fw
ON trim(L3_IMSI) = trim(to_char(FW.IMSI))
Where A.L9_ECID not in (' ','0') AND A.L3_IMSI not in (' ','0')
AND trunc(A.start_time) > trunc(sysdate-8)
AND trunc(start_time) > trunc(FW.ODS_INSERT_DATE)
group by
resource_value
,resource_type
,L3_IMSI
,L9_Calling_Number
,L9_ECID
,trunc(START_TIME)
,trim(TO_CHAR (TO_NUMBER (SUBSTR (L9_ECID,1,LENGTH(L9_ECID) - 2 ),'XXXXXXXXX'),'000000'))
,substr(trim(TO_CHAR (TO_NUMBER (SUBSTR (L9_ECID,1,LENGTH(L9_ECID) - 2 ),'XXXXXXXXX'),'000000')),1,2)
,STATE_CODE
,STATE_NAME
) T1
where NOT EXISTS (
SELECT
1 FROM DVC_ENODEB T2
WHERE T1.IMSI = trim(to_char(T2.IMSI))
AND T1.MDN = T2.MDN
AND T1.ENODEB = T2.ENODEB
)
)
select
distinct
SITES.*,
TC.resource_value,
TC.resource_type,
TC.IMSI,
TC.MDN,
TC.Curr_ECID,
TC.Usage_Date,
TC.enodeb,
TC.fips_cd,
TC.STATE_CODE,
TC.STATE_NAME,
**TC.MB_Usage -- Need to apply Sum(TC.MB_Usage)**
from
(
with
endb as
(select
e.IMSI, e.MDN,e.site_id, E.ENODEB,
ROW_NUMBER ()OVER (PARTITION BY e.IMSI||e.MDN
ORDER BY e.IMSI||e.MDN ) row_id
from
FIXED_WIRELESS_DVC_ADDR_ENODEB e
)
select *
from (select IMSI,MDN,IMSI||MDN as IMSI_MDN,site_id,row_id
from endb
)
pivot (max(site_id) siteid for row_id in (1,2,3,4,5,6,7,8,9,10,11,12,13)) ) SITES
INNER JOIN TC
ON ((TC.IMSI = trim(to_char(SITES.IMSI))) AND TC.MDN = SITES.MDN)
**Unable to apply Group by
based on the below PIVOT columns (SITES.*) if I use SUM for MB_Usage
Group BY
TC.resource_value,
TC.resource_type,
TC.IMSI,
TC.MDN,
TC.Curr_ECID,
TC.Usage_Date,
TC.enodeb,
TC.fips_cd,
TC.STATE_CODE,
TC.STATE_NAME,
SITES.***
You will need to specify the individual columns in your GROUP BY.
It looks like your SITES alias will have the columns IMSI, MDN, IMSI_MDN, and then 1,2,3,4,5,6,7,8,9,10,11,12,13 from the PIVOT clause.
So try listing those individually, and for the numeric ones put them in quotes:
GROUP BY
...
SITES."1", SITES."2", etc.

Get every combination of sort order and value of a csv

If I have a string with numbers separated by commas, like this:
Declare @string varchar(20) = '123,456,789'
And would like to return every possible combination + sort order of the values by doing this:
Select Combination FROM dbo.GetAllCombinations(@string)
Which would in result return this:
123
456
789
123,456
456,123
123,789
789,123
456,789
789,456
123,456,789
123,789,456
456,789,123
456,123,789
789,456,123
789,123,456
As you can see, not only is every combination returned, but also each combination + sort order as well. The example shows only 3 values separated by commas, but it should parse any amount, recursively.
The logic needed would be somewhere in the realm of using a WITH CUBE statement, but the problem with using WITH CUBE (in a table structure instead of a CSV, of course) is that it won't shuffle the order of the values (123,456 vs 456,123, etc.) and will only provide each combination, which is only half of the battle.
Currently I have no idea what to try. If someone can provide some assistance it would be appreciated.
I use a user-defined table-valued function called split_delimiter that takes 2 values: the @delimited_string and the @delimiter_type.
CREATE FUNCTION [dbo].[split_delimiter](@delimited_string VARCHAR(8000), @delimiter_type CHAR(1))
RETURNS TABLE AS
RETURN
    WITH cte10(num) AS   -- 10 rows
    (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    )
    ,cte100(num) AS      -- 100 rows
    (
        SELECT 1
        FROM cte10 t1, cte10 t2
    )
    ,cte10000(num) AS    -- 10,000 rows
    (
        SELECT 1
        FROM cte100 t1, cte100 t2
    )
    ,cte1(num) AS        -- one row per character position in the input string
    (
        SELECT TOP (ISNULL(DATALENGTH(@delimited_string),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM cte10000
    )
    ,cte2(num) AS        -- starting position of each delimited item
    (
        SELECT 1
        UNION ALL
        SELECT t.num+1
        FROM cte1 t
        WHERE SUBSTRING(@delimited_string,t.num,1) = @delimiter_type
    )
    ,cte3(num,[len]) AS  -- starting position and length of each delimited item
    (
        SELECT t.num
              ,ISNULL(NULLIF(CHARINDEX(@delimiter_type,@delimited_string,t.num),0)-t.num,8000)
        FROM cte2 t
    )
    SELECT delimited_item_num = ROW_NUMBER() OVER(ORDER BY t.num)
          ,delimited_value = SUBSTRING(@delimited_string, t.num, t.[len])
    FROM cte3 t;
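A quick usage example (assuming the function above has been created); for the string used later in this answer it should return one row per item:
SELECT delimited_item_num, delimited_value
FROM dbo.split_delimiter('123,456,789', ',');
-- delimited_item_num   delimited_value
-- 1                    123
-- 2                    456
-- 3                    789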
Using that I was able to parse the CSV to a table and join it back to itself multiple times and use WITH ROLLUP to get the permutations you are looking for.
WITH Numbers as
(
SELECT delimited_value
FROM dbo.split_delimiter('123,456,789',',')
)
SELECT CAST(Nums1.delimited_value AS VARCHAR)
,ISNULL(CAST(Nums2.delimited_value AS VARCHAR),'')
,ISNULL(CAST(Nums3.delimited_value AS VARCHAR),'')
,CAST(Nums4.delimited_value AS VARCHAR)
FROM Numbers as Nums1
LEFT JOIN Numbers as Nums2
ON Nums2.delimited_value not in (Nums1.delimited_value)
LEFT JOIN Numbers as Nums3
ON Nums3.delimited_value not in (Nums1.delimited_value, Nums2.delimited_value)
LEFT JOIN Numbers as Nums4
ON Nums4.delimited_value not in (Nums1.delimited_value, Nums2.delimited_value, Nums3.delimited_value)
GROUP BY CAST(Nums1.delimited_value AS VARCHAR)
,ISNULL(CAST(Nums2.delimited_value AS VARCHAR),'')
,ISNULL(CAST(Nums3.delimited_value AS VARCHAR),'')
,CAST(Nums4.delimited_value AS VARCHAR) WITH ROLLUP
If you will potentially have more than 3 or 4, you'll want to expand your code accordingly.

SQL query for finding first missing sequence string (prefix+no)

I need a T-SQL query for finding the first missing sequence string (prefix + number). The sequence can have a prefix plus a continuing number.
For example, the sequence will be:
ID
-------
AUTO_500
AUTO_501
AUTO_502
AUTO_504
AUTO_505
AUTO_506
AUTO_507
AUTO_508
So above, the missing sequence is AUTO_503; if there is no missing sequence then it must return the next sequence.
Also, the starting number is specified (e.g. 500 in this case), and the prefix can be null, i.e. no prefix, only numbers as the sequence.
You could LEFT JOIN the id numbers on shifted(+1) values to find gaps in sequential order:
SELECT
MIN(a.offsetnum) AS first_missing_num
FROM
(
SELECT 500 AS offsetnum
UNION
SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) + 1
FROM tbl
) a
LEFT JOIN
(SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) AS idnum FROM tbl) b ON a.offsetnum = b.idnum
WHERE
a.offsetnum >= 500 AND b.idnum IS NULL
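If you want the result back in the original ID format, you could prepend the prefix to that result (a sketch building on the query above; the 'AUTO_' literal is specific to this example):
SELECT 'AUTO_' + CAST(MIN(a.offsetnum) AS VARCHAR(20)) AS first_missing_id
FROM
(
    SELECT 500 AS offsetnum
    UNION
    SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) + 1
    FROM tbl
) a
LEFT JOIN
    (SELECT CAST(REPLACE(id, 'AUTO_', '') AS INT) AS idnum FROM tbl) b ON a.offsetnum = b.idnum
WHERE
    a.offsetnum >= 500 AND b.idnum IS NULL;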
Using a recursive CTE to dynamically generate the sequence between the min and max of the ID numbers may be overcomplicating things a bit, but it seems to work:
CREATE TABLE tbl (
id VARCHAR(55)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
;WITH
data_cte(id)AS
(SELECT [id] = CAST(REPLACE(id, 'AUTO_', '') AS INT) FROM tbl)
,maxmin_cte(minId, maxId)AS
(SELECT [minId] = min(id),[maxId] = max(id) FROM data_cte)
,recursive_cte(n) AS
(
SELECT [minId] n from maxmin_cte
UNION ALL
SELECT (1 + n) n FROM recursive_cte WHERE n < (SELECT [maxId] from maxmin_cte)
)
SELECT x.n
FROM
recursive_cte x
LEFT OUTER JOIN data_cte y ON
x.n = y.id
WHERE y.id IS NULL
Check this solution. Here you just need to add an identity column.
CREATE TABLE tbl (
id VARCHAR(55),
idn int identity(0,1)
);
INSERT INTO tbl VALUES
('AUTO_500'),
('AUTO_501'),
('AUTO_502'),
('AUTO_504'),
('AUTO_505'),
('AUTO_506'),
('AUTO_507'),
('AUTO_508'),
('509');
SELECT min(idn+500) FROM tbl where 'AUTO_'+cast((idn+500) as varchar)<>id
try this:
with cte as (
    select cast(REPLACE(id,'AUTO_','') as int)-500+1 [diff],
           ROW_NUMBER() over(order by cast(REPLACE(id,'AUTO_','') as int)) [rnk]
    from tbl)
select top 1 'AUTO_'+cast(500+rnk as varchar(50)) [ID]
from cte
where [diff]=[rnk]
order by rnk desc
I had a similar situation, where we had R_CDs that looked like this: R01005.
;with Active_R_CD (R_CD)
As
(
Select Distinct Cast(Replace(R_CD,'R', ' ') as Int)
from table
where stat = 1)
select Arc.R_CD + 1 as 'Gaps in R Code'
from Active_R_CD as Arc
left outer join Active_R_CD as r on ARC.R_CD + 1 = R.R_CD
where R.R_CD is null
order by 1