ORACLE SQL Query Table using criteria from other table - sql

TABLEA contains the data, while TABLEB contains the search criteria
Here is a SQL Fiddle with the data
Tables
TABLEA
visited_states_time
AL= Alabama,2, AK=Alaska,5
AR=Arkansas,6
AZ=Arizona,10
CA=California, 10,CT=Connecticut,20
TABLEB
CRITERIA
AL
HI
CA
CT
AK
Desired Result
visited_states ................................... total_time_spent
AL= Alabama, AK=Alaska ............................ 7
CA=California, CT=Connecticut................... 30

That's a terrible data model. Also, you didn't state the matching condition for TABLEB: should a row be returned if any of its states matches, or only if all of them do?
Since we need to split the rows up (to sum() the times) and then recombine them, you can use:
SQL> with v as (select rownum r,
2 ','||visited_states_time||',' visited_states_time,
3 length(
4 regexp_replace(visited_states_time, '[^,]', '')
5 )+1 fields
6 from tablea)
7 select trim(both ',' from visited_states_time) visited_states_time,
8 sum(total_time_spent) total_time_spent
9 from (select *
10 from v
11 model
12 partition by (r)
13 dimension by (0 as f)
14 measures (visited_states_time, cast('' as varchar2(2)) state,
15 0 as total_time_spent, fields)
16 rules (
17 state[for f from 0 to fields[0]-1 increment 2]
18 = trim(
19 substr(visited_states_time[0],
20 instr(visited_states_time[0], ',', 1, cv(f)+1)+1,
21 instr(visited_states_time[0], '=', 1, (cv(f)/2)+1)
22 - instr(visited_states_time[0], ',', 1, cv(f)+1)-1
23 )),
24 visited_states_time[any]= visited_states_time[0],
25 total_time_spent[any]
26 = substr(visited_states_time[0],
27 instr(visited_states_time[0], ',', 1, (cv(f)+2))+1,
28 instr(visited_states_time[0], ',', 1, (cv(f)+3))
29 - instr(visited_states_time[0], ',', 1, (cv(f)+2))-1
30 )
31 ))
32 where state in (select criteria from tableb)
33 group by visited_states_time;
VISITED_STATES_TIME TOTAL_TIME_SPENT
------------------------------------- ----------------
CA=California, 10,CT=Connecticut,20 30
AL=Alabama,2, AK=Alaska,5 7
but seriously, rewrite that data model to store them separately to start with.

Related

Oracle SQL display distinct non-"standard alphanumeric" characters

I need to find the way to list all the characters used in the column in order to narrow down the "Approved" values within the insert template we are creating...
the idea is to allow all letters (only standard) without any dialect / country specific ones.
trying something like this... but need to have a list of the characters left over... like "$%()* etc.
-- Rows whose column_1 consists only of approved characters (letters, digits, hyphen).
-- The pattern must be a quoted string literal; unquoted it is a syntax error.
SELECT * FROM mytable WHERE REGEXP_LIKE(column_1, '^[a-zA-Z0-9-]+$')
To find the other characters, you could remove the ones you do expect and then see what is left:
-- For each row, delete every expected character (letters, digits, hyphen)
-- and keep only the rows where something is left over.
SELECT other_characters
FROM (
    SELECT REGEXP_REPLACE(column1, '[a-zA-Z0-9-]') AS other_characters
    FROM mytable
)
WHERE other_characters IS NOT NULL
If you want to concatenate and remove duplicate characters:
-- leftovers: per row, what remains after deleting the expected characters.
-- single_chars: recursively walks each leftover string one position at a time.
-- Final select: one string holding each distinct unexpected character once.
WITH leftovers (str) AS (
    SELECT stripped
    FROM (
        SELECT REGEXP_REPLACE(column1, '[a-zA-Z0-9-]') AS stripped
        FROM mytable
    )
    WHERE stripped IS NOT NULL
),
single_chars (str, pos, ch) AS (
    -- anchor: first character of every leftover string
    SELECT str, 1, SUBSTR(str, 1, 1)
    FROM leftovers
    UNION ALL
    -- step: advance one position until the end of the string is reached
    SELECT str, pos + 1, SUBSTR(str, pos + 1, 1)
    FROM single_chars
    WHERE pos < LENGTH(str)
)
SELECT LISTAGG(DISTINCT ch) WITHIN GROUP (ORDER BY ch) AS other_characters
FROM single_chars;
fiddle
Two steps:
Extract the special characters from all strings in the column and concatenate them into one long string (with possibly many characters appearing multifold in the string).
Use a recursive query to loop through the characters and return a string with distinct characters.
The query:
-- one_row: delete the approved characters (letters, digits, hyphen) from every
-- value that contains at least one other character, and concatenate the
-- leftovers into a single string.
-- The hyphen is placed last in the bracket expression so it is taken literally.
-- It is deliberately NOT written as "\-": inside an Oracle bracket expression a
-- backslash is an ordinary character, so "\-" would also mark "\" as approved
-- and backslashes would never be reported.
with one_row (str) as
(
  select listagg(regexp_replace(column_1, '[a-zA-Z0-9-]'))
  from mytable
  where regexp_like(column_1, '[^a-zA-Z0-9-]')
)
-- Visit the concatenated string one character per LEVEL and aggregate the
-- distinct characters back into one string.
select listagg(distinct substr(str, level, 1)) as c
from one_row
connect by level <= length(str);
-- Build 30 rows of random test strings.
-- spec: for each generated row, pick a DBMS_RANDOM.STRING option ('a', 'x' or 'p')
-- and a random length derived from dbms_random.value(5, 60).
create table dummy_data as
with spec as (
  select case round(dbms_random.value(0, 3))
           when 0 then 'a'
           when 1 then 'x'
           else 'p'
         end as opt,
         round(dbms_random.value(5, 60)) as len
  from dual
  connect by level <= 30
)
select rownum as id,
       dbms_random.string(opt, len) as str
from spec;
SELECT * FROM DUMMY_DATA;
ID STR
1 UMUUJ0R5VM1T3X10TDCNIWC3MQ5ELOB041YMNEJSLT
2 _t8 }LeZhjiMB"8/a'/~a
3 BLSE6XX6SL3M7W0DG3HH28SCHPSAT11ZH2E5DOSKEV3KW9
4 1]Mh58(l<Wa}{
5 :_QiWUkwp}V$}O
6 NC911A4SRN35CNXT2EU5H2GZ67IQQLKH
7 e"8,z$=Yvy5egvEH2KUkNoVjkitd9IMm0ZktsB i(bk4uU]c3;E
8 MgbpIsLZpWEcAghOUKOISA
9 7H02ASKO3CZRN4D5FUNPEU6YUZD
10 KbJ+QrI\l.th%>^f!Io%wshsVA%
11 PO9A47VU7AXI17XYD5VMSWW8E
12 1ILWL4V
13 FgubwibYBytNvmJHxUfG
14 ?[ngH?0!k.onN>mF(nrkO
15 86G0HP3
16 WXDBV3OBMVSDKQ59YT73G0II3U94
17 GP375CFIQPPN6216I5A7L54O
18 i\L<K,"d'ye 6s~_MB0O1 aC$q;T"EaqpZ^s\gIiYu&:%OnhVj]<a]CmOgqM
19 WxUEtr\II(97i7PQ-Z]yqd#&`#CQB0M"c0;{.by9qo#HT
20 IF5OP7KS9AXW91
21 HNcKwxXozXjTVwKeFZDLdmNOzFKKKq
22 4D8CINXIVT244RDDRZ5TSDQ4CRF4
23 3)oxevW-(~=+#cP[^g)##|1.TL-_N9O-Zdgj"cwJC'*NR; FtK)K
24 AndzeLIEPklDuTWWEBrKrdNKdXwMGeLauJkRzKpKHEGAsxlEXliwBTHdK
25 dlEX1tGFuU5\5+{5`R
26 /W0.{B&)ax&lWEE#OSw
27 CBKOVLKDFKC3EVR
28 :V#Lc.Z"8[O-)cAWUpMjc?j\Kj?xV#%`Yp [VkEV1
29 P9P047
30 W)S<fB`F;N_brMP
-- Count how often each unexpected character occurs across all of dummy_data.
-- For every row the lateral subquery generates one candidate per character
-- position (LEVEL) and keeps those not found in the list of accepted characters.
select c,
       count(*) as cnt
from dummy_data
cross join lateral (
  select substr(str, level, 1) as c
  from dual
  where instr('0123456789' || 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' ||
              'abcdefghijklmnopqrstuvwxyz',
              substr(str, level, 1)) = 0
  connect by level <= length(str)
)
group by c;
C CNT
} 4
- 5
' 3
< 4
, 2
= 2
_ 5
( 5
^ 3
[ 4
" 7
/ 3
: 3
. 6
% 5
! 2
? 4
` 4
8
; 5
+ 3
> 2
) 6
| 1
* 1
~ 3
\ 6
& 4
# 5
] 5
{ 4
# 4
$ 3

split column value into multiple columns in oracle sql

I have data into table like this:
I want the output like this:
How can we achieve this with an Oracle SQL statement? Kindly assist.
instr(comp,':')
yields the first colon occurrence
instr(comp,':',1,2)
yields the second colon occurrence, and so forth
After that, it's just math with combinations of INSTR on : and |
For example
substr(comp,2,instr(comp,':')-2)
for the first column before the colon
substr(comp,instr(comp,':')+1,instr(comp,'|')-instr(comp,':')-1)
for the first element after the colon and before the bar
and so forth.
This is one option; not dynamic at all as you'll have to know how many items there are so that you could adjust the final query. Read comments within code.
SQL> with test (id, component) as
2 -- sample data
3 (select 1, '|TD-2-2720A-NVE-C:2|TD-2-2720A-TPM-C:2|TD-PREM-NLSAS-01-PR:480|TD-ONTAP-NLSAS-01-PR:480' from dual union all
4 select 2, '|TD-2-A220A-TPM-C:2|DD-FLASH-PREM-01-PR:115|DD-FLASH-ONTAP-01-PR:115' from dual union all
5 select 5, '|TD-2-2650A-NVE-C:2|TD-2-2650A-TPM-C:2' from dual
6 ),
7 temp as
8 -- split each component into rows, separated by the pipe character
9 (select id,
10 column_value cv,
11 regexp_substr(component, '[^|]+', 1, column_value) comp
12 from test cross join
13 table(cast(multiset(select level from dual
14 connect by level <= regexp_count(component, '\|')
15 ) as sys.odcinumberlist))
16 )
17 -- final result
18 select id,
19 max(case when cv = 1 then substr(comp, 1, instr(comp, ':') - 1) end) as comp_1,
20 max(case when cv = 1 then regexp_substr(comp, '\d+$') end) as cnt_1,
21 --
22 max(case when cv = 2 then substr(comp, 1, instr(comp, ':') - 1) end) as comp_2,
23 max(case when cv = 2 then regexp_substr(comp, '\d+$') end) as cnt_2,
24 --
25 max(case when cv = 3 then substr(comp, 1, instr(comp, ':') - 1) end) as comp_3,
26 max(case when cv = 3 then regexp_substr(comp, '\d+$') end) as cnt_3,
27 --
28 max(case when cv = 4 then substr(comp, 1, instr(comp, ':') - 1) end) as comp_4,
29 max(case when cv = 4 then regexp_substr(comp, '\d+$') end) as cnt_4
30 from temp
31 group by id
32 order by id;
ID COMP_1 CNT_1 COMP_2 CNT_2 COMP_3 CNT_3 COMP_4 CNT_4
--- -------------------- ----- -------------------- ----- -------------------- ----- -------------------- -----
1 TD-2-2720A-NVE-C 2 TD-2-2720A-TPM-C 2 TD-PREM-NLSAS-01-PR 480 TD-ONTAP-NLSAS-01-PR 480
2 TD-2-A220A-TPM-C 2 DD-FLASH-PREM-01-PR 115 DD-FLASH-ONTAP-01-PR 115
5 TD-2-2650A-NVE-C 2 TD-2-2650A-TPM-C 2
SQL>

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Rebuild the demo temp table: a random number of rows (fewer than 1000), each
-- with a description cut to a random length below 30 characters.
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
-- T-SQL local variables take the @ prefix; a # prefix names a temp table and
-- is not valid in DECLARE/SET of a scalar.
DECLARE @maxGroupSize INT
SET @maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
-- GroupID is the overall running length integer-divided by the maximum; rows are
-- bucketed by where the running total lands, not by each group's own sum.
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/@maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample set: ten rows with consecutive ids and descriptions of varying length.
CREATE TABLE data
(
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES ( 1, 'qmlsdkjfqmsldk'),
       ( 2, 'mldskjf'),
       ( 3, 'qmsdlfkqjsdm'),
       ( 4, 'fmqlsdkfq'),
       ( 5, 'qdsfqsdfqq'),
       ( 6, 'mds'),
       ( 7, 'qmsldfkqsjdmfqlkj'),
       ( 8, 'qdmsl'),
       ( 9, 'mqlskfjqmlkd'),
       (10, 'qsdqfdddffd');
Solution
For every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the previous group must be extended or a new group must be started.
Set group target size to 40 to better fit the sample data.
-- Greedy grouping of an ordered list: walk the rows in id order and keep adding
-- each description length to the current group until the next one would push the
-- group past @group_max_length; then open a new group.
-- T-SQL variables take the @ prefix (a # prefix would name a temp table).
declare @group_max_length int = 40;
with rcte as
(
    -- anchor: the row with id = 1 opens group 1
    select d.id,
           d.description,
           len(d.description) as description_length,
           len(d.description) as running_length,
           1 as group_id,
           len(d.description) as group_running_length
    from data d
    where d.id = 1
    union all
    -- step: fetch the row whose id is exactly one higher (ids must be gapless)
    select d.id,
           d.description,
           len(d.description),
           r.running_length + len(d.description),
           -- stay in the current group while it still fits, otherwise start the next one
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_id
             else r.group_id + 1
           end,
           -- the group's running length either grows or restarts at this row's length
           case
             when r.group_running_length + len(d.description) <= @group_max_length
             then r.group_running_length + len(d.description)
             else len(d.description)
           end
    from rcte r
    join data d
      on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       gs.group_sum
from rcte r
-- the largest running length inside a group is that group's total
cross apply ( select max(r2.group_running_length) as group_sum
              from rcte r2
              where r2.group_id = r.group_id ) gs -- group sum
order by r.id
-- One recursion level is consumed per row and SQL Server aborts after 100 levels
-- by default; lift the cap so inputs with more than ~100 rows still work.
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Assign ID based on commonality/groups

I've put together what I view to be overly complicated SQL to get to what I'm after. I'm hoping for insight into a quicker and less complicated method.
What I'm after is the ability to assign an ID to groups of data where there are common groups of data across two columns.
For example I have the following subset of data:
CustID PartID RplcID
28 4 4
28 4 16
28 4 17
28 16 4
28 16 16
28 16 17
28 17 4
28 17 16
28 17 17
I want to create an ID for CustID=28 where there is overlap in the RplcID and PartID. So in this example, PartID 4, 16, 17 all have RplcIDs in common (4, 16, 17). As such, all of these pairs should have the same ID.
The method I'm using works (and is faster with temp tables instead of solely using CTEs) except for large datasets this thing is S-L-O-W. I'm sure there's a more efficient method out there and hoping someone can lend their expertise.
I'm outlining my current approach for as much clarity into my muddled thinking as possible.
STEP 1
Generate temporary ID using DENSE_RANK() partitioned by CustID, ordered by PartID.
RowID CustID PartID RplcID
1 28 16 16
1 28 17 16
1 28 4 16
2 28 16 17
2 28 17 17
2 28 4 17
3 28 16 4
3 28 17 4
3 28 4 4
STEP 2:
Then use these results and aggregate the PartIDs by using XML to create a comma separated string with which to group by.
RowID CustID RplcID PartIDS
4 28 16 16,17,4
4 28 17 16,17,4
4 28 4 16,17,4
STEP 3:
And finally split out these groups using the assigned ID by parsing the XML.
RowID CustID PartID RplcID
4 28 16 16
4 28 16 17
4 28 16 4
4 28 17 16
4 28 17 17
4 28 17 4
4 28 4 16
4 28 4 17
4 28 4 4
And the entirety of the SQL:
-- Sample data. "DECLARE #Parts TABLE" is not valid T-SQL (table variables are
-- named with @); the script refers to #Parts throughout, so it is created as a
-- temp table here and every later reference resolves unchanged.
CREATE TABLE #Parts
(
CustID VARCHAR(10),
PartID VARCHAR(10),
RplcID VARCHAR(10)
)
-- Explicit column list so the insert does not depend on column order.
Insert Into #Parts (CustID, PartID, RplcID) VALUES
('26','19','93'),('26','19','63'),
('26','31','93'),('26','31','63'),('26','32','93'),('26','32','63'),('26','33','93'),('26','33','63'),('26','34','93'),
('26','34','63'),('26','35','93'),('26','35','63'),('26','36','93'),('26','36','63'),('26','37','93'),('26','37','63'),
('26','38','93'),('26','38','63'),('26','39','93'),('26','39','63'),('27','40','95'),('27','41','94'),
('27','41','95'),('27','42','94'),('27','42','95'),('27','43','94'),('27','43','95'),('27','44','94'),('27','44','95'),
('27','45','94'),('27','45','95'),('27','46','94'),('27','46','95'),('27','47','94'),('27','47','95'),('27','48','94'),
('27','48','95'),('27','49','94'),('27','49','95'),('27','50','94'),('27','50','95'),('27','17','94'),('27','17','95'),
('27','51','94'),('27','51','95'),('27','52','94'),('27','52','95'),('27','53','94'),('27','53','95'),('27','54','94'),
('27','54','95'),('27','33','94'),('27','33','95'),('27','55','94'),('27','55','95'),('27','34','94'),('27','34','95'),
('27','56','94'),('27','56','95'),('27','35','94'),('27','35','95'),('27','57','94'),('27','57','95'),('27','58','94'),
('27','58','95'),('27','59','94'),('27','59','95'),('27','37','94'),('27','37','95'),('27','60','94'),('27','60','95'),
('27','61','94'),('27','61','95'),('27','62','94'),('27','62','95'),('27','63','94'),('27','63','95'),('27','64','94'),
('27','64','95'),('27','3','96'),('27','3','97'),('27','3','98'),('27','3','99'),('27','3','100'),('28','4','4'),
('28','4','16'),('28','4','17'),('28','16','4'),('28','16','16'),('28','16','17'),('28','17','4'),('28','17','16'),
('28','17','17')
;
--Step 1: Create the initial ID
-- The Parts table declares CustID, PartID and RplcID; it has no BuyID column.
-- PartID is therefore read here and exposed under the name BuyID, which is the
-- column name the later steps read from #tmp.
-- r  -> r1: rows of the same customer sharing a RplcID
-- r1 -> r2: rows of the same customer sharing that row's PartID
SELECT DISTINCT DENSE_RANK()
OVER(
partition BY r.CustID
ORDER BY r2.RplcID) AS RowID,
r.CustID,
r.PartID AS BuyID,
r2.RplcID
INTO #tmp
FROM #Parts r
JOIN #Parts r1
ON r.CustID = r1.CustID
AND r.RplcID = r1.RplcID
JOIN #Parts r2
ON r.CustID = r2.CustID
AND r1.PartID = r2.PartID
--Step 2: Group the BuyIDs
-- Builds, per (CustID, RowID, RplcID), a comma-separated list of the BuyIDs that
-- share the same RowID and CustID in #tmp, then ranks the rows by
-- (CustID, BuyIDs) so identical lists receive the same RowID.
SELECT DENSE_RANK()
OVER(
ORDER BY CustID, BuyIDs) AS RowID,
*
INTO #tmp2
FROM (SELECT CustID,
Rtrim(RplcID) RplcID,
-- Correlated FOR XML PATH('') subquery concatenates ',' + BuyID for every
-- matching row; Stuff(..., 1, 1, '') then removes the leading comma.
Stuff((SELECT ',' + Rtrim(BuyID)
FROM #tmp RSLT2
WHERE RSLT2.ROWID = RSLT.ROWID
AND RSLT2.CustID = RSLT.CustID
FOR xml path('')), 1, 1, '') [BuyIDs]
FROM #tmp RSLT
GROUP BY RSLT.CustID,
RSLT.ROWID,
RSLT.RplcID)A
--Step 3: Using the grouped BuyIDs, split the strings using XML and assign RowID
-- Turns each comma-separated BuyIDs list from #tmp2 back into one row per BuyID,
-- carrying along the RowID assigned in step 2.
SELECT RowID,
CustID,
BuyID,
RplcID
INTO #tmp3
FROM (SELECT RowID,
CustID,
-- text of the current <r> element, i.e. a single BuyID
n.r.value('.','varchar(10)') AS BuyID,
RplcID
FROM #tmp2
-- 'a,b,c' becomes '<r>a</r><r>b</r><r>c</r>' and is cast to XML
CROSS APPLY(SELECT Cast('<r>' + Replace(BuyIDs, ',', '</r><r>')
+ '</r>' AS XML)) AS S(xmlcol)
-- nodes('r') yields one row per <r> element
CROSS APPLY s.xmlcol.nodes('r') AS n(r))A
-- NOTE(review): this ORDER BY applies to a SELECT ... INTO; confirm the
-- resulting table's row order is not relied on anywhere.
Order by RowID
-- Inspect the result for customer 28: all rows with their assigned RowID ...
Select * from #tmp3 where CustID='28'
-- ... the distinct BuyIDs that ended up in its groups ...
Select distinct BuyID
from #tmp3
where CustID='28'
-- ... and the distinct RplcIDs.
Select distinct RplcID
from #tmp3
where CustID='28'

Find the largest sum of three sequential values in SQL?

Say I have the following table, called revenues.
id | revenue
------------
1 | 345
2 | 5673
3 | 0
4 | 45
5 | 4134
6 | 35
7 | 533
8 | 856
9 | 636
10 | 35
I want to find the largest sum of the grouping of sequential 3 values. Here's what I mean:
ids 1 + 2 + 3 => 345 + 5673 + 0 = 6018
ids 2 + 3 + 4 => 5673 + 0 + 45 = 5718
ids 3 + 4 + 5 => 0 + 45 + 4134 = 4179
ids 4 + 5 + 6 => 45 + 4134 + 35 = 4214
ids 5 + 6 + 7 => 4134 + 35 + 533 = 4702
ids 6 + 7 + 8 => 35 + 533 + 856 = 1424
ids 7 + 8 + 9 => 533 + 856 + 636 = 2025
ids 8 + 9 + 10 => 856 + 636 + 35 = 1527
In this case, I would want the result to be 6018, since it's the largest sum of 3 sequential values. I'm just starting to learn SQL, with my only other previous language being Java, and all I can think is how easy this would be to do with a for loop. Does anyone have any idea on how I could get started writing a query like this? Does a similar thing exist in SQL?
Edit: Furthermore, is it possible to scale something like this? What if I had a really big table and I wanted to find the largest sum of a hundred sequential values?
One approach would be to use two joins to get to id+1 and id+2:
-- Pair every row with the rows whose ids are one and two higher, then take the
-- largest three-row total. Rows without both successors drop out of the join.
SELECT MAX(first_rev.revenue + second_rev.revenue + third_rev.revenue)
FROM revenues first_rev
INNER JOIN revenues second_rev
    ON second_rev.id = first_rev.id + 1
INNER JOIN revenues third_rev
    ON third_rev.id = first_rev.id + 2
Demo.
If your database supports the lag() window function, you can retrieve the result in a single table scan:
-- three_row_totals: each row's revenue plus the revenue of the two rows before it
-- (in id order). The first two rows yield NULL because LAG finds no predecessor,
-- and MAX ignores those NULLs.
WITH three_row_totals AS (
    SELECT revenue
         + LAG(revenue, 1) OVER (ORDER BY id)
         + LAG(revenue, 2) OVER (ORDER BY id) AS rev3
    FROM revenues
)
SELECT MAX(rev3)
FROM three_row_totals
See it working at SQL Fiddle.
-- Inline copy of the sample revenues (id, rev).
with t as (
SELECT 1 as id, 345 as rev
UNION SELECT 2, 5673
UNION SELECT 3, 0
UNION SELECT 4, 45
UNION SELECT 5, 4134
UNION SELECT 6, 35
UNION SELECT 7, 533
UNION SELECT 8, 856
UNION SELECT 9, 636
UNION SELECT 10, 35)
-- ROWS 2 PRECEDING sums the current row and up to two rows before it, so ids 1
-- and 2 are summed over fewer than three rows. With this data id 2
-- (345 + 5673) and id 3 (345 + 5673 + 0) both total 6018, so which of the two
-- TOP 1 returns is not determined by the ORDER BY.
SELECT TOP 1 id, SUM (rev) OVER (ORDER BY id ROWS 2 PRECEDING) r
FROM t
ORDER BY r desc;
Provides answer 3, 6018* on SQL Server 2012.
EDIT
Query that makes sure that we only get rows that are made up from 3 revenues:
-- t: inline copy of the sample revenues.
-- framed: for every row, the total and the row count of the frame made of the
-- current row and the two rows before it (in id order).
-- Only frames that really contain three rows compete for the maximum.
WITH t (id, rev) AS (
    SELECT v.id, v.rev
    FROM (VALUES (1, 345),
                 (2, 5673),
                 (3, 0),
                 (4, 45),
                 (5, 4134),
                 (6, 35),
                 (7, 533),
                 (8, 856),
                 (9, 636),
                 (10, 35)) AS v (id, rev)
),
framed AS (
    SELECT id,
           SUM(rev) OVER (ORDER BY id ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS r,
           COUNT(*) OVER (ORDER BY id ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS cnt
    FROM t
)
SELECT TOP 1 id, r
FROM framed
WHERE cnt = 3
ORDER BY r DESC;
*Actually non-deterministic between 3, 6018 and 2, 6018. The second/edited query is deterministic.
Something like this:
-- Return the three consecutive ids whose revenues add up to the largest total,
-- together with that total.
-- Corrections to the sketch: the table is named revenues; its key column is id
-- (there is no id1); table aliases cannot be selected as columns, so the three
-- ids are listed instead; and a select-list alias (total_rev) is not visible in
-- WHERE, so the sum expression is repeated there.
select rev1.id as id1,
       rev2.id as id2,
       rev3.id as id3,
       rev1.revenue + rev2.revenue + rev3.revenue as total_rev
from revenues rev1
inner join revenues rev2
    on rev2.id = rev1.id + 1
inner join revenues rev3
    on rev3.id = rev2.id + 1
where rev1.revenue + rev2.revenue + rev3.revenue =
      (select max(a.revenue + b.revenue + c.revenue)
       from revenues a
       inner join revenues b
           on b.id = a.id + 1
       inner join revenues c
           on c.id = b.id + 1)