sql server join with regex performance - sql

I have to join two tables on a column whose values are similar but not identical (in one table, `*` stands for a single wildcard character).
ex:
-- Match each literal value in t2 against the wildcard patterns in t1.
-- Every '*' in a t1 pattern is rewritten to a one-character LIKE class.
-- RIGHT JOIN keeps every t2 row; t1 columns are NULL when no pattern matches.
SELECT *
FROM (
    SELECT 'value**' AS c1
    UNION ALL SELECT 'value11'
    UNION ALL SELECT 'value**\value22'
    UNION ALL SELECT 'value34\value**'
    UNION ALL SELECT 'asd\value**\sdf**'
) AS t1
RIGHT JOIN (
    SELECT 'value11' AS c1
    UNION ALL SELECT 'value24'
    UNION ALL SELECT 'value54'
    UNION ALL SELECT 'value54\value22'
    UNION ALL SELECT 'value11\value43'
    UNION ALL SELECT 'asd\value21\sdf22'
    UNION ALL SELECT 'asd\value21\sdf23'
    UNION ALL SELECT 'asd\value21\sdf24'
    UNION ALL SELECT 'rew\value21\sdf24'
    UNION ALL SELECT 'asd\value21'
) AS t2
    ON t2.c1 LIKE REPLACE(t1.c1, '*', '[a-Z0-9]')
That works fine, but with a lot of records (>300,000) it takes almost an hour.
Does anyone have an idea for a faster query?
thank you in advance!
i tried to add the condition on join
and LEN(t2.c1)=LEN(t1.c1)
but same slow result...

Related

SQL Select query optimization with indexing

A posts table contains 1 million rows. This table has a field with the name poster_id.
I have a list of followers by this poster_id.
I am trying to get a list of all activities from this followers (35 in this case but less or more is possible) in the last 48 hours.
I use this query:
-- Posts with post_time after 1606833542 written by any of the listed posters;
-- returns the first 100 rows in ascending post_time order.
SELECT
    post_id,
    topic_id,
    poster_id,
    post_time
FROM posts
WHERE poster_id IN (
        80202, 74247, 79290, 72488, 111751, 85040, 100256, 68025,
        101088, 101598, 101950, 103252, 103071, 80063, 100372, 102530,
        109961, 109854, 105626, 108967, 110391, 104423, 113243, 111673,
        113979, 104670, 127318, 68252, 109606, 121393, 122991, 124489,
        127723, 126525
    )
    AND post_time > 1606833542
ORDER BY post_time
LIMIT 100
Problem:
This query takes too long (0.4000 seconds) to execute.
The poster_id has an index of the post table.
How can I make this query faster?
Try to avoid the IN clause and use a join instead:
-- Posts with post_time after 1606833542 written by any of the listed posters;
-- returns the first 100 rows in ascending post_time order.
-- The poster ids are supplied as an inline derived table and joined to posts.
SELECT
    p.post_id,
    p.topic_id,
    p.poster_id,
    p.post_time
FROM posts AS p
INNER JOIN (
    -- UNION ALL: the ids are distinct literals, so the de-duplication
    -- step implied by plain UNION would be wasted work.
    SELECT 80202 AS poster_id
    UNION ALL SELECT 74247
    UNION ALL SELECT 79290
    UNION ALL SELECT 72488
    UNION ALL SELECT 111751
    UNION ALL SELECT 85040
    UNION ALL SELECT 100256
    UNION ALL SELECT 68025
    UNION ALL SELECT 101088
    UNION ALL SELECT 101598
    UNION ALL SELECT 101950
    UNION ALL SELECT 103252
    UNION ALL SELECT 103071
    UNION ALL SELECT 80063
    UNION ALL SELECT 100372
    UNION ALL SELECT 102530
    UNION ALL SELECT 109961
    UNION ALL SELECT 109854
    UNION ALL SELECT 105626
    UNION ALL SELECT 108967
    UNION ALL SELECT 110391
    UNION ALL SELECT 104423
    UNION ALL SELECT 113243
    UNION ALL SELECT 111673
    UNION ALL SELECT 113979
    UNION ALL SELECT 104670
    UNION ALL SELECT 127318
    UNION ALL SELECT 68252
    UNION ALL SELECT 109606
    UNION ALL SELECT 121393
    UNION ALL SELECT 122991
    UNION ALL SELECT 124489
    UNION ALL SELECT 127723
    UNION ALL SELECT 126525
) AS t
    ON t.poster_id = p.poster_id
-- Row filter kept in WHERE; for an INNER JOIN this returns the same rows as filtering in ON.
WHERE p.post_time > 1606833542
ORDER BY p.post_time
LIMIT 100
If the values in the IN clause come from a subquery, you could join to that subquery directly instead of building the UNION.
A WHERE ... IN clause is evaluated like several OR conditions, whereas an INNER JOIN checks the values in a single operation.
For better performance, instead of your current index on poster_id, you could try a covering index that includes all the columns you select, e.g.:
-- Covering index for the query above: poster_id (equality) first, then post_time
-- (range filter and sort key), then the remaining selected columns.
create index my_index on posts (poster_id, post_time, post_id, topic_id)
This way all the values the query needs are obtained from the index, and the query doesn't need to access the table at all.

I need some SQL help trying to find what is not in a table

I need some help. I tried this and it does not work.
I selected all the data I needed and tried to say what's not in
the original file.
Could someone show me the correct way?
-- Attempt to find which of the listed shipments are missing from acknowledgement.
-- The outer query reads rows FROM acknowledgement itself and keeps those whose
-- shipment equals one of the listed literals.
SELECT *
FROM acknowledgement t1
where t1.st01 = '110'
and (t1.shipment ='S640D14268424' or t1.Shipment ='S640D14268924' or
t1.Shipment ='S640D14268925' or t1.Shipment ='S646D14261190' or
t1.Shipment ='S646I14265886' or t1.Shipment ='S640D14268423' ...)
-- Every row that passes the filter above already has its shipment in the
-- subquery below (same table), so this NOT IN can never be true and the
-- query returns no rows.
AND [shipment]
NOT IN(
SELECT [shipment] FROM acknowledgement
)
Any help would be useful.
You are looking for something like:
-- Lists the shipment numbers that do NOT appear in acknowledgement.
-- The candidate numbers are built as an inline derived table (numbers_to_look),
-- so the outer query starts from the wanted values rather than from the table.
SELECT * FROM (
SELECT 'S640D14268424' AS Shipment
UNION SELECT 'S640D14268924'
UNION SELECT 'S640D14268925'
UNION SELECT 'S646D14261190'
UNION SELECT 'S646I14265886'
UNION SELECT 'S640D14268423'
UNION SELECT 'S646D14269951'
UNION SELECT 'S646D14269939'
UNION SELECT 'S646D14269034'
UNION SELECT 'S646D14269962'
UNION SELECT 'S646D14269953'
UNION SELECT 'S646D14271620'
UNION SELECT 'S646D14269030'
UNION SELECT 'S646D14269941'
UNION SELECT 'S646D14269251'
UNION SELECT 'S646D14273089'
UNION SELECT 'S646D14272388'
UNION SELECT 'S646D14273197'
UNION SELECT 'S646D14273399'
UNION SELECT 'S640D14273543'
UNION SELECT 'S640D14272952'
UNION SELECT 'S640D14272953'
UNION SELECT 'S640D14276996'
UNION SELECT 'S640D14277005'
UNION SELECT 'S640D14277006'
UNION SELECT 'S646D148047394'
UNION SELECT 'S640D14277004'
UNION SELECT 'S646D158049311'
UNION SELECT 'S646D158049791'
UNION SELECT 'S646D158049797'
UNION SELECT 'S646D158049806'
UNION SELECT 'S646D158049781'
UNION SELECT 'S646D158049557'
UNION SELECT 'S646D158049064'
UNION SELECT 'S646D158049561'
UNION SELECT 'S646D158049794'
UNION SELECT 'S646D158049362'
UNION SELECT 'S646D158049361'
UNION SELECT 'S646D158049792'
UNION SELECT 'S646D158049808'
UNION SELECT 'S646D158049788'
UNION SELECT 'S646D158049365'
UNION SELECT 'S646D158049800'
UNION SELECT 'S646D158049790'
UNION SELECT 'S646D158049799'
UNION SELECT 'S646D158049803'
UNION SELECT 'S646D158049784'
UNION SELECT 'S646D158049522'
UNION SELECT 'S646D158049340'
UNION SELECT 'S646D158049796'
UNION SELECT 'S646D158049687'
UNION SELECT 'S646D158049491'
UNION SELECT 'S646D158049499'
UNION SELECT 'S646D158049484'
UNION SELECT 'S646D158049486'
UNION SELECT 'S646D158049494') numbers_to_look
-- shipment>'' removes NULL (and empty) shipments from the subquery result;
-- a NULL inside a NOT IN list would prevent the predicate from ever being true.
WHERE Shipment NOT IN
(SELECT [shipment] FROM acknowledgement WHERE shipment>'' )
A better way to do this is to create a temp table to store the numbers you want to check.

Write a query that gets Poorly Mastered records and correctly Mastered Records?

I have to write a query that returns all correctly mastered recipients (grouped by first_name and last_name).
I have to write another query that returns all poorly mastered recipients (grouped by first_name and last_name).
Please see the images below: if there are multiple Master IDs against the same First Name and Last Name, the recipient is poorly mastered; if all rows have the same Master ID, it is correctly mastered.
Sample data for the query is provided below
-- Inline sample data: one row per recipient record with the columns
-- ID, FIRST_NAME, LAST_NAME and MASTER_ID (MASTER_ID is NULL on some rows).
-- The column names come from the aliases on the first SELECT.
WITH DATA1 AS
(
SELECT 5175133 ID,'Yun' FIRST_NAME,'Yue' LAST_NAME,NULL MASTER_ID FROM dual UNION ALL
SELECT 5157093,'Yun','Yue',5157093 FROM dual UNION ALL
SELECT 5226656,'Yun','Yue',NULL FROM dual UNION ALL
SELECT 6345852,'Yun','Yue',5157093 FROM dual UNION ALL
SELECT 5882603,'Ye','Han',5157093 FROM dual UNION ALL
SELECT 5902219,'Ye','Han',5157093 FROM dual UNION ALL
SELECT 6362890,'Rick','Kaylor',NULL FROM dual UNION ALL
SELECT 6362940,'Rick','Kaylor',NULL FROM dual UNION ALL
SELECT 5215659,'Rick','Kaylor',NULL FROM dual UNION ALL
SELECT 5962837,'Rick','Kaylor',5962837 FROM dual UNION ALL
SELECT 5841556,'Rick','Kaylor',5841556 FROM dual UNION ALL
SELECT 5916218,'Sherlene','Heard',5916218 FROM dual UNION ALL
SELECT 6356086,'Sherlene','Heard',5916218 FROM dual UNION ALL
SELECT 5885157,'Ye','Kong',5884937 FROM dual UNION ALL
SELECT 5884937,'Ye','Kong',NULL FROM dual UNION ALL
SELECT 5898890,'Ye','Kong',5884937 FROM dual
)
-- Returns the sample rows unchanged.
SELECT * FROM DATA1
I think its a simple query please provide help?
Thanks
As this is very probably some kind of homework or assignment, just a clue:
Have you thought about using COUNT(*) in a sub-query? As far as I can tell, "correctly mastered recipients" will all have one and only one master_id...

Get Var() and AVG() from many columns

I'm building a query to show average and variance from many columns.
To get the average I use this:
-- Every column of the selected Durabilidade row(s) plus [media]: the average of
-- that row's ten peca_N values, computed by unpivoting them into a one-column
-- derived table inside a correlated scalar subquery.
SELECT
    *,
    (
        SELECT AVG(v.c)
        FROM (
            VALUES
                (peca_1), (peca_2), (peca_3), (peca_4), (peca_5),
                (peca_6), (peca_7), (peca_8), (peca_9), (peca_10)
        ) AS v(c)
    ) AS [media]
FROM Durabilidade
WHERE cd_durabilidade = 1
The result is:
Now I need a new column with VAR(media) comparing each row with first row.
Any idea?
I think this is a case where cross apply is appropriate. I am assuming that you want the variance of the values as calculated by the var() function:
-- Per-row average and variance across the ten peca_N columns.
-- CROSS APPLY evaluates the derived table once per Durabilidade row, so both
-- aggregates are computed from a single unpivot of that row's values.
SELECT
    d.*,                      -- d.* instead of *: a bare * would also expand the APPLY columns and repeat avgval/varval
    stats.avgval AS [media],
    stats.varval
FROM Durabilidade AS d
CROSS APPLY (
    SELECT
        AVG(v.val) AS avgval,
        VAR(v.val) AS varval
    FROM (
        SELECT d.peca_1 UNION ALL
        SELECT d.peca_2 UNION ALL
        SELECT d.peca_3 UNION ALL
        SELECT d.peca_4 UNION ALL
        SELECT d.peca_5 UNION ALL
        SELECT d.peca_6 UNION ALL
        SELECT d.peca_7 UNION ALL
        SELECT d.peca_8 UNION ALL
        SELECT d.peca_9 UNION ALL
        SELECT d.peca_10
    ) AS v(val)               -- the column alias list names the otherwise unnamed UNION ALL column
) AS stats                    -- distinct from the inner alias v, so the two scopes are not confused
WHERE d.cd_durabilidade = 1
Something like this?
-- Wraps the per-row average query in a derived table (x) and applies VAR()
-- to the computed [media] column, grouping by the table's columns.
-- column1_from_durabilidade / column2_from_durabilidade are placeholders for
-- the real column names.
-- NOTE(review): media is itself a GROUP BY column, so every row inside a group
-- has the same media value; VAR(media) over such a group cannot express
-- differences between rows. Confirm this is what is wanted.
SELECT *,
VAR(media) AS [variance]
FROM
(
SELECT *,
(SELECT AVG(t.c)
FROM (
SELECT peca_1 UNION ALL
SELECT peca_2 UNION ALL
SELECT peca_3 UNION ALL
SELECT peca_4 UNION ALL
SELECT peca_5 UNION ALL
SELECT peca_6 UNION ALL
SELECT peca_7 UNION ALL
SELECT peca_8 UNION ALL
SELECT peca_9 UNION ALL
SELECT peca_10
) t(c)
) as [media]
from Durabilidade
where cd_durabilidade = 1
) x
GROUP BY
column1_from_durabilidade
,column2_from_durabilidade
--etc
,media

cross-dbms way to check if string is numeric

Ok, I have this field: code varchar(255). It contains some values used in our export routine like
DB84
DB34
3567
3568
I need to select only auto-generated (fully numeric) fields
WHERE is_numeric(table.code)
is_numeric() checks if code field contains only positive digits.
Can you propose anything that will work both under mysql 5.1 and oracle 10g?
Below are three separate implementations, one each for SQL Server, MySQL and Oracle. None of them uses (or can use) the same approach, so there doesn't seem to be a cross-DBMS way to do it.
For MySQL and Oracle, only the simple integer test is shown; for SQL Server, the full numeric test is shown.
For SQL Server:
Note that isnumeric('.') returns 1, but '.' cannot actually be converted to float. Some text like '1e6' cannot be converted to numeric directly, but you can convert it to float first and then to numeric.
-- Runs a set of sample strings (valid and malformed numbers, '' and NULL)
-- through isnumeric() and two hand-written CASE checks, side by side.
;with tmp(x) as (
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1')
select x, isnumeric(x),
-- SimpleInt: converted only when x contains no non-digit character and is not empty or NULL
case when x not like '%[^0-9]%' and x >'' then convert(int, x) end as SimpleInt,
-- Full numeric test: each WHEN rejects one malformed shape in turn;
-- whatever survives all of them is converted to float.
case
when x is null or x = '' then null -- blanks
when x like '%[^0-9e.+-]%' then null -- non valid char found
when x like 'e%' or x like '%e%[e.]%' then null -- e cannot be first, and cannot be followed by e/.
when x like '%e%_%[+-]%' then null -- nothing must come between e and +/-
when x='.' or x like '%.%.%' then null -- no more than one decimal, and not the decimal alone
when x like '%[^e][+-]%' then null -- no more than one of either +/-, and it must be at the start
when x like '%[+-]%[+-]%' and not x like '%[+-]%e[+-]%' then null
else convert(float,x)
end
-- positional sort: by the isnumeric() result, then by SimpleInt
from tmp order by 2, 3
For MySQL
-- Sample strings (valid and malformed numbers, '' and NULL) for the integer test below.
create table tmp(x varchar(100));
insert into tmp
select 'db01' union all select '1' union all select '1e2' union all
select '1234' union all select '' union all select null union all
select '1.2e4' union all select '1.e10' union all select '0' union all
select '1.2e+4' union all select '1.e-10' union all select '1e--5' union all
select '.' union all select '.123' union all select '1.1.23' union all
select '-.123' union all select '-1.123' union all select '--1' union all
select '---1.1' union all select '+1.123' union all select '++3' union all
select '-+1.123' union all select '1 1' union all select '1e1.3' union all
select '1.234' union all select 'e4' union all select '+.123' union all
select '1-' union all select '-3e-4' union all select '+3e-4' union all
select '+3e+4' union all select '-3.2e+4' union all select '1e1e1' union all
select '-1e-1-1';
-- SimpleInt is x as a number only when x consists solely of one or more digits.
-- The anchored pattern ^[0-9]+$ also rejects the empty string, which contains
-- no non-digit character and would otherwise be reported as 0.
select x,
case when x regexp '^[0-9]+$' then x*1 end as SimpleInt
from tmp order by 2
For Oracle
-- Numeric value of col only when it consists solely of one or more digits; NULL otherwise.
-- The pattern is anchored so the test accepts all-digit strings rather than
-- strings that merely contain some non-digit character.
case when REGEXP_LIKE(col, '^[0-9]+$') then col*1 end