Query performance of T-SQL in SQL Server

I have two ad-hoc queries in SQL Server like the ones below:
select *
from Product(nolock)
where id = '12345' and name = 'ABC';
select *
from Product(nolock)
where name = 'ABC' and id = '12345';
We have a clustered index on the id column and no index on the name column. Which query will be faster? And why?

The performance will be the same.
The SQL Server query optimizer is smart enough to see that the conditions are the same - just written in a different order.
The ordering of the conditions in the WHERE clause isn't relevant.
In both cases SQL Server will seek on the clustered index by id first, and then check name against the rows it finds.
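A quick way to confirm this yourself is to compare the I/O statistics and execution plans of the two statements (a minimal sketch, assuming the Product table from the question):
SET STATISTICS IO ON;
SET STATISTICS TIME ON;

SELECT * FROM Product WHERE id = '12345' AND name = 'ABC';
SELECT * FROM Product WHERE name = 'ABC' AND id = '12345';

SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
-- Both statements should report the same logical reads and show the same plan:
-- a clustered index seek on id with a residual predicate on name.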

In this case, it is irrelevant:
I created a products table with a clustered primary key on ID and replicated your test case.
Enabling the execution plan will give you the answer :)
Code for the test:
drop table if exists products;

Create table products
(
    id int primary key clustered,
    Value1 float
);

with randomvalues
as(
    select 1 id, CAST(RAND(CHECKSUM(NEWID()))*100 as varchar(100)) randomnumber
    --select 1 id, RAND(CHECKSUM(NEWID()))*100 randomnumber
    union all
    select id + 1, CAST(RAND(CHECKSUM(NEWID()))*100 as varchar(100)) randomnumber
    from randomvalues
    where id < 1000
)
insert into products
select *
from randomvalues
OPTION(MAXRECURSION 0);

select *
from products
where id = 9 and value1 = 75.6648;

select *
from products
where value1 = 75.6648 and id = 9;

Related

Hive Query with a large WHERE Condition

I am writing a HIVE query to pull about 2,000 unique keys from a table.
I keep getting this error - java.lang.StackOverflowError
My query is basic but looks like this:
SELECT * FROM table WHERE (Id = 1 or Id = 2 or Id = 3 or Id = 4)
My WHERE clause goes all the way up to 2,000 unique ids and I receive the error above. Does anyone know of a more efficient way to do this, or a way to get this query to work?
Thanks!
You may use SPLIT and EXPLODE to convert the comma-separated string to rows and then use IN or EXISTS.
Using IN
SELECT * FROM yourtable t WHERE
t.ID IN
(
SELECT
explode(split('1,2,3,4,5,6,1998,1999,2000',',')) as id
) ;
Using EXISTS
SELECT * FROM yourtable t WHERE
EXISTS
(
SELECT 1 FROM (
SELECT
explode(split('1,2,3,4,5,6,1998,1999,2000',',')) as id
) s
WHERE s.id = t.id
);
Make use of the BETWEEN clause instead of specifying all unique ids (this works only if the ids form a contiguous range):
SELECT ID FROM table WHERE ID BETWEEN 1 AND 2000 GROUP BY ID;
Alternatively, you can create a table for these IDs and then use an EXISTS (or JOIN) condition against the new table to get only your specific IDs, as sketched below.
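A minimal sketch of that approach, assuming a staging table named wanted_ids (the table and column names here are illustrative, not from the question):
-- Hypothetical staging table holding the 2,000 keys
CREATE TABLE wanted_ids (id INT);
-- Load the keys (e.g. via LOAD DATA or INSERT), then join against the staging table:
SELECT t.*
FROM yourtable t
JOIN wanted_ids w ON t.id = w.id;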

Select rows based on Subset

I have a scenario where I need to write a SQL query based on the result of another query.
Consider the table data:
id attribute
1 a
1 b
2 a
3 a
3 b
3 c
I want to write a query to select ids based on an attribute set.
First I need to check the attributes of id 1 using this query:
select attribute from table where id = 1
Then, based on this result, I need to find the ids whose attribute sets contain it. In our case 1(a,b) is a subset of 3(a,b,c), so my query should return 3 in that case.
And if I check based on 2(a), which is a subset of 1(a,b) and 3(a,b,c), it should return 1 and 3.
I hope, it's understandable. :)
You could use this query.
The logic is simple: if there is no item that is in A but not in B, then A is a subset of B.
DECLARE @SampleData AS TABLE
(
    Id int, attribute varchar(5)
)
INSERT INTO @SampleData
VALUES (1,'a'), (1,'b'),
       (2,'a'),
       (3,'a'),(3,'b'),(3,'c')
DECLARE @FilterId int = 1
;WITH temp AS
(
    SELECT DISTINCT sd.Id FROM @SampleData sd
)
SELECT * FROM temp t
WHERE t.Id <> @FilterId
AND NOT EXISTS (
    SELECT sd2.attribute FROM @SampleData sd2
    WHERE sd2.Id = @FilterId
    AND NOT EXISTS (SELECT * FROM @SampleData sd WHERE sd.Id = t.Id AND sd.attribute = sd2.attribute)
)
Demo link: Rextester
I would compose a query for that in three steps. First, I'd get the attributes of the desired id, which is the query you wrote:
select attribute from table where id = 1
Then I would get the number of attributes for the required id
select count(distinct attribute) from table where id = 1
Finally I would use the above results as filters
select id
from table
where id <> 1 and
attribute in (
select attribute from table where id = 1 /* Step 1 */
)
group by id
having count(distinct attribute) = (
select count(distinct attribute) from table where id = 1 /* Step 2 */
)
This returns every id (other than the provided one) that has all of the provided id's attributes - that is, every id whose attribute set is a superset of the provided one's.
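As a usage example (same placeholder table name as above), plugging id 2 into the same pattern should return ids 1 and 3, matching the second scenario in the question:
select id
from table
where id <> 2 and
attribute in (
select attribute from table where id = 2
)
group by id
having count(distinct attribute) = (
select count(distinct attribute) from table where id = 2
)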

Oracle SQL: One select takes too long, another select is fast

I have two selects on the same view. One select is filtered on the primary key, the other is filtered on a non-unique index. The underlying view is complicated. The select with the primary key takes approximately 15 seconds; the select with the non-unique index takes 0.5 seconds.
Why is the query that uses the primary key so slow?
I used "EXPLAIN PLAN FOR" to create an execution plan for both.
The execution plan for fast select: fast select
The execution plan for slow select: slow select
--Pseudocode
create table TableA
(
ID number, --(Primary Key)
ProjectID number, --(Not unique index)
TableB_id number, --(Foreign Key to Table TableB)
TableC_id number, --(Foreign Key to Table TableC)
TableD_id number --(Foreign Key to Table TableD)
);
Create view viewX
as
Select
ID as TableB_ID,
0 as TableC_ID,
0 as TableD_ID,
Value1,
Value2
from TableB
union all
Select
0 as TableB_ID,
ID as TableC_ID,
0 as TableD_ID,
Value1,
value2
from TableC
union all
Select
0 as TableB_ID,
0 as TableC_ID,
id as TableD_ID,
value1,
value2
from viewz;
Create view viewA
as
Select
t.id,
t.ProjectID,
x.TableB_ID,
x.TableC_ID,
x.TableD_ID
from TableA t
inner join viewX x
on t.TableB_ID = x.TableB_ID and
t.TableC_ID = x.TableC_ID and
t.TableD_ID = x.TableD_ID;
--this select needs 0.5 seconds
Select *
from ViewA
where ProjectID = 2220;
--this select needs 15 seconds
Select *
from viewA
where id = 5440;
The selects on TableA and on viewX separately are fast.
--this select needs 0.5 seconds
select *
from TableA
where id = 5440;
Result: ID = 5440, ProjectID = 2220, TableB_ID = 123, TableC_ID = 5325, TableD_ID = 7654
--this select needs 0.3 seconds
Select *
from viewX x
where TableB_ID = 123 and
TableC_ID = 5325 and
TableD_ID = 7654;
Thanks for your support
I would say it is because the optimizer decomposes the select against the view into selects against the base tables. In the second case, you are not union-ing all the rows of the other tables, just the rows that meet the WHERE clause for that table, so the second query is faster because it has to go through fewer rows.
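One possible workaround (a sketch only, reusing the table and view names from the question; whether it actually helps depends on the real plans) is to look up the join keys for the single row first, and then probe viewX with plain equality predicates that can be pushed into each branch of the UNION ALL:
select x.*
from viewX x
where (x.TableB_ID, x.TableC_ID, x.TableD_ID) in
      (select t.TableB_ID, t.TableC_ID, t.TableD_ID
       from TableA t
       where t.id = 5440);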

Tricky MS Access SQL query to remove surplus duplicate records

I have an Access table of the form (I'm simplifying it a bit)
ID AutoNumber Primary Key
SchemeName Text (50)
SchemeNumber Text (15)
This contains some data eg...
ID SchemeName SchemeNumber
--------------------------------------------------------------------
714 Malcolm ABC123
80 Malcolm ABC123
96 Malcolms Scheme ABC123
101 Malcolms Scheme ABC123
98 Malcolms Scheme DEF888
654 Another Scheme BAR876
543 Whatever Scheme KJL111
etc...
Now. I want to remove duplicate names under the same SchemeNumber. But I want to leave the record which has the longest SchemeName for that scheme number. If there are duplicate records with the same longest length then I just want to leave only one, say, the lowest ID (but any one will do really). From the above example I would want to delete IDs 714, 80 and 101 (to leave only 96).
I thought this would be relatively easy to achieve but it's turning into a bit of a nightmare! Thanks for any suggestions. I know I could loop it programmatically but I'd rather have a single DELETE query.
See if this query returns the rows you want to keep:
SELECT r.SchemeNumber, r.SchemeName, Min(r.ID) AS MinOfID
FROM
(SELECT
SchemeNumber,
SchemeName,
Len(SchemeName) AS name_length,
ID
FROM tblSchemes
) AS r
INNER JOIN
(SELECT
SchemeNumber,
Max(Len(SchemeName)) AS name_length
FROM tblSchemes
GROUP BY SchemeNumber
) AS w
ON
(r.SchemeNumber = w.SchemeNumber)
AND (r.name_length = w.name_length)
GROUP BY r.SchemeNumber, r.SchemeName
ORDER BY r.SchemeName;
If so, save it as qrySchemes2Keep. Then create a DELETE query to discard rows from tblSchemes whose ID value is not found in qrySchemes2Keep.
DELETE
FROM tblSchemes AS s
WHERE Not Exists (SELECT * FROM qrySchemes2Keep WHERE MinOfID = s.ID);
Just beware, if you later use Access' query designer to make changes to that DELETE query, it may "helpfully" convert the SQL to something like this:
DELETE s.*, Exists (SELECT * FROM qrySchemes2Keep WHERE MinOfID = s.ID)
FROM tblSchemes AS s
WHERE (((Exists (SELECT * FROM qrySchemes2Keep WHERE MinOfID = s.ID))=False));
DELETE FROM Table t1
WHERE EXISTS (SELECT 1 from Table t2
WHERE t1.SchemeNumber = t2.SchemeNumber
AND Length(t2.SchemeName) > Length(t1.SchemeName)
)
Depending on your RDBMS, you may need to use a different length function (Oracle: LENGTH, MySQL: LENGTH, SQL Server: LEN).
delete ShortScheme
from Scheme ShortScheme
join Scheme LongScheme
on ShortScheme.SchemeNumber = LongScheme.SchemeNumber
and (len(ShortScheme.SchemeName) < len(LongScheme.SchemeName) or (len(ShortScheme.SchemeName) = len(LongScheme.SchemeName) and ShortScheme.ID > LongScheme.ID))
(SQL Server flavored)
Now updated to include the specified tie resolution. You may get better performance, though, by doing it in two queries: first deleting the schemes with shorter names as in my original query, and then going back and deleting the higher IDs where there was a tie in name length, as sketched below.
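A sketch of that two-pass variant (still SQL Server flavored, using the same table and column names as above):
-- Pass 1: remove rows whose name is shorter than the longest name
-- for the same SchemeNumber.
delete ShortScheme
from Scheme ShortScheme
join Scheme LongScheme
on ShortScheme.SchemeNumber = LongScheme.SchemeNumber
and len(ShortScheme.SchemeName) < len(LongScheme.SchemeName);

-- Pass 2: of the remaining ties, keep only the lowest ID per SchemeNumber.
delete Dup
from Scheme Dup
join Scheme Keeper
on Dup.SchemeNumber = Keeper.SchemeNumber
and Dup.ID > Keeper.ID;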
I'd do this in multiple steps. Large delete operations done in a single step make me too nervous -- what if you make a mistake? There's no SQL 'undo' statement.
-- Setup the data
DROP Table foo;
DROP Table bar;
DROP Table bat;
DROP Table baz;
CREATE TABLE foo (
id int(11) NOT NULL,
SchemeName varchar(50),
SchemeNumber varchar(15),
PRIMARY KEY (id)
);
insert into foo values (714, 'Malcolm', 'ABC123' );
insert into foo values (80, 'Malcolm', 'ABC123' );
insert into foo values (96, 'Malcolms Scheme', 'ABC123' );
insert into foo values (101, 'Malcolms Scheme', 'ABC123' );
insert into foo values (98, 'Malcolms Scheme', 'DEF888' );
insert into foo values (654, 'Another Scheme ', 'BAR876' );
insert into foo values (543, 'Whatever Scheme ', 'KJL111' );
-- Find all the records that have dups, find the longest one
create table bar as
select max(length(SchemeName)) as max_length, SchemeNumber
from foo
group by SchemeNumber
having count(*) > 1;
-- Find the one we want to keep
create table bat as
select min(a.id) as id, a.SchemeNumber
from foo a join bar b on a.SchemeNumber = b.SchemeNumber
and length(a.SchemeName) = b.max_length
group by a.SchemeNumber;
-- Select into this table all the rows to delete
create table baz as
select a.id from foo a join bat b on a.SchemeNumber = b.SchemeNumber
and a.id != b.id;
This will give you a new table containing only the ids of the rows that you want to remove.
Now check these out and make sure that they contain only the rows you want deleted. This way you can make sure that when you do the delete, you know exactly what to expect. It should also be pretty fast.
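For example (a sketch against the tables created above):
-- the ids queued for deletion
select * from baz;
-- the full rows that would be removed
select * from foo where id in (select id from baz);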
Then, when you're ready, use this command to delete the rows:
delete from foo where id in (select id from baz);
This seems like more work because of the extra tables, but it's safer and probably just as fast as the other ways. Plus you can stop at any step and make sure the data is what you want before you do any actual deletes.
If your platform supports ranking functions and common table expressions:
with cte as (
    select row_number() over
        (partition by SchemeNumber order by len(SchemeName) desc) as rn
    from Table
)
delete from cte where rn > 1;
try this:
Select * From Table t
Where Len(SchemeName) <
(Select Max(Len(Schemename))
From Table
Where SchemeNumber = t.SchemeNumber )
And Id >
(Select Min (Id)
From Table
Where SchemeNumber = t.SchemeNumber
And SchemeName = t.SchemeName)
or this:
Select * From Table t
Where Id >
(Select Min(Id) From Table
Where SchemeNumber = t.SchemeNumber
And Len(SchemeName) <
(Select Max(Len(Schemename))
From Table
Where SchemeNumber = t.SchemeNumber))
If either of these selects the records that should be deleted, just change it to a DELETE:
Delete
From Table t
Where Len(SchemeName) <
(Select Max(Len(Schemename))
From Table
Where SchemeNumber = t.SchemeNumber )
And Id >
(Select Min (Id)
From Table
Where SchemeNumber = t.SchemeNumber
And SchemeName = t.SchemeName)
or using the second construction:
Delete From Table t Where Id >
(Select Min(Id) From Table
Where SchemeNumber = t.SchemeNumber
And Len(SchemeName) <
(Select Max(Len(Schemename))
From Table
Where SchemeNumber = t.SchemeNumber))

Make SQL Select same row multiple times

I need to test my mail server. How can I make a Select statement
that selects, say, ID=5469 a thousand times?
If I get your meaning, then a very simple way is to cross join onto a derived query on a table with more than 1000 rows in it and put a TOP 1000 on that. This would duplicate your results 1000 times.
EDIT: As an example (this is MSSQL; I don't know if Access is much different):
SELECT
MyTable.*
FROM
MyTable
CROSS JOIN
(
SELECT TOP 1000
*
FROM
sysobjects
) [BigTable]
WHERE
MyTable.ID = 1234
You can use the UNION ALL statement.
Try something like:
SELECT * FROM tablename WHERE ID = 5469
UNION ALL
SELECT * FROM tablename WHERE ID = 5469
You'd have to repeat the SELECT statement a bunch of times, but you could write a bit of VB code in Access to create a dynamic SQL statement and then execute it. Not pretty, but it should work.
Create a helper table for this purpose:
JUST_NUMBER(NUM INT primary key)
Insert numbers from 1 to N into it (with the help of some (VB) script, or see the T-SQL sketch after the query). Then execute this query:
SELECT MYTABLE.*
FROM MYTABLE,
JUST_NUMBER
WHERE MYTABLE.ID = 5469
AND JUST_NUMBER.NUM <= 1000
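For completeness, here is one way to fill the helper table without a VB script (a T-SQL sketch; in Access itself you would still use VBA or paste the rows in):
-- populate JUST_NUMBER with 1..1000
DECLARE @i int = 1;
WHILE @i <= 1000
BEGIN
    INSERT INTO JUST_NUMBER (NUM) VALUES (@i);
    SET @i = @i + 1;
END;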
Here's a way of using a recursive common table expression to generate some empty rows, then to cross join them back onto your desired row:
declare @myData table (val int);
insert @myData values (666),(888),(777) --some dummy data
;with cte as
(
select 100 as a
union all
select a-1 from cte where a>0
--generate 100 rows, the max recursion depth
)
,someRows as
(
select top 1000 0 a from cte,cte x1,cte x2
--xjoin the hundred rows a few times
--to generate 1030301 rows, then select top n rows
)
select m.* from @myData m, someRows where m.val=666
Substitute @myData for your real table, and alter the final predicate to suit.
An easy way...
Suppose only one row exists in the DB:
sku = 52, description = Skullcandy Inkd Green, price = 50.00
Cross join another table that has no key relationship to the main table.
Original query:
SELECT Prod_SKU , Prod_Descr , Prod_Price FROM dbo.TB_Prod WHERE Prod_SKU = N'52'
The modified query, adding an unrelated table called 'dbo.TB_Labels' and a TOP (n), where n is the number of repetitions you want (1000 here):
SELECT TOP (1000) Prod_SKU , Prod_Descr , Prod_Price FROM dbo.TB_Prod, dbo.TB_Labels WHERE Prod_SKU = N'52'
In Postgres there is a nice function called generate_series. So in PostgreSQL it is as simple as:
select information from test_table, generate_series(1, 1000) where id = 5469
In this way, the matching row is returned 1000 times.
Example for PostgreSQL:
CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; --To be able to use function uuid_generate_v4()
--Create a test table
create table test_table (
id serial not null,
uid UUID NOT NULL,
CONSTRAINT uid_pk PRIMARY KEY(id));
-- Insert 10000 rows
insert into test_table (uid)
select uuid_generate_v4() from generate_series(1, 10000);
-- Read the data from id=5469 one thousand times
select id, uid, uuid_generate_v4() from test_table, generate_series(1, 1000) where id = 5469;
As you can see in the result below, the uid value is returned 1000 times, as confirmed by the new uuid generated for every row.
id |uid |uuid_generate_v4
----------------------------------------------------------------------------------------
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"5630cd0d-ee47-4d92-9ee3-b373ec04756f"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"ed44b9cb-c57f-4a5b-ac9a-55bd57459c02"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"3428b3e3-3bb2-4e41-b2ca-baa3243024d9"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"7c8faf33-b30c-4bfa-96c8-1313a4f6ce7c"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"b589fd8a-fec2-4971-95e1-283a31443d73"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"8b9ab121-caa4-4015-83f5-0c2911a58640"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"7ef63128-b17c-4188-8056-c99035e16c11"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"5bdc7425-e14c-4c85-a25e-d99b27ae8b9f"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"9bbd260b-8b83-4fa5-9104-6fc3495f68f3"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"c1f759e1-c673-41ef-b009-51fed587353c"
5469|"10791df5-ab72-43b6-b0a5-6b128518e5ee"|"4a70bf2b-ddf5-4c42-9789-5e48e2aec441"
Of course, other DBs won't necessarily have the same function, but the same kind of series can usually be generated another way.
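For instance, a sketch of the same idea in SQL Server using a recursive CTE (reusing the test_table name from above):
;WITH series AS
(
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM series WHERE n < 1000
)
SELECT t.*
FROM test_table t, series
WHERE t.id = 5469
OPTION (MAXRECURSION 1000);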
If you are doing this in SQL Server:
declare @cnt int
set @cnt = 0
while @cnt < 1000
begin
    select '12345'
    set @cnt = @cnt + 1
end
select '12345' can be any expression
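Note that the loop above returns 1000 separate one-row result sets. If you want the copies as a single result set instead, a variation (sketch) is to collect them into a table variable first:
declare @rows table (val varchar(10))
declare @cnt int = 0
while @cnt < 1000
begin
    insert into @rows select '12345'
    set @cnt = @cnt + 1
end
select * from @rows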
This may be another solution: repeat rows based on a column value of TestTable. First run the CREATE TABLE and INSERT statements, then run the following query for the desired result.
CREATE TABLE TestTable
(
ID INT IDENTITY(1,1),
Col1 varchar(10),
Repeats INT
)
INSERT INTO TESTTABLE
VALUES ('A',2), ('B',4),('C',1),('D',0)
WITH x AS
(
SELECT TOP (SELECT MAX(Repeats)+1 FROM TestTable) rn = ROW_NUMBER()
OVER (ORDER BY [object_id])
FROM sys.all_columns
ORDER BY [object_id]
)
SELECT * FROM x
CROSS JOIN TestTable AS d
WHERE x.rn <= d.Repeats
ORDER BY Col1;
This trick helped me with my requirement.
Here, PRODUCTDETAILS is my data table and orderid is my column.
declare @Req_Rows int = 12
;WITH cte AS
(
SELECT 1 AS Number
UNION ALL
SELECT Number + 1 FROM cte WHERE Number < @Req_Rows
)
SELECT PRODUCTDETAILS.*
FROM cte, PRODUCTDETAILS
WHERE PRODUCTDETAILS.orderid = 3
create table #tmp1 (id int, fld varchar(max))
insert into #tmp1 (id, fld)
values (1,'hello!'),(2,'world'),(3,'nice day!')
select * from #tmp1
go
select * from #tmp1 where id=3
go 1000  -- "go N" re-runs the preceding batch N times (SSMS / sqlcmd only)
drop table #tmp1
In SQL Server, try:
print 'wow'
go 5
output:
Beginning execution loop
wow
wow
wow
wow
wow
Batch execution completed 5 times.
The easy way is to create a table with 1000 rows. Let's call it BigTable. Then you would query for the data you want and join it with the big table, like this:
SELECT MyTable.*
FROM MyTable, BigTable
WHERE MyTable.ID = 5469