SQL speed issue - sql

I am using PostgreSQL and want to use a query like this:
SELECT device, value, id
FROM myTable s
WHERE (SELECT COUNT(*) FROM myTable f WHERE f.id = s.id AND f.value >= s.value ) <= 2
This works but the problem is it takes minutes to execute over large data. Is there a faster way that can happen in seconds? What I am trying to do is take only two items from a row where both values are sorted in asc order.
id | device | value
1 123 40
1 456 30
1 789 45
2 12 10
2 11 9
The above is my table (I know ids are not unique, not my design, but it has a purpose) but within the id lets say id = 1, I want to select id, device and value of the smallest 2, so my result would be 1, 123, 30 and 1, 456, 40 and so on for other ids.
Also, if anyone knows, if you insert sorted data into a database is it a guarantee to read back out in the same order?

Try below query:
SELECT s.device,s.id,s.value
FROM myTable s
INNER JOIN myTable f ON s.id = f. id AND f.value >= s.value
GROUP BY s.device,s.id,s.value
HAVING COUNT(s.id) <= 2

This can be done using window functions:
select id, device, value
from (
select id, device, value,
row_number() over (partition by id order by value) as rn
from the_table
) t
where rn <= 2
order by id, device, value;
Example:
postgres> create table the_table (id integer, device integer, value integer);
CREATE TABLE
postgres> insert into the_table values
...> (1, 123, 40),
...> (1, 456, 30),
...> (1, 789, 45),
...> (2, 12 , 10),
...> (2, 11 , 9);
INSERT 0 5
postgres> select id, device, value
...> from (
...> select id, device, value,
...> row_number() over (partition by id order by value) as rn
...> from the_table
...> ) t
...> where rn <= 2;
id | device | value
----+--------+-------
1 | 123 | 40
1 | 456 | 30
2 | 11 | 9
2 | 12 | 10
(4 rows)

Related

SQL Server - How to query the set of maximum numbers from a list of numbers from top to bottom

Best way to explain this would be through an example. Let's say I have this simple 2 column table:
Id | Score
1 | 10
2 | 5
3 | 20
4 | 15
5 | 20
6 | 25
7 | 30
8 | 30
9 | 10
10 | 40
The query should return the IDs of each item where the max score changed. So, from the top, 10 would be the top score since item 1 has 10 the first time through but then on item 3 it has a score of 20 so it just had a new max score and this continues until the bottom of the table. So eventually, the query will result to:
1, 3, 6, 7, 10
I tried doing a Cursor and loop through the table but I was wondering if there was a much simple way of doing this.
Thanks
Solution (SQL2012+):
SELECT v.MaxScore, MIN(v.Id) AS FirstId
FROM (
SELECT *, MAX(t.Score) OVER(ORDER BY t.Id ASC) AS MaxScore
FROM #Table AS t
) v
GROUP BY v.MaxScore
Demo
one more version,works for versions >= 2008,you can remove apply to make it work for 2005 as well
;with cte
(Id , Score)
as
(
select 1 , 10 union all
select 2 , 5 union all
select 3 , 20 union all
select 4 , 15 union all
select 5 , 20 union all
select 6 , 25 union all
select 7 , 30 union all
select 8 , 30 union all
select 9 , 10 union all
select 10 , 40
)
select min(id)
from
cte c2
cross apply
(select case when score -(select max(score) from cte c1 where c1.id<=c2.id )=0
then 1 else 0 end) b(val)
where val=1
group by Score
Output:
1
3
6
7
10
I think you can just do a MIN on the id with a GROUP BY Score. Like this:
SELECT MIN(Id) FROM table GROUP BY Score
Using LAG function, that returns prev value of score:
DECLARE #Table TABLE(Id int, Score int)
INSERT INTO #Table
VALUES
(1 , 10),
(2 , 10),
(3 , 20),
(4 , 20),
(5 , 20),
(6 , 25),
(7 , 30),
(8 , 30),
(9 , 30),
(10 , 40)
SELECT *
FROM
(
SELECT
*,
LAG(t.Score, 1, NULL) OVER (ORDER BY t.Id) AS PrevScore
FROM #Table AS t
) AS p
WHERE p.Score <> p.PrevScore OR p.PrevScore IS NULL
Try This
declare #scores varchar(max)
select #scores = isnull(#scores+',','')+convert(varchar,min(id))
from #temp group by score
select #scores

select based on specific values

I have this table:
ID NO.
111 6
222 7
333 9
111 8
333 4
222 3
111 7
222 5
333 2
I want to select only 2 ID numbers from table where NO. column equal specific values.
For example i tried this query but i didn't get the expected result:
SELECT top 2 * FROM mytable where NO. in
(select NO. from mytable )
Expected result:
111 6
111 8
222 7
222 3
333 9
333 3
You seem to want to select two rows in the table for each id, based on a condition on the No column. For this, one method uses row_number():
select t.*
from (select t.*, row_number() over (partition by id order by id) as seqnum
from mytable t
where <condition goes here>
) t
where seqnum <= 2;
I'm guessing (333,3) is a mistake and you expect (333,2). If not I have no idea.
SELECT
ua.ID
, ua.[NO.]
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY t.[NO.] ASC) AS RowNum
, t.ID
, t.[NO.]
FROM dbo.t1 AS t
UNION ALL
SELECT
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY t.[NO.] DESC)
, ID
, t.[NO.]
FROM dbo.t1 AS t
) ua
WHERE ua.RowNum = 1
ORDER BY ID, ua.[NO.] DESC
If you're just trying to get top 2 values for each group, you need something to define the order, ie. a third column. Then you don't need UNION ALL, just use WHERE ua.RowNum < 3.
/*Select 2 random rows per id where the number of rows per id can vary between 1 and infinity
A good article for this:-*/
--https://www.mssqltips.com/sqlservertip/3157/different-ways-to-get-random-data-for-sql-server-data-sampling/
DECLARE #TABLE TABLE(ID INT,NO INT)
INSERT INTO #TABLE
VALUES
(111, 6),
(222, 7),
(333 , 9),
(111 , 8),
(333 , 4),
(222 , 3),
(111 , 7),
(222 , 5),
(333 , 2)
select t.* from
(
Select s.* ,ROW_NUMBER() OVER(PARTITION BY ID ORDER BY randomnumber) ROWNUMBER
from
(
SELECT ID,NO,
(ABS(CHECKSUM(NEWID())) % 100001) + ((ABS(CHECKSUM(NEWID())) % 100001) * 0.00001) [randomnumber]
FROM #TABLE
) s
) t
where t.rownumber < 3

Count Top 5 Elements spread over rows and columns

Using T-SQL for this table:
+-----+------+------+------+-----+
| No. | Col1 | Col2 | Col3 | Age |
+-----+------+------+------+-----+
| 1 | e | a | o | 5 |
| 2 | f | b | a | 34 |
| 3 | a | NULL | b | 22 |
| 4 | b | c | a | 55 |
| 5 | b | a | b | 19 |
+-----+------+------+------+-----+
I need to count the TOP 3 names (Ordered by TotalCount DESC) across all rows and columns, for 3 Age groups: 0-17, 18-49, 50-100. Also, how do I ignore the NULLS from my results?
If it's possible, how I can also UNION the results for all 3 age groups into one output table to get 9 results (TOP 3 x 3 Age groups)?
Output for only 1 Age Group: 18-49 would look like this:
+------+------------+
| Name | TotalCount |
+------+------------+
| b | 4 |
| a | 3 |
| f | 1 |
+------+------------+
You need to unpivot first your table and then exclude the NULLs. Then do a simple COUNT(*):
WITH CteUnpivot(Name, Age) AS(
SELECT x.*
FROM tbl t
CROSS APPLY ( VALUES
(col1, Age),
(col2, Age),
(col3, Age)
) x(Name, Age)
WHERE x.Name IS NOT NULL
)
SELECT TOP 3
Name, COUNT(*) AS TotalCount
FROM CteUnpivot
WHERE Age BETWEEN 18 AND 49
GROUP BY Name
ORDER BY COUNT(*) DESC
ONLINE DEMO
If you want to get the TOP 3 for each age group:
WITH CteUnpivot(Name, Age) AS(
SELECT x.*
FROM tbl t
CROSS APPLY ( VALUES
(col1, Age),
(col2, Age),
(col3, Age)
) x(Name, Age)
WHERE x.Name IS NOT NULL
),
CteRn AS (
SELECT
AgeGroup =
CASE
WHEN Age BETWEEN 0 AND 17 THEN '0-17'
WHEN Age BETWEEN 18 AND 49 THEN '18-49'
WHEN Age BETWEEN 50 AND 100 THEN '50-100'
END,
Name,
COUNT(*) AS TotalCount
FROM CteUnpivot
GROUP BY
CASE
WHEN Age BETWEEN 0 AND 17 THEN '0-17'
WHEN Age BETWEEN 18 AND 49 THEN '18-49'
WHEN Age BETWEEN 50 AND 100 THEN '50-100'
END,
Name
)
SELECT
AgeGroup, Name, TotalCount
FROM(
SELECT *,
rn = ROW_NUMBER() OVER(PARTITION BY AgeGroup, Name ORDER BY TotalCount DESC)
FROM CteRn
) t
WHERE rn <= 3;
ONLINE DEMO
The unpivot technique using CROSS APPLY and VALUES:
An Alternative (Better?) Method to UNPIVOT (SQL Spackle) by Dwain Camps
You can check below multiple-CTE SQL select statement
Row_Number() with Partition By clause is used ordering records within each group categorized by ages
/*
CREATE TABLE tblAges(
[No] Int,
Col1 VarChar(10),
Col2 VarChar(10),
Col3 VarChar(10),
Age SmallInt
)
INSERT INTO tblAges VALUES
(1, 'e', 'a', 'o', 5),
(2, 'f', 'b', 'a', 34),
(3, 'a', NULL, 'b', 22),
(4, 'b', 'c', 'a', 55),
(5, 'b', 'a', 'b', 19);
*/
;with cte as (
select
col1 as col, Age
from tblAges
union all
select
col2, Age
from tblAges
union all
select
col3, Age
from tblAges
), cte2 as (
select
col,
case
when age < 18 then '0-17'
when age < 50 then '18-49'
else '50-100'
end as grup
from cte
where col is not null
), cte3 as (
select
grup,
col,
count(grup) cnt
from cte2
group by
grup,
col
)
select * from (
select
grup, col, cnt, ROW_NUMBER() over (partition by grup order by cnt desc) cnt_grp
from cte3
) t
where cnt_grp <= 3
order by grup, cnt

How to select multi record depending on some column's condition?

Say there is a SQL Server table which contain 2 columns: ID, Value
The sample data looks like this:
ID value
------------------
1 30
1 30
2 50
2 50
3 50
When I run this query:
select ID, NEWID(), value
from table1
order by ID
The result looks like this:
1 30 E152AD19-9920-4567-87FF-C4822FD9E485
1 30 54F28C58-ABA9-4DFB-9A80-CE9C4C390CBB
2 50 ........
2 50 ........
3 50 4E5A9E26-FEEC-4CC7-9AC5-96747053B6B2
But what I want is : how many record of ID depending on (sum of value /30 )'s result, for example of ID 2, it's value's sum is 50+50=100, and 100/30=3, so ID 2 will display in query result three times
The final result i want is like this:
1 E152AD19-9920-4567-87FF-C4822FD9E485
1 54F28C58-ABA9-4DFB-9A80-CE9C4C390CBB
2 4E5A9E26-FEEC-4CC7-9AC5-96747053B6B2
2 ....
2 ....
3 D861563E-E01A-4198-9E92-7BEB4678E5D1
Please note ID of 2 display three times, wait for your helps, thanks.
How about something like
CREATE TABLE Table1
([ID] int, [value] int)
;
INSERT INTO Table1
([ID], [value])
VALUES
(1, 30),
(1, 30),
(2, 50),
(2, 50),
(3, 50)
;
;WITH SummedVals AS (
SELECT ID,
SUM(value) / 30 Cnt
FROM Table1
GROUP BY ID
)
, Vals AS (
SELECT ID,
Cnt - 1 Cnt
FROM SummedVals
UNION ALL
SELECT ID,
Cnt - 1 Cnt
FROM Vals
WHERE Cnt > 0
)
SELECT ID,
NEWID()
FROM Vals
ORDER BY 1
SQL Fiddle DEMO

Merging data in a single SQL table without a Cursor

I have a table with an ID column and another column with a number. One ID can have multiple numbers. For example
ID | Number
1 | 25
1 | 26
1 | 30
1 | 24
2 | 4
2 | 8
2 | 5
Now based of this data, in a new table, I want to have this
ID | Low | High
1 | 24 | 26
1 | 30 | 30
2 | 4 | 5
2 | 8 | 8
As you can see, I want to merge any data where the numbers are consecutive, like 24, 25, 26. So now the low was 24, the high was 26, and then 30 is still a separate range. I am dealing with large amounts of data, so I would prefer to not use a cursor for performance sake (which is what I was previously doing, and was slowing things down quite a bit)...What is the best way to achieve this? I'm no SQL pro, so I'm not sure if there is a function available that could make this easier, or what the fastest way to accomplish this would be.
Thanks for the help.
The key observation is that a sequence of numbers minus another sequence is a constant. We can generate another sequence using row_number. This identifies all the groups:
select id, MIN(number) as low, MAX(number) as high
from (select t.*,
(number - ROW_NUMBER() over (partition by id order by number) ) as groupnum
from t
) t
group by id, groupnum
The rest is just aggregation.
Solution with CTE and recursion:
WITH CTE AS (
SELECT T.ID, T.NUMBER, T.NUMBER AS GRP
FROM T
LEFT OUTER JOIN T T2 ON T.ID = T2.ID AND T.NUMBER -1 = T2.NUMBER
WHERE T2.ID IS NULL
UNION ALL
SELECT T.ID, T.NUMBER, GRP
FROM CTE
INNER JOIN T
ON T.ID = CTE.ID AND T.NUMBER = CTE.NUMBER + 1
)
SELECT ID, MAX( NUMBER ), MIN(NUMBER)
FROM CTE
GROUP BY ID, GRP
Results at fiddlesql
I'd suggest using a WHILE loop structure with a table variable instead of the cursor.
For example,
DECLARE #TableVariable TABLE
(
MyID int IDENTITY (1, 1) PRIMARY KEY NOT NULL,
[ID] int,
[Number] int
)
DECLARE #Count int, #Max int
INSERT INTO #TableVariable (ID, Number)
SELECT ID, Number
FROM YourSourceTable
SELECT #Count = 1, #Max = MAX(MyID)
FROM #TableVariable
WHILE #Count <= #Max
BEGIN
...do your processing here...
SET #Count = #Count + 1
END
CREATE TABLE Table1
([ID] int, [Number] int)
;
INSERT INTO Table1
([ID], [Number])
VALUES
(1, 25),
(1, 26),
(1, 30),
(1, 24),
(2, 4),
(2, 8),
(2, 5)
;
select ID,
MIN(Number)
,(SELECT MIN(Number)
FROM (SELECT TOP 2 Number from Table1 WHERE ID =
T1.Id ORDER BY Number DESC) as DT)
from Table1 as T1
GROUP BY ID
UNION
SELECT ID, MAX(Number), MAX(Number)
FROM Table1 as T1
GROUP BY ID;
Live Example