Select a column that is not in the GROUP BY - SQL Server - sql

I want to select a column that is not in the GROUP BY.
My code:
SELECT
dbo.func(field1, field2), field3
FROM
table
WHERE
field4 = 1224
GROUP BY
dbo.func(field1, field2), field3
HAVING
COUNT(id) > 1
And I want to select also the column id like this:
SELECT
id, dbo.func(field1, field2), field3
FROM
table
WHERE
field4 = 1224
GROUP BY
dbo.func(field1, field2), field3
HAVING
COUNT(id) > 1

I suspect that you want to apply a count restriction and then return all matching records from the original table, along with the output of the scalar function. One approach is to use COUNT as analytic function with a partition which corresponds to the columns which appeared in your original GROUP BY clause. The difference here is that we don't actually aggregate the original table.
WITH cte AS (
SELECT id, dbo.func(field1, field2) AS out, field3,
COUNT(id) OVER (PARTITION BY dbo.func(field1, field2), field3) cnt
FROM yourTable
WHERE field4 = 1224
)
SELECT id, out, field3
FROM cte
WHERE cnt > 1;

You could join back to the original table to retrieve the matching row(s) with id:
SELECT t.id
, filter.funresult
, t.field3
FROM table t
JOIN (
SELECT dbo.func(field1,field2) as funresult
, field3
FROM table
WHERE field4 = 1224
GROUP BY
dbo.func(field1,field2)
, field3
HAVING COUNT(id) > 1
) filter
ON filter.funresult = dbo.func(t.field1, t.field2)
AND filter.field3 = t.field3

Related

Speed up count on distinct

My query return the volume of each field where data is not null.
SELECT COUNT(field1) AS field1, COUNT(field2) AS field2, COUNT(field3) AS field3
FROM (
SELECT field1, field2, field3
FROM table1, table2
WHERE table1.id=table2.idt1
ORDER BY table1.id ASC
LIMIT 10000
) AS rq
table1.id is the primary key of table1 and table2.idt1 is a foreign key in table2 referencing table1.id.
This query is working perfectly well, but if I need to return the DISTINCT volume of each field, like this
SELECT COUNT(DISTINCT(field1)) AS field1, COUNT(DISTINCT(field2)) AS field2, COUNT(DISTINCT(field3)) AS field3
FROM (
SELECT field1, field2, field3
FROM table1, table2
WHERE table1.id=table2.idt1
ORDER BY table1.id ASC
LIMIT 10000
) AS rq
Problems begins... The query is working on and do the job, but the performances are of course very much slower than without the DISTINCT clause.
Each field of table1 and table2 are indexes with btree
CREATE INDEX field1_index ON table1 USING btree (field1)
CREATE INDEX field2_index ON table1 USING btree (field2)
CREATE INDEX field3_index ON table2 USING btree (field3)
How can I speed up this DISTINCT count ? Maybe with better indexes ?
Thanks for help
I've tried something similar in a big table. (12 Millions rows)
Without the DISTINCT it takes 10 seconds.
With the DISTINCT like your code it take 19 seconds.
Putting the DISTINCT inside the subquery takes 11 seconds
SELECT COUNT(field1) AS field1, COUNT(field2) AS field2, COUNT(field3) AS field3
FROM (
SELECT DISTINCT field1, field2, field3
FROM table1, table2
WHERE table1.id=table2.idt1
ORDER BY field1 ASC
LIMIT 10000
) AS rq
Other thing, if you only want to filter NULL data, you can make that in the where clause instead of using distinct.
Postgres does not optimize COUNT(DISTINCT) very well. You have multiple such expressions, which makes it a bit harder. I am going to suggest using window functions and conditional aggregation:
SELECT SUM(CASE WHEN seqnum_1 = 1 THEN 1 ELSE 0 END) as field1,
SUM(CASE WHEN seqnum_2 = 1 THEN 1 ELSE 0 END) as field2,
SUM(CASE WHEN seqnum_3 = 1 THEN 1 ELSE 0 END) as field3
FROM (SELECT field1, field2, field3,
ROW_NUMBER() OVER (PARTITION BY field1 ORDER BY field1) as seqnum_1,
ROW_NUMBER() OVER (PARTITION BY field2 ORDER BY field2) as seqnum_2,
ROW_NUMBER() OVER (PARTITION BY field3 ORDER BY field3) as seqnum_3
FROM table1 JOIN
table2
ON table1.id=table2.idt1
ORDER BY table1.id ASC
LIMIT 10000
) rq
EDIT:
It occurs to me that the row_number() might be processed before the limit. Try this version:
SELECT SUM(CASE WHEN seqnum_1 = 1 THEN 1 ELSE 0 END) as field1,
SUM(CASE WHEN seqnum_2 = 1 THEN 1 ELSE 0 END) as field2,
SUM(CASE WHEN seqnum_3 = 1 THEN 1 ELSE 0 END) as field3
FROM (SELECT field1, field2, field3,
ROW_NUMBER() OVER (PARTITION BY field1 ORDER BY field1) as seqnum_1,
ROW_NUMBER() OVER (PARTITION BY field2 ORDER BY field2) as seqnum_2,
ROW_NUMBER() OVER (PARTITION BY field3 ORDER BY field3) as seqnum_3
FROM (SELECT field1, field2, field3
FROM table1 JOIN
table2
ON table1.id = table2.idt1
ORDER BY table1.id ASC
LIMIT 10000
) t
) rq

SQL Server: Multiple max and min values of various fields with respective timestamps in a single row resultset

I have a SQL Server table with the following fields
Field1(REAL), Field2(REAL), ...Fieldn(REAL), DateNTime(TimeStamp)
in a table table1.
How can I get following resultset? i.e. max and min values of each field with corresponding timestamps
Max(Field1), Corresponding TimeStamp, Min(Field1), Corresponding TimeStamp, .....
similarily for other fields.
Thanks All,
By using windowed functions:
with cte as
(select t.*,
max(Field1) over () MaxField1,
min(Field1) over () MinField1, ...
from Table1 t)
select max(MaxField1) MaxField1,
max(case Field1 when MaxField1 then DateNTime end) MxF1DateTime,
min(MinField1) MinField1,
min(case Field1 when MinField1 then DateNTime end) MnF1DateTime,
...
from cte
The simplest solution would be something like this:
select
Field1_min, (select max(DateNTime) from table1 where Field1 = Field1_min) as ts1min,
Field1_max, (select max(DateNTime) from table1 where Field1 = Field1_max) as ts1max,
Field2_min, (select max(DateNTime) from table1 where Field2 = Field2_min) as ts2min,
Field2_max, (select max(DateNTime) from table1 where Field2 = Field2_max) as ts2max,
Field3_min, (select max(DateNTime) from table1 where Field3 = Field3_min) as ts3min,
Field3_max, (select max(DateNTime) from table1 where Field3 = Field3_max) as ts3max,
Field4_min, (select max(DateNTime) from table1 where Field4 = Field4_min) as ts4min,
Field4_max, (select max(DateNTime) from table1 where Field4 = Field4_max) as ts4max
from (
select
min(Field1) as Field1_min, max(Field1) as Field1_max,
min(Field2) as Field2_min, max(Field2) as Field2_max,
min(Field3) as Field3_min, max(Field3) as Field3_max,
min(Field4) as Field4_min, max(Field4) as Field4_max
from table1
) S

ORACLE Select and group by excluding one field

I have a very simple query (on Oracle 11g) to select 3 fields:
select field1, field2, field3, count(*) from table
where...
group by field1, field2, field3
having count(*) > 10;
Now, what I need, is exclude "field3" from the "group by" since I only need field 1 and 2 to be grouped, but I also need field3 in the output.
As far I know, all the fields in the select must be reported also in "group by", so how can I handle that?
Thanks
Lucas
select t.field1, t.field2, t.field3, tc.Count
from table t
inner join (
select field1, field2, count(*) as Count
from table
where...
group by field1, field2
having count(*) > 10
) tc on t.field1 = tc.field1 and t.field2 = tc.field2
Use the analytical version of the "count" function:
select * from (
select field1, field2, field3, count(*) over(partition by field1, field2) mycounter
from table )
--simulate the having clause
where mycounter > 10;
If you don't group by field3 anymore, there can suddenly be different field3 per group. You must decide which one to show, e.g. the maximum:
select field1, field2, max(field3), count(*) from table
where...
group by field1, field2
having count(*) > 10;
The only way I know how to handle that is to first isolate the Field1 and Field2 data and create a new table, then link it back to the original table adding in Field3.
Select Table2.Field1, Table2.Field2, Table1.Field3
From
(Select Field1, max(Field2) as Field2
From Table1
Group By Field1) Table2
Join Table1
On Table2.Field1 = Table1.Field1
And Table2.Field2 = Table1.Field2
Group By
Table2.Field1, Table2.Field2, Table1.Field3

Unable to get the right output from Oracle SQL

I have a table with field1, field2, field3, … and I need to count the number of items in field1 such that I return all records(field1,filed2,field3,…) that occur 6 times or less in the table.
My SQL code is:
SELECT field1, field2, field3, count(field1) CNT
FROM myTable
WHERE trunc(date) = to_date('03/22/2011','mm/dd/yyyy')
GROUP BY field1
HAVING COUNT(field1) < 7;
The output that I am getting from the above code is all records are returned from the table not what I expected? Any help would be appreciated!!
I think you need to use a subquery:
SELECT field1, field2, field3
FROM myTable
WHERE trunc(date) = to_date('03/22/2011','mm/dd/yyyy')
AND field1 in
(SELECT field1
FROM mytable
GROUP BY field1
HAVING COUNT(field1) < 7);
WITH tmp AS
(
SELECT field1, COUNT(1) as CountOfField1
FROM myTable
WHERE trunc(date) = to_date('03/22/2011','mm/dd/yyyy')
GROUP BY field1
HAVING COUNT(field1) < 7
)
SELECT mytable.field1, mytable.field2, mytable.field3, tmp.CountOfField1
FROM myTable
INNER JOIN tmp
ON myTable.Field1 = tmp.Field1
Yet another way to do it:
SELECT t.field1, t.field2, t.field3
FROM myTable t
WHERE trunc(t.date) = to_date('03/22/2011','mm/dd/yyyy')
AND EXISTS
( SELECT *
FROM mytable t2
WHERE t2.field1 = t.field1
AND trunc(t2.date) = to_date('03/22/2011','mm/dd/yyyy')
GROUP BY t2.field1
HAVING COUNT(t2.field1) < 7
)
;

Select statement to find duplicates on certain fields

Can you help me with SQL statements to find duplicates on multiple fields?
For example, in pseudo code:
select count(field1,field2,field3)
from table
where the combination of field1, field2, field3 occurs multiple times
and from the above statement if there are multiple occurrences I would like to select every record except the first one.
To get the list of fields for which there are multiple records, you can use..
select field1,field2,field3, count(*)
from table_name
group by field1,field2,field3
having count(*) > 1
Check this link for more information on how to delete the rows.
http://support.microsoft.com/kb/139444
There should be a criterion for deciding how you define "first rows" before you use the approach in the link above. Based on that you'll need to use an order by clause and a sub query if needed. If you can post some sample data, it would really help.
You mention "the first one", so I assume that you have some kind of ordering on your data. Let's assume that your data is ordered by some field ID.
This SQL should get you the duplicate entries except for the first one. It basically selects all rows for which another row with (a) the same fields and (b) a lower ID exists. Performance won't be great, but it might solve your problem.
SELECT A.ID, A.field1, A.field2, A.field3
FROM myTable A
WHERE EXISTS (SELECT B.ID
FROM myTable B
WHERE B.field1 = A.field1
AND B.field2 = A.field2
AND B.field3 = A.field3
AND B.ID < A.ID)
This is a fun solution with SQL Server 2005 that I like. I'm going to assume that by "for every record except for the first one", you mean that there is another "id" column that we can use to identify which row is "first".
SELECT id
, field1
, field2
, field3
FROM
(
SELECT id
, field1
, field2
, field3
, RANK() OVER (PARTITION BY field1, field2, field3 ORDER BY id ASC) AS [rank]
FROM table_name
) a
WHERE [rank] > 1
To see duplicate values:
with MYCTE as (
select row_number() over ( partition by name order by name) rown, *
from tmptest
)
select * from MYCTE where rown > 1
If you're using SQL Server 2005 or later (and the tags for your question indicate SQL Server 2008), you can use ranking functions to return the duplicate records after the first one if using joins is less desirable or impractical for some reason. The following example shows this in action, where it also works with null values in the columns examined.
create table Table1 (
Field1 int,
Field2 int,
Field3 int,
Field4 int
)
insert Table1
values (1,1,1,1)
, (1,1,1,2)
, (1,1,1,3)
, (2,2,2,1)
, (3,3,3,1)
, (3,3,3,2)
, (null, null, 2, 1)
, (null, null, 2, 3)
select *
from (select Field1
, Field2
, Field3
, Field4
, row_number() over (partition by Field1
, Field2
, Field3
order by Field4) as occurrence
from Table1) x
where occurrence > 1
Notice after running this example that the first record out of every "group" is excluded, and that records with null values are handled properly.
If you don't have a column available to order the records within a group, you can use the partition-by columns as the order-by columns.
CREATE TABLE #tmp
(
sizeId Varchar(MAX)
)
INSERT #tmp
VALUES ('44'),
('44,45,46'),
('44,45,46'),
('44,45,46'),
('44,45,46'),
('44,45,46'),
('44,45,46')
SELECT * FROM #tmp
DECLARE #SqlStr VARCHAR(MAX)
SELECT #SqlStr = STUFF((SELECT ',' + sizeId
FROM #tmp
ORDER BY sizeId
FOR XML PATH('')), 1, 1, '')
SELECT TOP 1 * FROM (
select items, count(*)AS Occurrence
FROM dbo.Split(#SqlStr,',')
group by items
having count(*) > 1
)K
ORDER BY K.Occurrence DESC
Try this query to find duplicate records on multiple fields
SELECT a.column1, a.column2
FROM dbo.a a
JOIN (SELECT column1,
column2, count(*) as countC
FROM dbo.a
GROUP BY column1, column2
HAVING count(*) > 1 ) b
ON a.column1 = b.column1
AND a.column2 = b.column2
You can also try this query to count a distinct() column and order by with your desired column:
select field1, field2, field3, count(distinct (field2))
from table_name
group by field1, field2, field3
having count(field2) > 1
order by field2;
Try this query to have a separate count of each SELECT statement:
select field1, count(field1) as field1Count, field2,count(field2) as field2Counts, field3, count(field3) as field3Counts
from table_name
group by field1, field2, field3
having count(*) > 1