DB2 SQL : selecting rows where value is different from previous one - sql

Let's say I have a table (PERSON) like this :
I would like to select only the rows where the value of column "C" has changed from previous row.
In this case, I should get : rows 1, 4, 5, 7, 8, 9 and 15.
I can't figure out how to achieve this.
Does someone has an idea please ?
Thank you

Try this:
WITH PERSON (ROW_NUMBER, C) AS
(
VALUES
( 1, NULL::INT)
, ( 3, NULL::INT)
, ( 4, 189)
, ( 5, NULL::INT)
, ( 6, NULL::INT)
, ( 7, 212)
, ( 8, NULL::INT)
, ( 9, 235)
, (10, 235)
, (11, NULL::INT)
)
SELECT ROW_NUMBER, C
FROM
(
SELECT
P.*
, LAG (P.C) OVER (ORDER BY ROW_NUMBER) AS C_PREV
, LAG (P.ROW_NUMBER) OVER (ORDER BY ROW_NUMBER) AS ROW_NUMBER_PREV
FROM PERSON P
)
WHERE
ROW_NUMBER_PREV IS NULL
OR (C IS DISTINCT FROM C_PREV)
ROW_NUMBER
C
1
4
189
5
7
212
8
9
235
11

Related

SQL - Average every n rows for items with same ID

I found a lot of similart posts, but any of them answered what I needed. Im trying to sumarize a big table (200M rows). What I need is to get the average every n (5 or something like that) for each ID. I've been trying with this:
select id, dev_id,
row_number() over(partition by dev_id order by dev_id) as rn,
avg(med1) over(order by dev_id rows between current row and 5 following) as avg_med1,
avg(med2) over(order by dev_id rows between current row and 5 following) as avg_med2
from my_table;
What I get with that query is a moving average, however, I only want the mean of the n elements for the current id. So the output should be for ID x > avg(rows 1-5), for ID x > avg(rows 6-10), for ID y > avg(rows 11-15)...
The thing im trying to replicate is something like the following:
From:
dev_id, med1, med2, med3
2, 3, 4, 1
3, 2, 1, 2
3, 1, 3, 9
3, 2, 4, 7
1, 3, 3, 2
2, 4, 3, 2
1, 5, 3, 2
3, 4, 2, 3
2, 4, 7, 2
To:
dev_id, AVG(med1), AVG(med2), AVG(med3)
2, 3.5, 3.5, 1.5
3, 1.5, 2, 5.5
3, 3, 3, 5
1, 4, 3, 2
2, 4, 5, 2
This ended up working for me, however is not what I initially planned it:
select id, reference, avg(b1), avg(b25), avg(b10), max(created_at)
from
(
select id,
#row_number := case when #reference = reference then #row_number + 1 else 0 end as row_number,
#reference := reference as reference,
b1,
b25,
b10,
created_at
from history_air
cross join (select #row_number := -1, #reference := '') as t
order by reference, created_at
) as t
group by reference, row_number div 150
order by reference, row_number div 150;

How to use a table in SQL WITH statement?

I am trying to use a pre-existing table in the SQL statement at the bottom of the question rather than the data that is being generated in the SQL statement. Currently, there is some data that is generated using:
WITH polys(poly_id, geom) AS (VALUES (1, 'POLYGON((1 1, 1 5, 4 5, 4 4, 2 4, 2 2, 4 2, 4 1, 1 1))'::GEOMETRY),
(2, 'POLYGON((6 6, 6 10, 8 10, 9 7, 8 6, 6 6))'::GEOMETRY)),
However, let's say I already have a table named polys with the poly_id and geom columns, exactly as what would be created above. How can I insert my pre-existing polys table into this SQL statement (i.e. what syntax would I use)?
I have tried the following to add a pre-existing polys table using:
CREATE TABLE polys_pts AS
WITH polys(poly_id, geom) AS,
with the following error:
ERROR: syntax error at or near ","
LINE 2: WITH polys(poly_id, geom) AS,
^
Full Code:
CREATE TABLE polys_pts AS
WITH polys(poly_id, geom) AS (VALUES (1, 'POLYGON((1 1, 1 5, 4 5, 4 4, 2 4, 2 2, 4 2, 4 1, 1 1))'::GEOMETRY),
(2, 'POLYGON((6 6, 6 10, 8 10, 9 7, 8 6, 6 6))'::GEOMETRY)),
pnt_clusters AS (SELECT polys.poly_id,
CASE
WHEN ST_Area(polys.geom)>9 THEN ST_ClusterKMeans(pts.geom, 8) OVER(PARTITION BY polys.poly_id)
ELSE ST_ClusterKMeans(pts.geom, 2) OVER(PARTITION BY polys.poly_id)
END AS cluster_id, pts.geom FROM polys,
LATERAL ST_Dump(ST_GeneratePoints(polys.geom, 1000, 1)) AS pts),
centroids AS (SELECT cluster_id, ST_PointOnSurface(ST_collect(geom)) AS geom FROM pnt_clusters GROUP BY poly_id, cluster_id),
neg_buffer AS (SELECT poly_id, (ST_Buffer(geom, -0.4, 'endcap=flat join=round')) geom FROM polys GROUP BY poly_id, polys.geom),
neg_buffer_pts_out AS (SELECT a.cluster_id, (a.geom) geom FROM centroids a WHERE EXISTS (SELECT 1 FROM neg_buffer b WHERE ST_Intersects(a.geom, b.geom))),
neg_buffer_pts_in AS (SELECT a.cluster_id, (a.geom) geom FROM centroids a WHERE NOT EXISTS (SELECT 1 FROM neg_buffer b WHERE ST_Intersects(a.geom, b.geom))),
snap_pts_clusters_in AS (SELECT DISTINCT ST_ClosestPoint(ST_ExteriorRing(a.geom), b.geom) AS geom FROM neg_buffer a, neg_buffer_pts_in b),
node_pts AS (SELECT ST_StartPoint(ST_ExteriorRing(geom)) geom FROM neg_buffer),
snap_pts AS (SELECT b.cluster_id, a.geom FROM snap_pts_clusters_in a JOIN centroids b ON ST_DWithin(a.geom, b.geom, 0.4))
SELECT a.cluster_id, (a.geom) geom FROM snap_pts a WHERE NOT EXISTS (SELECT 1 FROM node_pts b WHERE ST_Intersects(a.geom, b.geom))
UNION SELECT c.cluster_id, (c.geom) geom FROM neg_buffer_pts_out c ORDER BY cluster_id;
I'm not sure of understanding your question so i give you a broad answer.
To create a table from a query you must use:
CREATE TABLE foo AS
SELECT * FROM my_table;
CTEs are builded as:
WITH
tmp1 AS (
SELECT * from my_table1
), -- commna
tmp2 AS (
SELECT * from my_table2
)
SELECT * from tmp1 JOIN tmp2 ON tmp1.id = tmp2.id -- no comma
;
Note that the are , to separate different "temporary" tables defined in the CTE but the final sentence is not preceded with a ,
So to create a table from a CTE the syntax will be:
CREATE TABLE foo AS
WITH
tmp1 AS (
SELECT * from my_table1
),
tmp2 AS (
SELECT * from my_table2
)
SELECT * from tmp1 JOIN tmp2 ON tmp1.id = tmp2.id -- no comma
;
Create a table from a VALUES clause is the same as the other cases:
CREATE TABLE polys2 AS
VALUES
(1, 'POLYGON((1 1, 1 5, 4 5, 4 4, 2 4, 2 2, 4 2, 4 1, 1 1))'::GEOMETRY),
(2, 'POLYGON((6 6, 6 10, 8 10, 9 7, 8 6, 6 6))'::GEOMETRY)
;
If you already have a table called polys2 that has been created for example like is shown in the previous example, you can replace
CREATE TABLE polys_pts AS
WITH
polys(poly_id, geom) AS (
VALUES
(1, 'POLYGON((1 1, 1 5, 4 5, 4 4, 2 4, 2 2, 4 2, 4 1, 1 1))'::GEOMETRY),
(2, 'POLYGON((6 6, 6 10, 8 10, 9 7, 8 6, 6 6))'::GEOMETRY)),
pnt_clusters AS (SELECT polys.poly_id, ...
with
CREATE TABLE polys_pts AS
WITH
polys(poly_id, geom) AS (
SELECT poly_id, geom FROM polys2
),
pnt_clusters AS (SELECT polys.poly_id, ...
um, the question is not 100% clear to me - ... I am not familiar with pecularities of postgresql, but my first bet would be to try
WITH polys(...) AS (...),
pnt_clusters AS (...)
CREATE polys_pts AS (
SELECT ..
FROM polys... etc.
)
but I guess this is not allowed since WITH only goes with DML statements (data manipulation unlike data definition (DDL) statements like CREATE)
so.. my next bet would be to try using polys and pnt_clusters that you defined inside WITH clause, inline inside the SELECT statement, given that
WITH a AS (
SELECT x, y FROM z
)
SELECT *
FROM a
is the same as
SELECT *
FROM (
SELECT x, y
FROM z
) AS a
well, otherwise I would split the process into two steps - create some kind of temporary tables first for polys and pnt_clusters and then do the create...
The definition of a CTE must be a complete statement, so you have to use
WITH polys(poly_id, geom) AS (
SELECT *
FROM (VALUES
(1, 'POLYGON((1 1, 1 5, 4 5, 4 4, 2 4, 2 2, 4 2, 4 1, 1 1))'::GEOMETRY),
(2, 'POLYGON((6 6, 6 10, 8 10, 9 7, 8 6, 6 6))'::GEOMETRY)
) AS p(p, g)
)

SQL - Getting Sum of 'X' Consecutive Values where X is an Integer in another Row (With Categories)

Say for example, I wanted to SUM all the values from the current row until the provided count. See table below:
For example:
Category A, Row 1: 10+15+25 = 50 (because it adds Rows 1 to 3 due to Count)
Category A, Row 2: 15+25+30+40 = 110 (because it adds Rows 2 to 5 due to count)
Category A, Row 5: 40+60 = 100 (because it Adds Rows 5 and 6. Since the count is 5, but the category ends at Row 6, so instead of that, it sums all available data which is Rows 5 and 6 only, thus having a value of 100.
Same goes for Category B.
How do I do this?
You can do this using window functions:
with tt as (
select t.*,
sum(quantity) over (partition by category order by rownumber) as running_quantity,
max(rownumber) over (partition by category) as max_rownumber
from t
)
select tt.*,
coalesce(tt2.running_quantity, ttlast.running_quantity) - tt.running_quantity + tt.quantity
from tt left join
tt tt2
on tt2.category = tt.category and
tt2.rownumber = tt.rownumber + tt.count - 1 left join
tt ttlast
on ttlast.category = tt.category and
ttlast.rownumber = ttlast.max_rownumber
order by category, rownumber;
I can imagine that under some circumstances this would be much faster -- particularly if the count values are relatively large. For small values of count, the lateral join is probably faster, but it is worth checking if performance is important.
Actually, a pure window functions approach is probably the best approach:
with tt as (
select t.*,
sum(quantity) over (partition by category order by rownumber) as running_quantity
from t
)
select tt.*,
(coalesce(lead(tt.running_quantity, tt.count - 1) over (partition by tt.category order by tt.rownumber),
first_value(tt.running_quantity) over (partition by tt.category order by tt.rownumber desc)
) - tt.running_quantity + tt.quantity
)
from tt
order by category, rownumber;
Here is a db<>fiddle.
Try this:
DECLARE #DataSource TABLE
(
[Category] CHAR(1)
,[Row Number] BIGINT
,[Quantity] INT
,[Count] INT
);
INSERT INTO #DataSource ([Category], [Row Number], [Quantity], [Count])
VALUES ('A', 1, 10, 3)
,('A', 2, 15, 4)
,('A', 3, 25, 2)
,('A', 4, 30, 1)
,('A', 5, 40, 5)
,('A', 6, 60, 2)
--
,('B', 1, 12, 2)
,('B', 2, 13, 3)
,('B', 3, 17, 1)
,('B', 4, 11, 2)
,('B', 5, 10, 5)
,('B', 6, 7, 3);
SELECT *
FROM #DataSource E
CROSS APPLY
(
SELECT SUM(I.[Quantity])
FROM #DataSource I
WHERE I.[Row Number] <= E.[Row Number] + E.[Count] - 1
AND I.[Row Number] >= E.[Row Number]
AND E.[Category] = I.[Category]
) DS ([Sum]);

Where clause on Running total

I have this table which stores containers by region and the number of coffee pouches in each of the containers.
if object_id( 'dbo.Container' ) is not null
drop table dbo.Container
go
create table dbo.Container
(
Id int not null,
Region int not null,
NumberOfCoffeePouches int not null,
constraint pkc_Container__Id primary key clustered(Id asc)
)
go
insert into dbo.Container
( Id , Region , NumberOfCoffeePouches )
values
( 1, 1, 10 ),
( 2, 1, 30 ),
( 3, 1, 5),
( 4, 1, 7),
( 5, 1, 1),
( 6, 1, 3),
( 7, 2, 4),
( 8, 2, 4),
( 9, 2, 4)
I need to list out the container Ids that will be used to fulfill an order of, say 50, coffee pouches. Over supplying is OK.
Here is query I have come up with
declare #RequiredCoffeePouches int = 50
select
sq2.Id,
sq2.NumberOfCoffeePouches,
sq2.RunningTotal,
sq2.LagRunningTotal
from
(
select
sq1.Id,
sq1.NumberOfCoffeePouches,
sq1.RunningTotal,
lag(sq1.RunningTotal, 1, 0) over (order by sq1.Id asc)
as 'LagRunningTotal'
from
(
select
c.Id,
c.NumberOfCoffeePouches,
sum(c.NumberOfCoffeePouches)
over (order by c.Id asc) as 'RunningTotal'
from
dbo.Container as c
where
c.Region = 1
) as sq1
) as sq2
where
sq2.LagRunningTotal <= #RequiredCoffeePouches
It gives the expected result
Id NumberOfCoffeePouches RunningTotal LagRunningTotal
----------- --------------------- ------------ ---------------
1 10 10 0
2 30 40 10
3 5 45 40
4 7 52 45
Question:
Is there a better and more optimized way to achieve this?
Specially the Container table is very large table and I think the sub query sq1 will unnecessarily calculate the RunningTotals for all the containers in the region. I was wondering if there is anyway to have sq1 stop processing more rows once the RunnningTotal exceeds over the #RequiredCoffeePouches.
Two things:
Moving your WHERE clause inside of the relevant sub-select can greatly increase the speed of the query because it'll pull less data. Using your example:
SELECT
sq2.Id,
sq2.NumberOfCoffeePouches,
sq2.RunningTotal,
sq2.LagRunningTotal
FROM
(
SELECT
sq1.Id,
sq1.NumberOfCoffeePouches,
sq1.RunningTotal,
lag(sq1.RunningTotal, 1, 0) over (order by sq1.Id asc) AS 'LagRunningTotal'
FROM
(
SELECT
c.Id,
c.NumberOfCoffeePouches,
SUM(c.NumberOfCoffeePouches) OVER (order by c.Id asc) AS 'RunningTotal'
FROM dbo.Container AS c
WHERE c.Region = 1
) AS sq1
WHERE sq2.LagRunningTotal <= #RequiredCoffeePouches
) AS sq2
CTEs can also improve performance:
;WITH sql1CTE AS (
SELECT
c.Id,
c.NumberOfCoffeePouches,
SUM(c.NumberOfCoffeePouches) OVER (order by c.Id asc) AS 'RunningTotal'
FROM dbo.Container AS c
WHERE c.Region = 1
),
sql2CTE AS (
SELECT
Id,
NumberOfCoffeePouches,
RunningTotal,
lag(RunningTotal, 1, 0) over (order by Id asc) AS 'LagRunningTotal'
FROM sql1CTE
WHERE LagRunningTotal <= #RequiredCoffeePouches
)
SELECT
Id,
NumberOfCoffeePouches,
RunningTotal,
LagRunningTotal
FROM sql2CTE
SQL Server CTE Basics
If you're using SSMS, select "Include Client Statistics" and "Include Actual Execution Plan" to keep track of how your query performs while you're crafting it.

SQL Server loop through a table for every 5 rows

I need to write a stored procedure or table function to return a new data table as a new data source.
I wish to loop through the original table for every 5 rows base on the invoice ID column (it's possible not start from 1), the first 5 rows add to the left of the new table and the second 5 rows add to the right of the new table, the third 5 rows to the left and so on.
For example, Here is the original table:
Here is the expect table:
Thanks in advance!
declare #rowCount int = 5;
with cte as (
select *,( (IN_InvoiceID-1) / #rowCount ) % 2 group1
,( (IN_InvoiceID-1) / #rowCount ) group2
,IN_InvoiceID % #rowCount group3
from T
)
select * from cte
select T1.INID,T1.IN_InvoiceID,T1.IN_InvoiceAmount,T2.INID,T2.IN_InvoiceID,T2.IN_InvoiceAmount
from CTE T1
left join CTE T2 on T2.group1 = 1 and T1.group2 = T2.group2-1 and T1.group3 = T2.group3
where T1.group1 = 0
Test DDL
CREATE TABLE T
([INID] varchar(38), [IN_InvoiceID] int, [IN_InvoiceAmount] int)
;
INSERT INTO T
([INID], [IN_InvoiceID], [IN_InvoiceAmount])
VALUES
('DB3E17E6-35C5-41:121-93B1-F809BF6B2972', 1, 2999),
('3212F048-8213-4FCC-AB64-121485B77D4E43', 2, 3737),
('E3526373-A204-40F5-801C-7F8302A4E5E2', 3, 3175),
('76CC9C19-BF79-4E8A-8034-A33805AD3390', 4, 391),
('EC7A2FBC-B62D-4865-88DE-A8097975F125', 5, 1206),
('52AD3046-21331-4F0A-BD1D-67F232C54244', 6, 402),
('CA48F132-A9F5-4516-9E58-CDEE6644AAD1', 7, 1996),
('02E10C31-CAB2-4220-B66A-CEE5E67A9378', 8, 3906),
('98F1EEFF-B07A-4B65-87F4-E165264284DD', 9, 2575),
('91EBDD8B-B73C-470C-8900-DD66078483DB', 10, 2965),
('6E2490E5-C4DE-4833-877F-1590F7BDC1B8', 11, 1603),
('00985921-AC3C-4E3E-BAE1-7F58302F831A', 12, 1302)
;
Result:
Could you please check article Display Data in Multiple Columns using SQL showing with example case how a database developer can show the list of data rows in a columnar mode using Row_Number() function and mode arithmetic expression
You need to add additional columns from the same row that is different in the sample
Seems as if you want to split the table into 2 tables with alternating 5 rows. An easy way to do this would be:
Take data into a temp table having an extra column (lets say
grouping_id)
Update the grouping id so that each 5 rows have the same id. You can
use in_invoiceId % 5 (the nod function). After this step the first 5
rows will have grouping_id 0, next 5 will have 1, next will have 2
(assuming your invoice id is incremented +1 for all rows).
You can just do a normal select with where clause for odd and even grouping_id
Ideally, you can manage with the 2 tables Master and detail table.
But due to my curiosity, I am able to solve and give the answer as
Declare #table table(id int identity, invoice_id int)
; WITH Numbers AS
(
SELECT n = 1
UNION ALL
SELECT n + 1
FROM Numbers
WHERE n+1 <= 50
)
insert into #table SELECT n
FROM Numbers
Select (a.id )%5 ,* from #table a join #table b on a.id+5 = b.id and a.id != b.id
;WITH Numbers AS
(
SELECT n = 1, o = 5
UNION ALL
SELECT n + 10, o = o+10
FROM Numbers
WHERE n+1 <= 50
)
select a.id ParentId,a.invoice_id ParentInvoiceId, --b.n, b.o,
c.invoice_id childInvoiceID from #table a
join Numbers b on a.id between b.n and b.o
left join #table c on a.id + 5 = c.id
Here is my solution
First i create grps based on whether the in_invoiceid is divisible by 5 or not.(Ignore the remainders)
After that i create a category to indicate between alternative groups(ie by checking if the remainder is 0 or otherise)
Then its a matter of dense_ranking the records on the basis of the category field ordered by in_invoiceid
Lastly a join with category=1 rows with same dense_rank as those records in category=0
create table Invoicetable(IN_ID varchar(100), IN_InvoiceID int)
INSERT INTO Invoicetable (IN_ID, IN_InvoiceID)
VALUES
('2345-BCDE-6645-1DDF', 1),
('2345-BCDE-6645-3DDF', 2),
('2345-BCDE-6645-4DDF', 3),
('2345-BCDE-6645-5DDF', 4),
('2345-BCDE-6645-6DDF', 5),
('2345-BCDE-6645-7DDF', 6),
('2345-BCDE-6645-aDDF', 7),
('2345-BCDE-6645-sDDF', 8),
('2345-BCDE-6645-dDDF', 9),
('2345-BCDE-6645-dDDF', 10),
('2345-BCDE-6645-dDDF', 11),
('2345-BCDE-6645-dDDF', 12);
with data
as (
select *
,(in_invoiceid-1)/5 as grp
,case when ((in_invoiceid-1)/5)%2=0 then '1' else '0' end as category
,dense_rank() over(partition by case when ((in_invoiceid-1)/5)%2=0 then '1' else '0' end
order by in_invoiceid) as rnk
from invoicetable a
)
select *
from data a
left join data b
on a.rnk=b.rnk
and b.category=0
where a.category=1
Here is db fiddle link.
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=287f101737c580ca271940764b2536ae
You may try with the following approach. Dividing the table is done with (((ROW_NUMBER() OVER (ORDER BY IN_InvoiceID) - 1) / 5) % 2 = 0) which groups records in left and right groups.
CREATE TABLE #InvoiceTable(
IN_ID varchar(24),
IN_InvoiceID int
)
INSERT INTO #InvoiceTable (IN_ID, IN_InvoiceID)
VALUES
('2345-BCDE-6645-1DDF', 1),
('2345-BCDE-6645-3DDF', 2),
('2345-BCDE-6645-4DDF', 3),
('2345-BCDE-6645-5DDF', 4),
('2345-BCDE-6645-6DDF', 5),
('2345-BCDE-6645-7DDF', 6),
('2345-BCDE-6645-aDDF', 7),
('2345-BCDE-6645-sDDF', 8),
('2345-BCDE-6645-dDDF', 9),
('2345-BCDE-6645-dDDF', 10),
('2345-BCDE-6645-dDDF', 11),
('2345-BCDE-6645-dDDF', 12);
WITH cte AS (
SELECT
IN_ID,
IN_InvoiceID,
CASE
WHEN (((ROW_NUMBER() OVER (ORDER BY IN_InvoiceID) - 1) / 5) % 2 = 0) THEN 'L'
ELSE 'R'
END AS IN_Position
FROM #InvoiceTable
),
cteL AS (
SELECT IN_ID, IN_InvoiceID, ROW_NUMBER() OVER (ORDER BY IN_InvoiceID) AS IN_RowNumber
FROM cte
WHERE IN_Position = 'L'
),
cteR AS (
SELECT IN_ID, IN_InvoiceID, ROW_NUMBER() OVER (ORDER BY IN_InvoiceID) AS IN_RowNumber
FROM cte
WHERE IN_Position = 'R'
)
SELECT cteL.IN_ID, cteL.IN_InvoiceID, cteR.IN_ID, cteR.IN_InvoiceID
FROM cteL
LEFT JOIN cteR ON (cteL.IN_RowNumber = cteR.IN_RowNumber)
Output:
IN_ID IN_InvoiceID IN_ID IN_InvoiceID
2345-BCDE-6645-1DDF 1 2345-BCDE-6645-7DDF 6
2345-BCDE-6645-3DDF 2 2345-BCDE-6645-aDDF 7
2345-BCDE-6645-4DDF 3 2345-BCDE-6645-sDDF 8
2345-BCDE-6645-5DDF 4 2345-BCDE-6645-dDDF 9
2345-BCDE-6645-6DDF 5 2345-BCDE-6645-dDDF 10
2345-BCDE-6645-dDDF 11 NULL NULL
2345-BCDE-6645-dDDF 12 NULL NULL