sql create sequence number in subset - apache derby - sql

Able to generate Sequence number using the following query
CREATE SEQUENCE seqno AS integer
START WITH 1;
SELECT t1.*,
(NEXT VALUE
FOR seqno) AS seqno
FROM
(SELECT l.TRANSACTION_ID,
l.HSN
FROM RWLINEITEM l
WHERE l.TRANSACTION_ID IN ('CS610-20-10003','CS610-20-10002')
GROUP BY l.TRANSACTION_ID,l.HSN) t1
this gives result
The requirement is to generate sequence number by Transaction and HSN for example
Is there any way to arrive this result. Using derby-10.13.1.1

It seems like Derby does not fully support standard window function row_number().
A typical approach to emulate this is to use a subquery that counts how many rows have the same transaction_id and a smaller hsn, like so:
select
transaction_id,
hsn,
1 + coalesce(
(
select count(*)
from rwlineitem l1
where l1.transaction_id = l.transaction_id and l1.hsn < l.hsn
),
0
) seqno
from rwlineitem l
where transaction_id in ('CS610-20-10003','CS610-20-10002')
order by transaction_id, hsn
Note that if there are duplicates (transaction_id, hsn) tuples, they would get the same seqno. This is similar to how window function rank() works. If you want a unique number, then you can try and add another random sorting criteria:
select
transaction_id,
hsn,
1 + coalesce(
(
select count(*)
from rwlineitem l1
where
l1.transaction_id = l.transaction_id
and (
l1.hsn < l.hsn
or (l1.hsn = l.hsn and random() < 0.5)
)
),
0
) seqno
from rwlineitem l
where transaction_id in ('CS610-20-10003','CS610-20-10002')
order by transaction_id, hsn

Related

SQL query to return duplicate rows for certain column, but with unique values for another column

I have written the query shown here that combines three tables and returns rows where the at_ticket_num from appeal_tickets is duplicated but against a different at_sys_ref value
select top 100
t.t_reference, at.at_system_ref, at_ticket_num, a.a_case_ref
from
tickets t, appeal_tickets at, appeals_2 a
where
t.t_reference in ('AB123','AB234') -- filtering on these values so that I can see that its working
and t.t_number = at.at_ticket_num
and at.at_system_ref = a.a_system_ref
and at.at_ticket_num IN (select at_ticket_num
from appeal_tickets
group by at_ticket_num
having count(distinct at_system_ref) > 1)
order by
t.t_reference desc
This is the output:
t_reference at_system_ref at_ticket_num a_case_ref
-------------------------------------------------------
AB123 30838974 23641583 1111979010
AB123 30838976 23641583 1111979010
AB234 30839149 23641520 1111977352
AB234 30839209 23641520 1111988003
I want to modify this so that it only returns records where t_reference is duplicated but against a different a_case_ref. So in above case only records for AB234 would be returned.
Any help would be much appreciated.
You want all ticket appeals that have more than one system reference and more than one case reference it seems. You can join the tables, count the occurrences per ticket and then only keep the tickets that match these criteria.
select *
from
(
select
t.t_reference, at.at_system_ref, at.at_ticket_num, a.a_case_ref,
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
count(distinct a.a_case_ref) over (partition by at.at_ticket_num) as caserefs
from tickets t
join appeal_tickets at on at.at_ticket_num = t.t_number
join appeals_2 a on a.a_system_ref = at.at_system_ref
) counted
where sysrefs > 1 and caserefs > 1
order by t.t_reference, at.at_system_ref, at.at_ticket_num, a.a_case_ref;
Correction
It seems that SQL Server still doesn't support COUNT(DISTINCT ...) OVER (...). You can count distinct values in a subquery though. Replace
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
by
(
select count(distinct a2.a_system_ref)
from appeal_tickets at2
join appeals_2 a2 on a2.a_system_ref = at2.at_system_ref
where at2.at_ticket_num = t.t_number
) as sysrefs,
An alternative workaround is to use DENSE_RANK in two directions (found here: https://stackoverflow.com/a/53518204/2270762):
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref) +
dense_rank() over (partition by at.at_ticket_num order by a.a_system_ref desc) -
1 as sysrefs,
with data as (
<your query plus one column>,
case when
min() over (partition by t.t_reference)
<>
max() over (partition by t.t_reference)
then 1 end as dup
)
select * from data where dup = 1

Finding the Difference of Two Results

I have two results with two different dates (a recent one and the previous one) the numbers below are the result 250 being the most recent and 300 being the previous result:
250
300
The code I use is here:
SELECT TOP 2
MY FIELD as bmi
FROM
MY TABLE
ORDER BY
THE DATE FIELD DESC
Within this same code I want to be able to find the difference between those two numbers and for that to appear not the two numbers?
I have tried a few things of skipping N rows etc but now I don't know what I can do?
I think you want something like this:
declare #firstBmiRes int
declare #secondBmiRes int
SET #firstBmiRes = 250 /* insert your query */
SET #secondBmiRes = 300 /* insert your query */
(SELECT SUM(#secondBmiRes - #firstBmiRes))
If you want to continue to use the calculated result. You can obviously store the value into another variable like this:
declare #bmi int
SET #bmi = (SELECT SUM(#secondBmiRes - #firstBmiRes))
SELECT #bmi
2nd Approach:
Since we don't have very much information to work with. you could try something like this... But i'm assuming a lot of your datastructure here.
declare #BmiScore int
declare #firstBmiRes int
declare #secondBmiRes int
SET #firstBmiRes = (SELECT TOP 1 MY_FIELD
FROM MY_TABLE
ORDER BY DATE_FIELD DESC)
SET #secondBmiRes = (SELECT MY_FIELD
FROM MY_TABLE
ORDER BY DATE_FIELD DESC
OFFSET 1 ROW
FETCH NEXT 1 ROW ONLY)
SET #bmiScore = (SELECT SUM(#secondBmiRes - #firstBmiRes))
SELECT #bmiScore
SELECT
MYFIELD - LAG (MYFIELD,1) OVER (ORDER BY MYDATE) AS BMI
FROM
MYTABLE;
ORDER BY MYDATE DESC
Using a LEAD function if you want your code to be a part of new code for some reason:
select TOP 1 (bmi - lead(bmi) over (order by date_field)) as result
from( SELECT TOP 2 my_field as bmi
, date_field
FROM my_table
ORDER BY date_field DESC) A
Here is a DEMO
Or by LAG :
select TOP 1 (lag(my_field) over (order by date_field) - my_field ) as result
FROM my_table
ORDER BY date_field DESC;
You can use LEAD/ LAG if your version of SQL Server supports these functions. If you are on an older version then you can use a windowed function to apply an order to the rows.
Here's your data going into a temporary table variable:
DECLARE #MY_TABLE TABLE (THE_DATE_FIELD DATE, MY_FIELD INT);
INSERT INTO #MY_TABLE SELECT '20200114', 300 UNION ALL SELECT '20200113', 250;
...and here's a query to perform the calculation you needed:
WITH x AS (
SELECT TOP 2
THE_DATE_FIELD,
MY_FIELD AS bmi,
ROW_NUMBER() OVER (ORDER BY THE_DATE_FIELD DESC) AS order_id
FROM
#MY_TABLE)
SELECT
MAX(CASE WHEN order_id = 1 THEN bmi END) - MAX(CASE WHEN order_id = 2 THEN bmi END) AS difference_bmi
FROM
x;
If I peek at the data from the CTE then I see this (and this is why I included the date field, which is redundant, and could otherwise be removed):
THE_DATE_FIELD bmi order_id
2020-01-14 300 1
2020-01-13 250 2
Now it's simply a case of picking the two values, as one has an order_id = 1 and one has an order_id = 2.

SQL Query Help - Negative reporting

Perhaps somebody can help with Ideas or a Solution. A User asked me for a negative report. We have a table with tickets each ticket has a ticket number which would be easy to select but the user wants a list of missing tickets between the first and last ticket in the system.
E.g. Select TicketNr from Ticket order by TicketNr
Result
1,
2,
4,
7,
11
But we actually want the result 3,5,6,8,9,10
CREATE TABLE [dbo].[Ticket](
[pknTicketId] [int] IDENTITY(1,1) NOT NULL,
[TicketNr] [int] NULL
) ON [PRIMARY]
GO
SQL Server 2016 - TSQL
Any ideas ?
So a bit more information is need all solution thus far works on small table. Our production database has over 4 million tickets. Hence why we need to find the missing ones.
First get the minimum and maximum, then generate all posible ticket numbers and finally select the ones that are missing.
;WITH FirstAndLast AS
(
SELECT
MinTicketNr = MIN(T.TicketNr),
MaxTicketNr = MAX(T.TicketNr)
FROM
Ticket AS T
),
AllTickets AS
(
SELECT
TicketNr = MinTicketNr,
MaxTicketNr = T.MaxTicketNr
FROM
FirstAndLast AS T
UNION ALL
SELECT
TicketNr = A.TicketNr + 1,
MaxTicketNr = A.MaxTicketNr
FROM
AllTickets AS A
WHERE
A.TicketNr + 1 <= A.MaxTicketNr
)
SELECT
A.TicketNr
FROM
AllTickets AS A
WHERE
NOT EXISTS (
SELECT
'missing ticket'
FROM
Ticket AS T
WHERE
A.TicketNr = T.TicketNr)
ORDER BY
A.TicketNr
OPTION
(MAXRECURSION 32000)
If you can accept the results in a different format, the following will do what you want:
select TicketNr + 1 as first_missing,
next_TicketNr - 1 as last_missing,
(next_TicketNr - TicketNr - 1) as num_missing
from (select t.*, lead(TicketNr) over (order by TicketNr) as next_TicketNr
from Ticket t
) t
where next_TicketNr <> TicketNr + 1;
This shows each sequence of missing ticket numbers on a single row, rather than a separate row for each of them.
If you do use a recursive CTE, I would recommend doing it only for the missing tickets:
with cte as (
select (TicketNr + 1) as missing_TicketNr
from (select t.*, lead(TicketNr) over (order by TicketNr) as next_ticketNr
from tickets t
) t
where next_TicketNr <> TicketNr + 1
union all
select missing_TicketNr + 1
from cte
where not exists (select 1 from tickets t2 where t2.TicketNr = cte.missing_TicketNr + 1)
)
select *
from cte;
This version starts with the list of missing ticket numbers. It then adds a new one, as the numbers are not found.
One method is to use recursive cte to find the missing ticket numbers :
with missing as (
select min(TicketNr) as mnt, max(TicketNr) as mxt
from ticket t
union all
select mnt+1, mxt
from missing m
where mnt < mxt
)
select m.*
from missing m
where not exists (select 1 from tickets t where t.TicketNr = m.mnt);
This should do the trick: SQL Fiddle
declare #ticketsTable table (ticketNo int not null)
insert #ticketsTable (ticketNo) values (1),(2),(4),(7),(11)
;with cte1(ticketNo, isMissing, sequenceNo) AS
(
select ticketNo
, 0
, row_number() over (order by ticketNo)
from #ticketsTable
)
, cte2(ticketNo, isMissing, sequenceNo) AS
(
select ticketNo, isMissing, sequenceNo
from cte1
union all
select a.ticketNo + 1
, 1
, a.sequenceNo
from cte2 a
inner join cte1 b
on b.sequenceNo = a.sequenceNo + 1
and b.ticketNo != a.ticketNo + 1
)
select *
from cte2
where isMissing = 1
order by ticketNo
It works by collecting all of the existing tickets, marking them as existing, and assigning each a consecutive number giving their order in the original list.
We can then see the gaps in the list by finding any spots where the consecutive order number shows the next record, but the ticket numbers are not consecutive.
Finally, we recursively fill in the gaps; working from the start of a gap and adding new records until that gap's consecutive numbers no longer has a gap between the related ticket numbers.
I think this one give you easiest solution
with cte as(
select max(TicketNr) maxnum,min(TicketNr) minnum from Ticket )
select a.number FROM master..spt_values a,cte
WHERE Type = 'P' and number < cte.maxnum and number > cte.minno
except
select TicketNr FROM Ticket
So After looking at all the solutions
I went with creating a temp table with a full range of number from Starting to Ending ticket and then select from the Temp table where the ticket number not in the ticket table.
The reason being I kept running in MAXRECURSION problems.

Teradata SQL stack rows per user

Is there a way to stack/group string/text per user ?
data I have
USER STATES
1 CA
1 AR
1 IN
2 CA
3 CA
3 NY
4 CA
4 AL
4 SD
4 TX
What I need is
USER STATES
1 CA / AR / IN
2 CA
3 CA / NY
4 CA / AL / SD / TX
I tried cross join and then another cross join however but the data spools out. Thanks!
If Teradata's XML-services are installed there's a function named XMLAGG, which returns a similar result: CA, AR, IN
SELECT user,
TRIM(TRAILING ',' FROM (XMLAGG(TRIM(states)|| ',' /* optionally ORDER BY ...*/) (VARCHAR(10000))))
FROM tab
GROUP BY 1
Btw, using recursion will result in huge spool usage, because you keep all the intermediate rows in spool before returning the final row.
I am not an expert but this should work. You may need to modify it a bit per your exact requirement. Hope this helps!
CREATE VOLATILE TABLE temp AS (
SELECT
USER
,STATES
,ROW_NUMBER() OVER (PARTITION BY USER ORDER BY STATES) AS rn
FROM yourtable
) WITH DATA PRIMARY INDEX(USER) ON COMMIT PRESERVE ROWS;
WITH RECURSIVE rec_test(US,ST, LVL)
AS
(
SELECT USER,STATES (VARCHAR(10)),1
FROM temp
WHERE rn = 1
UNION ALL
SELECT USER, TRIM(STATES) || ', ' || ST,LVL+1
FROM temp INNER JOIN rec_test
ON USER = US
AND temp.rn = rec_test.lvl+1
)
SELECT US,ST, LVL
FROM rec_test
QUALIFY RANK() OVER(PARTITION BY US ORDER BY LVL DESC) = 1;
Unfortunately there is no GROUP_CONCAT or any string aggregate functions in Teradata (at least none that I'm aware of) so one way to achieve your result would be to use recursion, since you don't know the maximum values of states per user.
For recursion you should use a Volatile Table, as OLAP functions are not allowed in the recursive part. This is a non-tested code (I've got no way of testing it unfortunately), so there might be several bugs, but should give you the concept and with some troubleshooting (if needed) give you expected result.
Replace yourtable in definition of Volatile Table with your real table name.
CREATE VOLATILE TABLE vt AS (
SELECT
user
, states
, ROW_NUMBER() OVER (PARTITION BY user ORDER BY states) AS rn
, COUNT(*) OVER (PARTITION BY user) AS cnt
FROM yourtable
) WITH DATA
UNIQUE PRIMARY INDEX(user, rn)
ON COMMIT PRESERVE ROWS;
WITH RECURSIVE cte (user, list, rn) AS (
SELECT
user
, CAST(states AS VARCHAR(1000)) -- maximum size based on maximum number of rows * length of states
, rn
FROM vt
WHERE rn = cnt -- start with last states row
UNION ALL
SELECT
vt.user
, cte.list || ',' || vt.states
, vt.rn
FROM vt
JOIN cte ON vt.user = cte.user AND vt.rn = cte.rn - 1 -- append a row that is rn-1 of your rows for a given user
)
SELECT user, list
FROM cte
WHERE rn = 1; -- going from last to first, in this condition there should be entire list
This solution isn't perfect - it forces the engine to store immediate results in a temporary area during query processing. You may encounter a No more spool space error.

Remove duplicate row based on select statement

I have two select statements which is returning duplicated data. What I'm trying to accomplish is to remove a duplicated leg. But I'm having hard times to get to the second row programmatically.
select i.InvID, i.UID, i.StartDate, i.EndDate, i.Minutes,i.ABID from inv_v i, InvoiceLines_v i2 where
i.Period = '2014/08'
and i.EndDate = i2.EndDate
and i.Minutes = i2.Minutes
and i.Uid <> i2.Uid
and i.abid = i2.abid
order by i.EndDate
This select statement returns the following data.
As you can see it returns duplicate rows where minutes are the same ABID is the same but InvID are different. What I need to do is to remove one of the InvID where the criteria matches. Doesn't matter which one.
The second select statement is returning different data.
select i.InvID, i.UID, i.StartDate, i.EndDate, i.Minutes from InvoiceLines_v i, InvoiceLines_v i2 where
i.Period = '2014/08'
and i.EndDate = i2.EndDate
and i.Uid = i2.Uid
and i.Abid <> i2.Abid
and i.Language <> i2.Language
order by i.startdate desc
In this select statement I want to remove an InvID where UID is the same then select the lowest Mintues. In This case, I would remove the following InvIDs: 2537676 , 2537210
My goal is to remove those rows...
I could accomplish this using cursor grab the InvID and remove it by simple delete statement, but I'm trying to stay away from cursors.
Any suggestions on how I can accomplish this?
You can use exists to delete all duplicates except the one with the highest InvID by deleting those rows where another row exists with the same values but with a higher InvID
delete from inv_v
where exists (
select 1 from inv_v i2
where i2.InvID > inv_v.InvID
and i2.minutes = inv_v.minutes
and i2.EndDate = inv_v.EndDate
and i2.abid = inv_v.abid
and i2.uid <> inv_v.uid -- not sure why <> is used here, copied from question
)
I have faced similar problems regarding duplicate data and some one told me to use partition by and other methods but those were causing performance issues
However , I had a primary key in my table through which I was able to select one row from the duplicate data and then delete it.
For example in the first select statement "minutes" and "ABID" are the criteria to consider duplicacy in data.But "Invid" can be used to distinguish between the duplicate rows.
So you can use below query to remove duplicacy.
delete from inv_i where inv_id in (select max(inv_id) from inv_i group by minutes,abid having count(*) > 1 );
This simple concept was helpful to me. It can be helpful in your case if "Inv_id" is unique.
;WITH CTE AS
(
SELECT InvID
,[UID]
,StartDate
,EndDate
,[Minutes]
,ROW_NUMBER() OVER (PARTITION BY InvID, [UID] ORDER BY [Minutes] ASC) rn
FROM InvoiceLines_v
)
SELECT *
FROM CTE
WHERE rn = 1
Replace the ORIGINAL_TABLE with your table name.
QUERY 1:
WITH DUP_TABLE AS
(
SELECT ROW_NUMBER()
OVER (PARTITION BY minutes, ABID ORDER BY minutes, ABID) As ROW_NO
FROM <ORIGINAL_TABLE>
)
DELETE FROM DUP_TABLE WHERE ROW_NO > 1;
QUERY 2:
WITH DUP_TABLE AS
(
SELECT ROW_NUMBER()
OVER (PARTITION BY UID ORDER BY minutes) As ROW_NO
FROM <ORIGINAL_TABLE>
)
DELETE FROM DUP_TABLE WHERE ROW_NO > 1;