Generating a random number of child records - sql

I am successfully using the code below to generate 20 parent records with 15 children each. How can I modify this code to generate a random number (e.g. between 5 and 20) of child records for each parent?
-- Parent table: one row per employee.
CREATE TABLE emp_info (
    empid   INTEGER,
    empname VARCHAR2(50)
);

-- Child table: attendance intervals keyed by empid (no foreign key is declared).
CREATE TABLE emp_attendance (
    empid      INTEGER,
    start_date DATE,
    end_date   DATE
);
-- Option with a CTE: generate 20 * 15 rows and route each one through a
-- multi-table insert.
INSERT ALL
    -- One generated row out of every 15 carries rn = 1 and creates the parent.
    WHEN rn = 1 THEN
        INTO emp_info (empid, empname)
        VALUES (id, name)
    -- Every generated row becomes a child (attendance) record.
    WHEN 1 = 1 THEN
        INTO emp_attendance (empid, start_date, end_date)
        VALUES (id, d1, d1 + DBMS_RANDOM.VALUE(0, .75))
WITH t AS (
    -- Highest existing empid, or 0 when emp_info is empty.
    SELECT NVL(MAX(empid), 0) AS maxid
    FROM emp_info
)
SELECT
    CEIL(maxid + LEVEL / 15)                          AS id,
    CASE MOD(maxid + LEVEL, 15) WHEN 1 THEN 1 END     AS rn,
    DBMS_RANDOM.STRING('U', DBMS_RANDOM.VALUE(3, 15)) AS name,
    TRUNC(SYSDATE) + DBMS_RANDOM.VALUE(1, 30)         AS d1
FROM t
CONNECT BY LEVEL <= 20 * 15;
-- 20 parent records 15 children each

You can take advantage of the ROW_NUMBER function as follows:
-- Generates 20 parents, each with a random number (5 to 20) of children.
-- See the inline comments for explanation.
insert all
    -- rn = 1 is always kept by the filter below, so every parent is created once
    when rn = 1 then into emp_info (empid, empname) values (id, name)
    when 1 = 1 then into emp_attendance (empid, start_date, end_date)
        values (id, d1, d1 + dbms_random.value (0, .75))
select id, rn, name, d1
from
(
    with t as (select nvl(max(empid), 0) maxid from emp_info)
    select maxid + ceil(level / 20) id,       -- 20 candidate rows per parent
           -- position of the candidate row inside its parent (1..20)
           row_number() over (partition by ceil(level / 20)
                              order by level) rn,
           dbms_random.string('U', dbms_random.value(3, 15)) name,
           trunc(sysdate) + dbms_random.value (1, 30) d1,
           -- one random child count per parent: the value drawn on the first
           -- row of the partition is shared by all rows of that parent.
           -- dbms_random.value(5, 21) is in [5, 21), so trunc gives 5..20.
           first_value(trunc(dbms_random.value(5, 21)))
               over (partition by ceil(level / 20)
                     order by level) child_cnt
    from t
    connect by level <= 20 * 20               -- 20 parents * max 20 children
)
where rn <= child_cnt;                        -- keep only the first child_cnt rows

Related

Calculate Running Total Amount with Bonus

I have following table:
-- One row per employee and salary year.
CREATE TABLE test_table (
    employee_id               INTEGER,
    salary_year               INTEGER,
    raise_in_salary_perentage DECIMAL(18, 2),  -- raise for the year, in percent
    annual_salary             DECIMAL(18, 2)
);
**Test data is as follows:**
-- Sample rows: two employees, years 2016-2020.
INSERT INTO test_table
    (employee_id, salary_year, raise_in_salary_perentage, annual_salary)
VALUES
    (1, 2016,  0, 100),
    (1, 2017, 10, 100),
    (1, 2018, 10, 100),
    (1, 2019,  0, 100),
    (1, 2020, 10, 100),
    (2, 2016, 10, 100),
    (2, 2017, 10, 100),
    (2, 2018,  0, 100),
    (2, 2019,  0, 100),
    (2, 2020,  0, 100);
I am trying to achieve following output:
The cumulative salary should include the running total of annual salary over years for each employee.
There is a percentage of raise every year, so if current year has a raise the cumulative salary will be sum of previous salaries plus the amount received in raise.
I tried to achieve it using the following SQL, but the results do not seem right. I would be thankful for a solution.
-- Asker's attempt. Every aggregate uses the same per-employee running frame,
-- so the frame is declared once as a named window.
SELECT
    t.*,
    -- running total of annual_salary
    SUM(t.annual_salary) OVER running AS cummulative_salary,
    -- running total plus (running total * running sum of raise percentages / 100)
    SUM(t.annual_salary) OVER running
        + SUM(t.annual_salary) OVER running
        * (SUM(t.raise_in_salary_perentage) OVER running / 100) AS csalary
FROM test_table AS t
WINDOW running AS (
    PARTITION BY t.employee_id
    ORDER BY t.salary_year
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
);
Based on your description, the increase in salary should be cumulative. However, a given year's increase should not affect previous years.
That is not what your desired results show. Based on my interpretation, I think you want:
-- Compounds each year's raise onto the previous year's already-raised salary
-- and keeps a running total per employee.
with recursive cte as (
    -- Anchor: each employee's earliest recorded year (not a hard-coded year,
    -- so employees whose history starts later are not dropped).
    select
        t.employee_id,
        t.salary_year,
        (t.annual_salary * (1 + t.raise_in_salary_perentage / 100.0))::numeric(18, 2) as annual_salary,
        t.raise_in_salary_perentage,
        (t.annual_salary * (1 + t.raise_in_salary_perentage / 100.0))::numeric(18, 2) as total
    from test_table t
    where not exists (
        select 1
        from test_table earlier
        where earlier.employee_id = t.employee_id
          and earlier.salary_year < t.salary_year
    )
    union all
    -- Step: the following calendar year for the same employee. The raise is
    -- applied to the previous step's salary, and the result is added to the total.
    select
        t.employee_id,
        t.salary_year,
        (cte.annual_salary * (1 + t.raise_in_salary_perentage / 100.0))::numeric(18, 2),
        t.raise_in_salary_perentage,
        (cte.total + cte.annual_salary * (1 + t.raise_in_salary_perentage / 100.0))::numeric(18, 2)
    from cte
    join test_table t
        on t.employee_id = cte.employee_id
       and t.salary_year = cte.salary_year + 1
)
select
    employee_id,
    salary_year,
    annual_salary,
    raise_in_salary_perentage,
    total
from cte
order by employee_id, salary_year;
Here is a db<>fiddle.

SQL: How to group in 30 second intervals and mark the max time in that grouping?

I have the following table:
-- Sample readings: a group number and the time of the reading.
CREATE TABLE #times (
    num   int,
    atime datetime
);

INSERT INTO #times (num, atime)
VALUES
    (1, '8/27/2015 1:10:00'),
    (1, '8/27/2015 1:10:15'),
    (1, '8/27/2015 1:10:28'),
    (2, '7/3/2018 2:20:50'),
    (2, '7/3/2018 2:21:05'),
    (2, '7/3/2018 2:21:10'),
    (2, '7/3/2018 2:30:55'),
    (3, '1/1/2018 10:20:25'),
    (4, '1/1/2018 10:20:05'),
    (5, '9/15/2015 2:20:55');
I would like to group by num and atime within a 30 second interval, then mark the max time with a 0 and the other times in the grouping with a 1.
So the result dataset would be this:
1 '8/27/2015 1:10:00' 1
1 '8/27/2015 1:10:15' 1
1 '8/27/2015 1:10:28' 0 <<this is the max time of the grouping within num and 30 secs
2 '7/3/2018 2:20:50' 1
2 '7/3/2018 2:21:05' 1
2 '7/3/2018 2:21:10' 0 <<this is the max time of the grouping within num and 30 secs
2 '7/3/2018 2:30:55' 0
3 '1/1/2018 10:20:25' 0
4 '1/1/2018 10:20:05' 0
5 '9/15/2015 2:20:55' 0
I find the other answers much too complicated. Basically, you want to mark rows where the next time is more than 30 seconds away.
This is easy using lead():
-- flag = 1 when the next reading for the same num arrives less than 30 seconds
-- after this one; otherwise 0 (also for the last reading, where LEAD is NULL).
SELECT
    t.num,
    t.atime,
    CASE
        WHEN DATEADD(second, 30, t.atime)
             > LEAD(t.atime) OVER (PARTITION BY t.num ORDER BY t.atime)
            THEN 1
        ELSE 0
    END AS flag
FROM #times AS t;
Here is a SQL Fiddle.
Try this to select the required rows
-- Returns, per num, the last reading of every run of readings that are less
-- than 30 seconds apart (a num can have several such runs).
SELECT sub.num,
       sub.atime AS maxAtime
FROM (
    SELECT num,
           atime,
           -- time of the following reading for the same num, NULL for the last one
           LEAD(atime) OVER (PARTITION BY num ORDER BY atime) AS next_atime
    FROM #times
) AS sub
WHERE sub.next_atime IS NULL
   OR sub.next_atime >= DATEADD(SECOND, 30, sub.atime);
Or this to mark the rows
-- Marks every reading: 0 for the last reading of a run (the max time of the
-- 30-second grouping), 1 for readings followed within 30 seconds by another one.
-- No rows are filtered out.
SELECT sub.num,
       sub.atime,
       CASE
           WHEN sub.next_atime < DATEADD(SECOND, 30, sub.atime) THEN 1
           ELSE 0   -- also covers next_atime IS NULL (last reading of the num)
       END AS flagged
FROM (
    SELECT num,
           atime,
           LEAD(atime) OVER (PARTITION BY num ORDER BY atime) AS next_atime
    FROM #times
) AS sub;
Not sure I've understood 100%, but have a look at this, see if it helps.
-- Flags a reading with 1 when a later reading of the same num exists less than
-- 30 seconds after it, and with 0 otherwise (i.e. it is the max time of its run).
-- The gap is measured between readings, not against fixed :00/:30 buckets,
-- so runs that cross a half-minute boundary stay together.
-- Uses only EXISTS, so it does not need LEAD().
SELECT t.num,
       t.atime,
       CASE
           WHEN EXISTS (
               SELECT 1
               FROM #times AS later
               WHERE later.num = t.num
                 AND later.atime > t.atime
                 AND later.atime < DATEADD(SECOND, 30, t.atime)
           ) THEN 1
           ELSE 0
       END AS flag
FROM #times AS t
ORDER BY t.num, t.atime
Here you go:
-- NOTE(review): only the window that starts at the earliest reading of each num
-- is evaluated; every later reading is flagged 0.
;WITH
-- earliest reading per num
first_seen AS (
    SELECT num,
           MIN(atime) AS mintime
    FROM #times
    GROUP BY num
),
-- latest reading that falls within 30 seconds of that earliest reading
window_end AS (
    SELECT fs.num,
           MAX(t.atime) AS maxtime
    FROM first_seen AS fs
    INNER JOIN #times AS t
        ON fs.num = t.num
    WHERE t.atime <= DATEADD(second, 30, fs.mintime)
    GROUP BY fs.num
)
SELECT t.num,
       t.atime,
       CASE WHEN t.atime >= we.maxtime THEN 0 ELSE 1 END
FROM window_end AS we
INNER JOIN #times AS t
    ON we.num = t.num

value substitution/replacement in a string

I have a string x-y+z. The values for x, y and z will be stored in a table. Say
x 10
y 15
z 20
This string needs to be changed like 10-15+20.
Anyway I can achieve this using plsql or sql?
Using a simple PIVOT, we can do the following:
-- Lookup of variable name -> value. A table variable is declared with '@';
-- the declaration and the query must run in the same batch.
DECLARE @Table1 TABLE
    ( name varchar(1), amount int )
;
INSERT INTO @Table1
    ( name, amount )
VALUES
    ('x', 10),
    ('y', 15),
    ('z', 20);   -- values as given in the question

-- Script
-- Turns the three rows into one row with columns [x], [y], [z] and
-- concatenates them with the operators of the fixed formula x-y+z.
-- VARCHAR(12) is wide enough for any int value including its sign.
SELECT CAST(p.[x] AS VARCHAR(12)) + '-'
     + CAST(p.[y] AS VARCHAR(12)) + '+'
     + CAST(p.[z] AS VARCHAR(12)) AS formula
FROM (
    SELECT name, amount
    FROM @Table1
) AS t
PIVOT (MAX(amount) FOR name IN ([x], [y], [z])) AS p
An approach could be the following, assuming a table like this:
-- Decoding table: token text -> numeric value.
CREATE TABLE stringToNumbers (
    str VARCHAR2(16),
    num NUMBER
);

INSERT INTO stringToNumbers (str, num) VALUES ('x', 10);
INSERT INTO stringToNumbers (str, num) VALUES ('y', 20);
INSERT INTO stringToNumbers (str, num) VALUES ('zz', 30);
INSERT INTO stringToNumbers (str, num) VALUES ('w', 40);
First tokenize your input string with something like this:
SQL> with test as (select 'x+y-zz+w' as string from dual)
2 SELECT 'operand' as typ, level as lev, regexp_substr(string, '[+-]+', 1, level) as token
3 FROM test
4 CONNECT BY regexp_instr(string, '[a-z]+', 1, level+1) > 0
5 UNION ALL
6 SELECT 'value', level, regexp_substr(string, '[^+-]+', 1, level) as token
7 FROM test
8 CONNECT BY regexp_instr(string, '[+-]', 1, level - 1) > 0
9 order by lev asc, typ desc;
TYP LEV TOKEN
------- ---------- --------------------------------
value 1 x
operand 1 +
value 2 y
operand 2 -
value 3 zz
operand 3 +
value 4 w
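In the example I used lowercase literals and only +/- signs (note that the rows labelled 'operand' in the output are actually the operators, and the rows labelled 'value' are the variable names); you can easily edit it to handle something more complex. I also assume the input string is well-formed.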
Then you can join your decoding table to the tokenized string, building the concatenation:
SQL> select listagg(nvl(to_char(num), token)) within group (order by lev asc, typ desc)
2 from (
3 with test as (select 'x+y-zz+w' as string from dual)
4 SELECT 'operand' as typ, level as lev, regexp_substr(string, '[+-]+', 1, level) as token
5 FROM test
6 CONNECT BY regexp_instr(string, '[a-z]+', 1, level+1) > 0
7 UNION ALL
8 SELECT 'value', level, regexp_substr(string, '[^+-]+', 1, level) as token
9 FROM test
10 CONNECT BY regexp_instr(string, '[+-]', 1, level - 1) > 0
11 order by lev asc, typ desc
12 ) tokens
13 LEFT OUTER JOIN stringToNumbers on (str = token);
LISTAGG(NVL(TO_CHAR(NUM),TOKEN))WITHINGROUP(ORDERBYLEVASC,TYPDESC)
--------------------------------------------------------------------------------
10+20-30+40
This assumes that every literal in your input string has a corresponding value in the table. You can even handle the case of strings with no corresponding number, for example by assigning 0:
SQL> select listagg(
2 case
3 when typ = 'operand' then token
4 else to_char(nvl(num, 0))
5 end
6 ) within group (order by lev asc, typ desc)
7 from (
8 with test as (select 'x+y-zz+w-UNKNOWN' as string from dual)
9 SELECT
.. ...
16 ) tokens
17 LEFT OUTER JOIN stringToNumbers on (str = token);
LISTAGG(CASEWHENTYP='OPERAND'THENTOKENELSETO_CHAR(NVL(NUM,0))END)WITHINGROUP(ORD
--------------------------------------------------------------------------------
10+20-30+40-0
Create a function like this:
-- Variable table read by replace_vars: single-character name -> amount.
CREATE TABLE ttt1 (
    name   VARCHAR(1),
    amount INT
);

INSERT INTO ttt1 (name, amount) VALUES ('x', 10);
INSERT INTO ttt1 (name, amount) VALUES ('y', 15);
INSERT INTO ttt1 (name, amount) VALUES ('z', 25);
-- Replaces every variable name stored in ttt1 with its amount inside a formula.
--   in_formula : formula text such as 'x-y+z'; names are matched case-insensitively
--   returns    : the upper-cased formula with each name replaced by its amount
CREATE OR REPLACE FUNCTION replace_vars (in_formula VARCHAR2)
    RETURN VARCHAR2
IS
    -- Maximum PL/SQL VARCHAR2 size, so that long formulas do not overflow the buffer.
    f VARCHAR2 (32767) := UPPER (in_formula);
BEGIN
    -- Longest names first, so a short name never replaces part of a longer one.
    FOR c1 IN (  SELECT UPPER (name) AS var_name, amount
                   FROM ttt1
               ORDER BY LENGTH (name) DESC, UPPER (name) DESC)
    LOOP
        -- TO_CHAR makes the number-to-text conversion explicit.
        f := REPLACE (f, c1.var_name, TO_CHAR (c1.amount));
    END LOOP;

    RETURN f;
END replace_vars;
/

-- The '/' above ends the PL/SQL unit in SQL*Plus-style tools; without it the
-- query below would be read as part of the function source.
SELECT replace_vars ('x-y+z') FROM dual;
Here's another way to approach the problem that attempts to do it all in SQL. While not necessarily the most flexible or fastest, maybe you can get some ideas from another way to approach the problem. It also shows a way to execute the final formula to get the answer. See the comments below.
Assumes all variables are present in the variable table.
-- Values of the variables, held as one row with one column per variable.
-- Not needed if the values already live in a table.
WITH val_tbl (x, y, z) AS (
    SELECT '10', '15', '20'
    FROM dual
),
-- The formula to be rewritten.
formula_tbl (formula) AS (
    SELECT 'x-y+z'
    FROM dual
),
-- One row per character of the formula (id = character position).
-- A character that names a variable is replaced by that variable's value
-- from val_tbl; any other character (an operator) is passed through unchanged.
new_formula_tbl (id, new_formula) AS (
    SELECT
        LEVEL,
        CASE REGEXP_SUBSTR(formula, '(.|$)', 1, LEVEL, NULL, 1)
            WHEN 'x' THEN (SELECT x FROM val_tbl)
            WHEN 'y' THEN (SELECT y FROM val_tbl)
            WHEN 'z' THEN (SELECT z FROM val_tbl)
            ELSE REGEXP_SUBSTR(formula, '(.|$)', 1, LEVEL, NULL, 1)
        END
    FROM formula_tbl
    CONNECT BY LEVEL <= REGEXP_COUNT(formula, '.')
)
-- To inspect the per-character rows instead, use:
--   select id, new_formula from new_formula_tbl;
-- Glue the rows back into one string; ordering by id (the level) keeps
-- operands and operators in their original positions.
SELECT LISTAGG(new_formula) WITHIN GROUP (ORDER BY id) formula
FROM new_formula_tbl;
FORMULA
----------
10-15+20
Additionally you can get the result of the formula by passing the listagg() call to the following xmlquery() function:
-- Evaluates the substituted formula: the aggregated string is passed to
-- XMLQUERY and the result is read back with getNumberVal().
-- Every '/' is rewritten to ' div ' before evaluation.
-- NOTE(review): this statement reads new_formula_tbl, which is defined only in
-- the WITH clause of the previous query; prepend that WITH clause to run it.
select xmlquery(replace( listagg(new_formula) within group (order by id), '/', ' div ')
returning content).getNumberVal() as result
from new_formula_tbl;
RESULT
----------
15

Group close numbers

I have a table with 2 columns of integers. The first column represents start index and the second column represents end index.
START END
1 8
9 13
14 20
20 25
30 42
42 49
60 67
Simple So far. What I would like to do is group all the records that follow together:
START END
1 25
30 49
60 67
A record can follow by Starting on the same index as the previous end index or by a margin of 1:
START END
1 10
10 20
And
START END
1 10
11 20
will both result in
START END
1 20
I'm using SQL Server 2008 R2.
Any help would be Great
This works for your example, let me know if it doesn't work for other data
-- Sample ranges from the question.
CREATE TABLE #Range (
    [Start] INT,
    [End]   INT
)

INSERT INTO #Range ([Start], [End])
VALUES
    (1, 8),
    (9, 13),
    (14, 20),
    (20, 25),
    (30, 42),
    (42, 49),
    (60, 67)
-- RangeTable: the ranges that open a group, i.e. whose Start is neither equal
-- to some range's End nor to some End + 1. They are numbered in Start order.
-- NOT EXISTS is used instead of NOT IN so that a NULL [End] cannot empty the result.
;WITH RangeTable AS (
    SELECT
        t1.[Start],
        t1.[End],
        ROW_NUMBER() OVER (ORDER BY t1.[Start]) AS [Index]
    FROM #Range AS t1
    WHERE t1.[Start] IS NOT NULL
      AND NOT EXISTS (
            SELECT 1
            FROM #Range AS prev
            WHERE prev.[End] = t1.[Start]
               OR prev.[End] + 1 = t1.[Start]
          )
)
SELECT
    t1.[Start],
    CASE
        -- last group: it runs to the largest End in the table
        WHEN t2.[Start] IS NULL THEN
            (SELECT MAX(r.[End])
             FROM #Range AS r)
        -- otherwise: the largest End lying before the next group's Start
        ELSE
            (SELECT MAX(r.[End])
             FROM #Range AS r
             WHERE t2.[Start] > r.[End])
    END AS [End]
FROM RangeTable AS t1
LEFT JOIN RangeTable AS t2
    ON t1.[Index] = t2.[Index] - 1
ORDER BY t1.[Start];

DROP TABLE #Range;
Edited to include another version which i think is a bit more reliable, and also works with overlapping ranges
-- Sample ranges, including overlapping ones.
CREATE TABLE #data (
    start_range INT,
    end_range   INT
)

INSERT INTO #data (start_range, end_range)
VALUES
    (1, 8),
    (2, 15),
    (9, 13),
    (14, 20),
    (13, 26),
    (12, 21),
    (9, 25),
    (20, 25),
    (30, 42),
    (42, 49),
    (60, 67)
-- Expands every range into its individual integers, de-duplicates them and
-- groups runs of consecutive integers back into ranges.
;WITH ranges AS
(
    -- anchor: first integer of each range
    SELECT start_range AS level
          ,end_range   AS end_range
    FROM #data
    UNION ALL
    -- step: next integer, until the end of the range is reached
    SELECT level + 1 AS level
          ,end_range AS end_range
    FROM ranges
    WHERE level < end_range
)
,ranges2 AS
(
    -- each covered integer once, however many ranges cover it
    SELECT DISTINCT
           level
    FROM ranges
)
,ranges3 AS
(
    -- consecutive integers share the same (row_number - value) difference
    SELECT level
          ,ROW_NUMBER() OVER (ORDER BY level) - level AS grouping_group
    FROM ranges2
)
SELECT MIN(level) AS start_number
      ,MAX(level) AS end_number
FROM ranges3
GROUP BY grouping_group
ORDER BY start_number ASC
-- The default limit of 100 recursion levels would abort on any range spanning
-- more than 100 values; 0 removes the limit (the recursion stops at end_range).
OPTION (MAXRECURSION 0)
I think this should work - might not be especially efficient on larger sets though...
-- Sample ranges for the query below.
CREATE TABLE #data (
    start_range INT,
    end_range   INT
)

INSERT INTO #data (start_range, end_range)
VALUES
    (1, 8),
    (2, 15),
    (9, 13),
    (14, 20),
    (21, 25),
    (30, 42),
    (42, 49),
    (60, 67)
-- Merges ranges that overlap, touch, or are separated by a margin of 1.
-- Uses only ROW_NUMBER and correlated subqueries (no windowed running MAX).
;WITH overlaps AS
(
    -- ranges numbered in start order (end_range breaks ties deterministically)
    SELECT start_range
          ,end_range
          ,ROW_NUMBER() OVER (ORDER BY start_range ASC, end_range ASC) AS line_number
    FROM #data
)
,island_starts AS
(
    -- a range opens a new group when no earlier range reaches start_range - 1
    SELECT O1.line_number
    FROM overlaps O1
    WHERE NOT EXISTS (
        SELECT 1
        FROM overlaps O2
        WHERE O2.line_number < O1.line_number
          AND O2.end_range >= O1.start_range - 1
    )
)
,overlaps2 AS
(
    -- every range belongs to the latest group opened at or before its own line
    SELECT O1.start_range
          ,O1.end_range
          ,(SELECT MAX(S.line_number)
            FROM island_starts S
            WHERE S.line_number <= O1.line_number) AS overlap_group
    FROM overlaps O1
)
SELECT
    MIN(start_range) AS range_start
   ,MAX(end_range) AS range_end
   ,MAX(end_range) - MIN(start_range) AS range_span
FROM overlaps2
GROUP BY overlap_group
ORDER BY range_start
You could use a number table to solve this problem. Basically, you first expand the ranges, then combine subsequent items in groups.
Here's one implementation:
-- data: the sample ranges, supplied inline.
WITH data ([START], [END]) AS (
    SELECT src.range_start, src.range_end
    FROM (VALUES
        (1, 8),
        (9, 13),
        (14, 20),
        (20, 25),
        (30, 42),
        (42, 49),
        (60, 67)
    ) AS src (range_start, range_end)
),
-- expanded: every integer covered by at least one range, listed once.
-- master..spt_values (type 'P') serves as the number table.
expanded AS (
    SELECT DISTINCT
        d.[START] + v.number AS N
    FROM data AS d
    INNER JOIN master..spt_values AS v
        ON v.number BETWEEN 0 AND d.[END] - d.[START]
    WHERE v.type = 'P'
),
-- marked: consecutive integers get the same SeqID (value minus row number).
marked AS (
    SELECT
        N,
        N - ROW_NUMBER() OVER (ORDER BY N) AS SeqID
    FROM expanded
)
SELECT
    MIN(N) AS [START],
    MAX(N) AS [END]
FROM marked
GROUP BY SeqID
This solution uses master..spt_values as a number table, for expanding the initial ranges. But if (all or some of) those ranges may span more than 2048 (subsequent) values, then you should define and use your own number table.

SQL Query for Grouping the results based on sequence

I have a table like this:
ID Seq Amt
1 1 500
1 2 500
1 3 500
1 5 500
2 10 600
2 11 600
3 1 700
3 3 700
I want to group the continuous sequence numbers into a single row like this:
ID Start End TotalAmt
1 1 3 1500
1 5 5 500
2 10 11 1200
3 1 1 700
3 3 3 700
Please help to achieve this result.
-- Rows of one ID whose Seq values are consecutive share the same difference
-- between Seq and their row number; grouping by it yields one row per run.
SELECT
    ID,
    MIN(Seq) AS Start,
    MAX(Seq) AS [End],
    SUM(Amt) AS TotalAmt
FROM (
    SELECT
        ID,
        Seq,
        Amt,
        Seq - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) AS island_key
    FROM atable
) AS numbered
GROUP BY ID, island_key
ORDER BY ID, Start
;
Well, there's perhaps a more elegant way to do this (something hints at me that there is), but here's an approach that will work if you're using a version of SQL Server that accepts common table expressions:
USE Tempdb
GO

-- Sample table matching the question's data.
CREATE TABLE [Test] (
    [id]  INT NOT NULL,
    [Seq] INT NOT NULL,
    [Amt] INT NOT NULL
)

INSERT INTO [Test]
VALUES
    (1,  1, 500),
    (1,  2, 500),
    (1,  3, 500),
    (1,  5, 500),
    (2, 10, 600),
    (2, 11, 600),
    (3,  1, 700),
    (3,  3, 700)
;WITH
-- Rows that have no row with Seq - 1 under the same id: the first row of a run.
lower_bound AS (
    SELECT cur.id, cur.Seq, cur.Amt
    FROM Test AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM Test AS prev
        WHERE prev.id = cur.id
          AND prev.Seq = cur.Seq - 1
    )
),
-- Rows that have no row with Seq + 1 under the same id: the last row of a run.
upper_bound AS (
    SELECT cur.id, cur.Seq, cur.Amt
    FROM Test AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM Test AS nxt
        WHERE nxt.id = cur.id
          AND nxt.Seq = cur.Seq + 1
    )
),
-- Pairs every run end with the closest run start at or before it.
bounds AS (
    SELECT
        ub.id,
        (SELECT MAX(lb.Seq)
         FROM lower_bound AS lb
         WHERE lb.id = ub.id
           AND lb.Seq <= ub.Seq) AS LBound,
        ub.Seq AS Ubound
    FROM upper_bound AS ub
)
SELECT
    t.id,
    b.LBound   AS [Start],
    b.Ubound   AS [End],
    SUM(t.Amt) AS TotalAmt
FROM Test AS t
INNER JOIN bounds AS b
    ON t.id = b.id
   AND t.Seq BETWEEN b.LBound AND b.Ubound
GROUP BY t.id, b.LBound, b.Ubound

DROP TABLE [Test]
This seems to work nicely. #breakingRows will contain all rows that break the sequence of id and seq (i.e. if id changes or if seq is not 1 more than the previous seq). With that table you can select all rows of such a sequence within #temp. I must add however that performance will probably be not all that good because of all the subqueries but you'll need to test to be sure.
-- Sample data. Real temp tables are created here, because a name starting
-- with '#' cannot be declared as a table variable.
create table #temp (id int, seq int, amt int)
insert into #temp select 1, 1, 500
insert into #temp select 1, 2, 500
insert into #temp select 1, 3, 500
insert into #temp select 1, 5, 500
insert into #temp select 2, 10, 600
insert into #temp select 2, 11, 600
insert into #temp select 3, 1, 700
insert into #temp select 3, 3, 700

-- #breakingRows: the first row of every run (no row with seq - 1 for the same
-- id), numbered by ctr in (id, seq) order.
create table #breakingRows (ctr int identity(1,1), id int, seq int)
insert into #breakingRows(id, seq)
select id, seq
from #temp t1
where not exists
    (select 1 from #temp t2 where t1.id = t2.id and t1.seq - 1 = t2.seq)
order by id, seq

select br.id, br.seq as start,
    -- end of the run: the row just before the next breaking row in (id, seq) order
    isnull ((select top 1 seq from #temp t2
             where id < (select id from #breakingRows br2 where br.ctr = br2.ctr - 1) or
                   (id = (select id from #breakingRows br2 where br.ctr = br2.ctr - 1) and
                    seq < (select seq from #breakingRows br2 where br.ctr = br2.ctr - 1))
             order by id desc, seq desc),
            -- no next breaking row: the last run extends to the highest seq of
            -- its id (not just to its own first row)
            (select max(t3.seq) from #temp t3 where t3.id = br.id))
    as [end],
    -- sum of the run: rows of this id from br.seq up to (but excluding) the
    -- next breaking row of the same id, or to the end of the data
    (select SUM(amt) from #temp t1 where t1.id = br.id and
        t1.seq <
            isnull((select seq from #breakingRows br2 where br.ctr = br2.ctr - 1 and br.id = br2.id),
                   (select max(seq) + 1 from #temp)) and
        t1.seq >= br.seq)
    as TotalAmt
from #breakingRows br
order by br.id, br.seq

drop table #breakingRows
drop table #temp
Since Andriy has already posted the gold solution, here's my take using an UPDATE statement to get the result from a temp table, just for fun.
-- Working copy of the source rows plus three helper columns:
--   start = first seq of the run the row belongs to
--   this  = the row's own seq
--   total = running amount within the run
-- Table variables and scalar variables are written with '@'.
declare @tmp table (
    id int, seq int, amt money, start int, this int, total money,
    primary key clustered(id, seq))
;
insert @tmp (id, seq, amt, start, this, total)
select id, seq, amt, seq, seq, convert(money, amt)
from btable
;
-- Values carried over from the previously updated row.
declare @id int, @seq int, @start int, @amt money

-- NOTE(review): this variable-carrying UPDATE presumes rows are processed in
-- clustered key order (id, seq); confirm that this holds on your server version.
update @tmp
set
    -- same id and seq directly after the previous row: extend the run
    @amt = total = case when id = @id and seq = @seq+1 then @amt+total else amt end,
    @start = start = case when id = @id and seq = @seq+1 then @start else seq end,
    @seq = this = seq,
    @id = id = id
from @tmp
option (maxdop 1)
;
-- One row per run: its last seq and its final running total.
select id, start, max(this) [end], max(total) total
from @tmp
group by id, start
order by id, start
Notes:
btable: your table name
id int, seq int, amt money: expected columns in your table
Try following query.
-- Groups by id alone: one row per id, even when its seq values are not consecutive.
select id, min(seq), max(seq), sum(amt) from table group by id
Oops, sorry, that query is wrong, because the rows must be grouped by consecutive sequence numbers:
-- One row per run of consecutive Seq values within an Id.
-- [End] is bracketed because END is a reserved word; the amount column is Amt.
SELECT
    grp.Id,
    MIN(grp.Seq) AS Start,
    MAX(grp.Seq) AS [End],
    SUM(grp.Amt) AS Total
FROM (
    SELECT
        t.Id,
        t.Seq,
        t.Amt,
        -- constant within a run of consecutive Seq values
        t.Seq - ROW_NUMBER() OVER (PARTITION BY t.Id ORDER BY t.Seq) AS Rn
    FROM [Table] t
) AS grp
GROUP BY grp.Id, grp.Rn
ORDER BY grp.Id, MIN(grp.Seq)