Generate rows and insert into a table - sql

I've following table
-- Vehicle log table that the generated test rows are inserted into.
CREATE TABLE public.af01 (
    id      INTEGER NOT NULL DEFAULT nextval('af01_id_seq'::regclass),  -- taken from sequence af01_id_seq
    idate   TIMESTAMP WITHOUT TIME ZONE,
    region  TEXT,
    city    TEXT,
    vtype   TEXT,                    -- vehicle type
    vmake   TEXT,                    -- manufacturer
    vmodel  TEXT,
    vregno  TEXT,                    -- vehicle registration number
    intime  TIME WITHOUT TIME ZONE,
    otime   TIME WITHOUT TIME ZONE,
    vstatus BOOLEAN,
    remarks TEXT,
    vowner  TEXT
);
I need to add data to it. The data should cover one year (from 01-01-2016 to 31-12-2016), and each date can have 5 entries.
Region column must have 3 values (Central,Western,Eastern),
City column must have 3 values(City1,City2,City3)
vtype column is the Vehicle type for example Heavy,light,Other.
vmake column is the manufacturer Audi,Nissan,Toyota,Hyundai,GMC etc.
vregno column is the vehicle registration number and must be unique (e.g. reg. no. CFB 4587).
intime any random time in day('10:15 AM').
otime this column should be intime+ 5 or 10 or 15 or 20.vstatus column should have True or false.
I've ended up with this select query to generate date rows
-- One row per calendar day of 2016, the year the data is required for.
-- 2016 is a leap year, so offsets 0..365 give exactly 366 days (01-01 .. 12-31).
SELECT TIMESTAMP '2016-01-01' + n * INTERVAL '1 day' AS days
FROM generate_series(0, 365) AS n;
and
to generate first part of the Vehicle regno.
-- Three random uppercase letters: the first part of a vehicle registration number.
-- random() is in [0, 1), so floor(random() * 26) + 1 is always a valid 1-based
-- position inside the 26-letter alphabet (the previous factor of 62 pointed past
-- the end of the string most of the time, producing empty strings).
SELECT string_agg(substr('ABCDEFGHIJKLMNOPQRSTUVWXYZ', floor(random() * 26)::integer + 1, 1), '') AS t
FROM generate_series(1, 3);
expected output;
id idate region city vtype vmake vmodel vregno intime otime vstatus remarks vowner
-- ---------- ------- ----- -------------- ------ ------ -------- ------------------- ------------------- ------- ------- ------
1 2016-01-01 Central City1 Heavy Vechicle Nissan Model1 NGV 4578 12:15:00 12:30:00 1 NULL Tom
2 2016-01-01 Western City1 Light Audi S3 BFR 4587 10:20:00 10:40:00 1 NULL Jerry

The r_dates relation is just a simple way to generate every date in a range.
other_const and max_const are arrays and its length respectively for population. region[(random() * region_max)::int2 + 1] - choose element in array by random
-- Fill af01 with five random rows for every day of 2016.
--   * every pick uses floor(random() * n) + 1, which selects each of the n array
--     elements with equal probability (rounding random() * (n - 1) would select
--     the first and last elements only half as often as the others);
--   * vregno is 'AAA 9999'-shaped: three random letters, a space, four digits;
--     the digits come from the row number multiplied by 7919 modulo 10000, which
--     maps row numbers 1..9999 to distinct values, so no registration number
--     repeats within this statement (rows already present in af01 are not checked);
--   * otime is intime plus 5, 10, 15 or 20 minutes.
INSERT INTO af01 (idate, region, city, vtype, vmake, vregno, intime, otime, vstatus)
SELECT cd, r, c, v, vm, rn, intime, intime + len AS otime, status
FROM (
    WITH r_dates AS (
        -- one row per calendar day of the requested year
        SELECT generate_series('2016-01-01'::date, '2016-12-31'::date, '1 day'::interval) AS cd
    ), other_const AS (
        -- the value pools to draw from
        SELECT '{Central,Western,Eastern}'::text[] AS region,
               '{City1,City2,City3}'::text[] AS cities,
               '{Heavy,light,Other}'::text[] AS vehicles,
               '{Audi,Nissan,Toyota,Hyundai,GMC}'::text[] AS vmakes,
               '{5,10,15,20}'::int4[] AS lengths,
               'ABCDEFGHIJKLMNOPQRSTUVWXYZ'::text AS regnosrc
    ), max_const AS (
        -- number of elements in each pool
        SELECT array_length(region, 1) AS region_max,
               array_length(cities, 1) AS cities_max,
               array_length(vehicles, 1) AS vehicles_max,
               array_length(vmakes, 1) AS vmakes_max,
               array_length(lengths, 1) AS lengths_max
        FROM other_const
    )
    SELECT cd,
           region[floor(random() * region_max)::int4 + 1] AS r,
           cities[floor(random() * cities_max)::int4 + 1] AS c,
           vehicles[floor(random() * vehicles_max)::int4 + 1] AS v,
           vmakes[floor(random() * vmakes_max)::int4 + 1] AS vm,
           (
               -- references regnosrc from the outer row, so it is re-evaluated for every row
               SELECT string_agg(s, '')
               FROM (
                   SELECT substr(regnosrc, floor(random() * length(regnosrc))::int4 + 1, 1) AS s
                   FROM generate_series(1, 3)
               ) AS a
           )
           || ' ' || lpad(((row_number() OVER () * 7919) % 10000)::text, 4, '0') AS rn,
           -- a random minute of the day, 00:00 .. 23:59
           '00:00:00'::time + floor(random() * 24 * 60)::int4 * '1 min'::interval AS intime,
           lengths[floor(random() * lengths_max)::int4 + 1] * '1 min'::interval AS len,
           random() > 0.5 AS status
    -- the cross join with generate_series(1, 5) produces the five rows per day
    FROM r_dates, other_const, max_const, generate_series(1, 5)
) AS A

Related

SQLite query to find datetime difference between multiple rows

Here are my two tables' structures in SQLite
-- One row per employee.
CREATE TABLE user (
    id   INTEGER PRIMARY KEY,
    name TEXT
);

-- One row per clock event of an employee (id refers to user.id).
CREATE TABLE attendanceTable (
    id       INTEGER,
    mydate   DATETIME,
    startJob BOOLEAN  -- 1 = the employee starts the job, 0 = the employee stops the job
);
if startJob is 1 it implies that the employee is starting the job and if startJob is 0 it means employee is stopping the job.
attendanceTable is sorted by mydate column
I want output as worked hour by individual employees.
Input of query can be two different dates e.g. 2021-08-20 and 2021-08-22
From which I want to know "How much each person has worked?"
Output should be:
[id, name, userWorkedTime]
[1, Alice, 09:00]
[2, Bob, 07:00]
12:00 to 16:00 + 22:00 to 03:00 = 9 hours
13:00 to 17:00 + 12:00 to 15:00 = 7 hours
Input of query 2021-08-20 and 2021-08-21 - output should be:
[id, name, userWorkedTime]
[1, Alice, 09:00]
[2, Bob, 04:00]
12:00 to 16:00 + 22:00 to 03:00 = 9 hours
13:00 to 17:00 = 4 hours
It is possible that Alice starts her job at 11 PM and ends it at 3 AM on the next day (so the working time would be 4 hours).
I believe that the following will accomplish the results you desire:-
WITH
/* The date selection parameters - change as necessary */
cte_selection(selection_start,selection_end) AS (SELECT '2020-08-20','2020-08-22'),
/* Extract data per shift - i.e. combine each start row with its end row.
Note that the extract reaches 1 day before and 1 day after the actual selection criteria,
as shifts from the previous/subsequent days may be relevant.
Output columns: periodstart / periodend are unix-epoch seconds, duration is their difference in seconds.
*/
cte_part1(userid,name,periodstart,periodend,duration) AS
(
SELECT
user.id,
name,
strftime('%s',mydate),
strftime('%s',
(
/* the end of the shift: the earliest stop row of the same employee after this start row */
SELECT mydate
FROM attendancetable
WHERE id = at.id
AND NOT startjob
AND mydate > at.mydate
ORDER BY mydate ASC
LIMIT 1
)
) AS endjob,
(strftime('%s',
(
/* same lookup as above, repeated so that the duration can be computed in this SELECT */
SELECT mydate
FROM attendancetable
WHERE id = at.id
AND NOT startjob
AND mydate > at.mydate
ORDER BY mydate ASC
LIMIT 1
)
) - strftime('%s',at.mydate)) AS duration
FROM attendancetable AS at
JOIN user ON at.id = user.id
/* only start rows drive the result; stop rows are picked up by the subqueries above */
WHERE startjob
AND mydate
BETWEEN date
(
(SELECT selection_start FROM cte_selection)
,'-1 day'
)
AND date
(
(SELECT selection_end FROM cte_selection)
,'+1 day'
)
),
/* split times if period crosses a day*/
cte_part2(userid,name,periodstart,startdate,periodend,enddate,duration,startday_duration,nextday_duration) AS
(
    /* One row per shift from cte_part1, with the shift's seconds divided between
       the calendar day it starts on (startday_duration) and the calendar day it
       ends on (nextday_duration, 0 when the shift does not cross midnight).
       Midnight is derived with the documented 'start of day' / '+1 day' modifiers
       instead of gluing a time onto a date: date || '24:00:00' has no separator
       and only works as long as the parser accepts that form and an hour of 24. */
    SELECT
        userid,
        name,
        periodstart,
        date(periodstart,'unixepoch') AS startdate,
        periodend,
        date(periodend,'unixepoch') AS enddate,
        duration,
        CASE
            WHEN date(periodstart,'unixepoch') = date(periodend,'unixepoch') THEN duration
            /* seconds from the shift start up to the following midnight */
            ELSE strftime('%s',periodstart,'unixepoch','start of day','+1 day') - periodstart
        END AS startday_duration,
        CASE
            WHEN date(periodstart,'unixepoch') = date(periodend,'unixepoch') THEN 0
            /* seconds from midnight of the end day up to the shift end */
            ELSE periodend - strftime('%s',periodend,'unixepoch','start of day')
        END AS nextday_duration
    FROM cte_part1
),
/* generate new rows for following days */
cte_part3(userid,name,periodstart,startdate,periodend,enddate,duration,startday_duration,nextday_duration) AS
(
    /* For every shift that crosses midnight, an extra row that belongs to the end
       day: it starts at that day's midnight and carries the after-midnight seconds
       as its own startday_duration.
       Midnight comes from the documented 'start of day' modifier rather than from
       date || '00:00:00', which has no separator between date and time. */
    SELECT
        userid,
        name,
        strftime('%s',periodend,'unixepoch','start of day'),
        date(periodend,'unixepoch'),
        periodend,
        enddate,
        nextday_duration,
        nextday_duration,
        0
    FROM cte_part2
    /* 0 and NULL are both false here, so same-day and unfinished shifts add no row */
    WHERE nextday_duration
),
/* combine both sets */
cte_part4 AS (SELECT * FROM cte_part2 UNION ALL SELECT * FROM cte_part3)
/* Group the final data */
/* One row per user with the total time worked on the selected days as HH:MM:SS.
   The total is formatted by hand because time(seconds,'unixepoch') is a
   time-of-day and starts again at 00:00:00 once the total reaches 24 hours.
   As before, the result is NULL when a user has no measurable duration. */
SELECT *,
    CASE
        WHEN sum(startday_duration) IS NULL THEN NULL
        ELSE printf('%02d:%02d:%02d',
                    sum(startday_duration) / 3600,
                    (sum(startday_duration) % 3600) / 60,
                    sum(startday_duration) % 60)
    END AS time_worked
FROM cte_part4
WHERE startdate BETWEEN (SELECT selection_start FROM cte_selection) AND (SELECT selection_end FROM cte_selection)
GROUP BY userid
;
e.g. :-
and :-
Note All results with the exception of the time_worked are arbitrary values from the underlying data. However, userid and name will be correct as they would be the same for each group. The other values will be a value from the group.
you can easily apply changes to the final query to include or exclude columns.
The full testing SQL being :-
/* Rebuild both tables from scratch so that the script can be re-run. */
DROP TABLE IF EXISTS user;
CREATE TABLE IF NOT EXISTS user (
    id   integer PRIMARY KEY,
    name TEXT
);

DROP TABLE IF EXISTS attendanceTable;
CREATE TABLE attendanceTable (
    id       Integer,
    mydate   datetime,
    startJob boolean
);

INSERT INTO user (id, name)
VALUES
    (1, 'Alice'),
    (2, 'Bob');

/* startJob: 1 = shift start, 0 = shift end */
INSERT INTO attendanceTable (id, mydate, startJob)
VALUES
    (1, '2020-08-20 12:00:00', 1),
    (2, '2020-08-20 13:00:00', 1),
    (1, '2020-08-20 16:00:00', 0),
    (2, '2020-08-20 17:00:00', 0),
    (1, '2020-08-20 22:00:00', 1),
    (1, '2020-08-21 03:00:00', 0),
    (2, '2020-08-22 12:00:00', 1),
    (2, '2020-08-22 15:00:00', 0);
WITH
/* The date selection parameters - change as necessary */
cte_selection(selection_start,selection_end) AS (SELECT '2020-08-20','2020-08-22'),
/* Extract data per shift - aka combine start and end
note that extract is 1 day befor and 1 day after actual selection criteria
as previous/subsequent days may be relevant
*/
cte_part1(userid,name,periodstart,periodend,duration) AS
(
SELECT
user.id,
name,
strftime('%s',mydate),
strftime('%s',
(
SELECT mydate
FROM attendancetable
WHERE id = at.id
AND NOT startjob
AND mydate > at.mydate
ORDER BY mydate ASC
LIMIT 1
)
) AS endjob,
(strftime('%s',
(
SELECT mydate
FROM attendancetable
WHERE id = at.id
AND NOT startjob
AND mydate > at.mydate
ORDER BY mydate ASC
LIMIT 1
)
) - strftime('%s',at.mydate)) AS duration
FROM attendancetable AS at
JOIN user ON at.id = user.id
WHERE startjob
AND mydate
BETWEEN date
(
(SELECT selection_start FROM cte_selection)
,'-1 day'
)
AND date
(
(SELECT selection_end FROM cte_selection)
,'+1 day'
)
),
/* split times if period crosses a day*/
cte_part2(userid,name,periodstart,startdate,periodend,enddate,duration,startday_duration,nextday_duration) AS
(
    /* One row per shift from cte_part1, with the shift's seconds divided between
       the calendar day it starts on (startday_duration) and the calendar day it
       ends on (nextday_duration, 0 when the shift does not cross midnight).
       Midnight is derived with the documented 'start of day' / '+1 day' modifiers
       instead of gluing a time onto a date: date || '24:00:00' has no separator
       and only works as long as the parser accepts that form and an hour of 24. */
    SELECT
        userid,
        name,
        periodstart,
        date(periodstart,'unixepoch') AS startdate,
        periodend,
        date(periodend,'unixepoch') AS enddate,
        duration,
        CASE
            WHEN date(periodstart,'unixepoch') = date(periodend,'unixepoch') THEN duration
            /* seconds from the shift start up to the following midnight */
            ELSE strftime('%s',periodstart,'unixepoch','start of day','+1 day') - periodstart
        END AS startday_duration,
        CASE
            WHEN date(periodstart,'unixepoch') = date(periodend,'unixepoch') THEN 0
            /* seconds from midnight of the end day up to the shift end */
            ELSE periodend - strftime('%s',periodend,'unixepoch','start of day')
        END AS nextday_duration
    FROM cte_part1
),
/* generate new rows for following days */
cte_part3(userid,name,periodstart,startdate,periodend,enddate,duration,startday_duration,nextday_duration) AS
(
    /* For every shift that crosses midnight, an extra row that belongs to the end
       day: it starts at that day's midnight and carries the after-midnight seconds
       as its own startday_duration.
       Midnight comes from the documented 'start of day' modifier rather than from
       date || '00:00:00', which has no separator between date and time. */
    SELECT
        userid,
        name,
        strftime('%s',periodend,'unixepoch','start of day'),
        date(periodend,'unixepoch'),
        periodend,
        enddate,
        nextday_duration,
        nextday_duration,
        0
    FROM cte_part2
    /* 0 and NULL are both false here, so same-day and unfinished shifts add no row */
    WHERE nextday_duration
),
/* combine both sets */
cte_part4 AS (SELECT * FROM cte_part2 UNION ALL SELECT * FROM cte_part3)
/* Group the final data */
/* One row per user with the total time worked on the selected days as HH:MM:SS.
   The total is formatted by hand because time(seconds,'unixepoch') is a
   time-of-day and starts again at 00:00:00 once the total reaches 24 hours.
   As before, the result is NULL when a user has no measurable duration. */
SELECT *,
    CASE
        WHEN sum(startday_duration) IS NULL THEN NULL
        ELSE printf('%02d:%02d:%02d',
                    sum(startday_duration) / 3600,
                    (sum(startday_duration) % 3600) / 60,
                    sum(startday_duration) % 60)
    END AS time_worked
FROM cte_part4
WHERE startdate BETWEEN (SELECT selection_start FROM cte_selection) AND (SELECT selection_end FROM cte_selection)
GROUP BY userid
;
DROP TABLE IF EXISTS user;
DROP TABLE IF EXISTS attendanceTable ;

Why I get error Invalid number ORA-01722:

I am a little confused and have no idea how to solve this problem. I have a column Quantity in a table, which stores a time value.
I want to create a following story. If I have for example
Quantity
8:00
8:00
It needs to SUM() this two and in output I need to get 16 HOURS
Second thing: it needs to take the last two digits (the :00 minutes part) and add them to the HOURS.
This is what I do so far
-- NOTE(review): this is the statement reported to fail with ORA-01722.
-- Oracle's SUBSTR takes (string, start_position [, length]); here ':' is passed
-- where the numeric start position belongs, so Oracle has to convert ':' to a
-- number and cannot. The (':', 1) / (':', -1) arguments mirror MySQL's
-- SUBSTRING_INDEX used in the working MySQL queries shown further down.
-- NOTE(review): "expr, ':', expr AS TOTAL_SUM" is three separate select-list
-- columns; only the last one carries the TOTAL_SUM alias.
SELECT
(SUM(SUBSTR(A.Quantity, ':', 1)) + TRUNC((SUM(SUBSTR(A.Quantity, ':', -1)) / 60),0)), ':' ,
MOD(SUM(SUBSTR(A.Quantity, ':' , -1)), 60)
AS TOTAL_SUM FROM (
SELECT
ata.ATAID AS AtaId, ata.ProjectID, ata.StartDate, ataAW.Quantity
FROM
ata
INNER JOIN
weekly_report
ON
weekly_report.ataId = ata.ATAID
INNER JOIN
ata_articles ataAW
ON
ataAW.wrId = weekly_report.id
WHERE
-- NOTE(review): AND binds tighter than OR, so the conditions below group as
-- (ATAID = 10987 AND type = 1) OR (type = 2) OR (type = 3 AND status ... AND year/week ...);
-- the ATAID, status and year/week filters therefore do not apply to every type.
ata.ATAID = 10987
AND
ataAW.type = 1
OR
ataAW.type = 2
OR
ataAW.type = 3
AND
(weekly_report.status != 3 AND weekly_report.status != 4)
AND
(
-- reports from an earlier year than report 89, or up to and including its week
weekly_report.year < (SELECT year FROM weekly_report WHERE id = 89)
OR
(
weekly_report.year <= (SELECT year FROM weekly_report WHERE id = 89)
AND
weekly_report.week <= (SELECT week FROM weekly_report WHERE id = 89)
)
)
) A
group by A.AtaId
So far better explanation, when I run first part of query I need to get something like
-- MySQL: hours part of the summed Quantity values for weekly report 69, type 1.
SELECT
CONCAT(
-- extract hours from time and add minutes converted to hours
(SUM(SUBSTRING_INDEX(aa.Quantity, ':', 1)) + TRUNCATE((SUM(SUBSTRING_INDEX(aa.Quantity, ':', -1)) / 60),0))
-- , ':',
-- extract minutes from time and find remainder (modulo)
-- LPAD((SUM(SUBSTRING_INDEX(aa.Quantity, ':', -1)) % 60), 2, 0)
) AS W_TOTAL_SUM
FROM
ata_articles aa
INNER JOIN
weekly_report wr
ON
aa.wrId = wr.id
WHERE
aa.wrId = 69
AND
aa.type = 1
TOTAL_SUM
16
And when I run second part
-- MySQL: minutes part (zero-padded to two digits) of the summed Quantity values
-- for weekly report 69, type 1.
SELECT
CONCAT(
-- extract hours from time and add minutes converted to hours
-- (SUM(SUBSTRING_INDEX(aa.Quantity, ':', 1)) + TRUNCATE((SUM(SUBSTRING_INDEX(aa.Quantity, ':', -1)) / 60),0))
-- , ':',
-- extract minutes from time and find remainder (modulo)
LPAD((SUM(SUBSTRING_INDEX(aa.Quantity, ':', -1)) % 60), 2, 0)
) AS W_TOTAL_SUM
FROM
ata_articles aa
INNER JOIN
weekly_report wr
ON
aa.wrId = wr.id
WHERE
aa.wrId = 69
AND
aa.type = 1
I get output
TOTAL_SUM
00
Can someone guide me and tell me how to solve this issue? I have tried every solution I could find, but unfortunately none of them works. Here is what I have tried so far, but I always get the message
ORA-01722: invalid number
01722. 00000 - "invalid number"
*Cause: The specified number was invalid.
*Action: Specify a valid number
-- NOTE(review): this is the statement reported to fail with ORA-01722.
-- Oracle's SUBSTR takes (string, start_position [, length]); here ':' is passed
-- where the numeric start position belongs, so Oracle has to convert ':' to a
-- number and cannot. The (':', 1) / (':', -1) arguments mirror MySQL's
-- SUBSTRING_INDEX used in the working MySQL queries shown above.
-- NOTE(review): "expr, ':', expr AS TOTAL_SUM" is three separate select-list
-- columns; only the last one carries the TOTAL_SUM alias.
SELECT
(SUM(SUBSTR(A.Quantity, ':', 1)) + TRUNC((SUM(SUBSTR(A.Quantity, ':', -1)) / 60),0)), ':' ,
MOD(SUM(SUBSTR(A.Quantity, ':' , -1)), 60)
AS TOTAL_SUM FROM (
SELECT
ata.ATAID AS AtaId, ata.ProjectID, ata.StartDate, ataAW.Quantity
FROM
ata
INNER JOIN
weekly_report
ON
weekly_report.ataId = ata.ATAID
INNER JOIN
ata_articles ataAW
ON
ataAW.wrId = weekly_report.id
WHERE
-- NOTE(review): AND binds tighter than OR, so the conditions below group as
-- (ATAID = 10987 AND type = 1) OR (type = 2) OR (type = 3 AND status ... AND year/week ...);
-- the ATAID, status and year/week filters therefore do not apply to every type.
ata.ATAID = 10987
AND
ataAW.type = 1
OR
ataAW.type = 2
OR
ataAW.type = 3
AND
(weekly_report.status != 3 AND weekly_report.status != 4)
AND
(
-- reports from an earlier year than report 89, or up to and including its week
weekly_report.year < (SELECT year FROM weekly_report WHERE id = 89)
OR
(
weekly_report.year <= (SELECT year FROM weekly_report WHERE id = 89)
AND
weekly_report.week <= (SELECT week FROM weekly_report WHERE id = 89)
)
)
) A
group by A.AtaId
UPDATE
I get output error message
ORA-00907: missing right parenthesis
00907. 00000 - "missing right parenthesis"
*Cause:
*Action:
Error at Line: 267 Column: 19
-- Formats the summed Quantity values ('H:MI' strings) of the selected reports as 'H:MI HOURS'.
-- NOTE(review): neither inline view below is given an alias, yet the GROUP BY on
-- the last line refers to A.AtaId. That GROUP BY also sits on the outermost query,
-- whose select list has no aggregate, while the SUMs are in the middle query,
-- which has no GROUP BY of its own.
SELECT ( EXTRACT( DAY FROM duration ) * 24 + EXTRACT( HOUR FROM duration ) )
|| ':'
|| TO_CHAR( EXTRACT( MINUTE FROM DURATION ), 'FM00' )
|| ' HOURS' AS duration
FROM (
-- hours before the ':' and minutes after it are summed separately, then combined into one interval
SELECT NUMTODSINTERVAL(SUM( SUBSTR( quantity, 1, INSTR( quantity, ':' ) - 1 ) ),'HOUR')
+ NUMTODSINTERVAL(SUM( SUBSTR( quantity, INSTR( quantity, ':' ) + 1 ) ), 'MINUTE' )
AS duration
FROM (
SELECT ata.ATAID AS AtaId, ata.ProjectID, ata.StartDate, ataAW.Quantity
FROM ata
INNER JOIN weekly_report
ON weekly_report.ataId = ata.ATAID
INNER JOIN ata_articles ataAW
ON ataAW.wrId = weekly_report.id
-- b holds the week and year of report 89; keep reports up to and including that week
INNER JOIN (SELECT week, year FROM weekly_report WHERE id = 89 ) b
ON ( weekly_report.year < b.year OR ( weekly_report.year = b.year AND weekly_report.week <= b.week ))
WHERE ata.ATAID = 10987
AND ataAW.type IN ( 1, 2, 3 )
AND weekly_report.status NOT IN ( 3, 4 )
))
-- NOTE(review): alias A is not defined anywhere in this statement
group by A.AtaId
Here is what I get as output when I execute following code
DURATION
:HOURS
If you have the (slightly more complicated) sample data:
CREATE TABLE table_name ( Quantity ) AS
SELECT '8:00' FROM DUAL UNION ALL
SELECT '7:30' FROM DUAL UNION ALL
SELECT '0:30' FROM DUAL;
Then you can use string functions to get the hour and minute parts and sum those and then convert the totals to an interval (so you don't end up with 15:60 HOURS) and then format the output:
-- Sum 'H:MI' strings and print the total as 'H:MI HOURS'.
WITH parts AS (
    -- text before the ':' is the hours part, text after it the minutes part
    SELECT SUM( SUBSTR( quantity, 1, INSTR( quantity, ':' ) - 1 ) ) AS total_hours,
           SUM( SUBSTR( quantity, INSTR( quantity, ':' ) + 1 ) )    AS total_minutes
    FROM   table_name
),
total AS (
    -- adding the two intervals carries surplus minutes over into hours (and days)
    SELECT NUMTODSINTERVAL( total_hours, 'HOUR' )
         + NUMTODSINTERVAL( total_minutes, 'MINUTE' ) AS duration
    FROM   parts
)
SELECT ( EXTRACT( DAY FROM duration ) * 24 + EXTRACT( HOUR FROM duration ) )
       || ':'
       || TO_CHAR( EXTRACT( MINUTE FROM duration ), 'FM00' )
       || ' HOURS' AS duration
FROM   total;
Which outputs:
| DURATION |
| :---------- |
| 16:00 HOURS |
db<>fiddle here

SQL Server Max() function

I have a column named as Quarter in which data is as below:
Quarter
--------
Q3-2017
Q2-2017
Q1-2017
Q4-2016
Q3-2016
Q2-2016
Q1-2016
Q1-2018
I want to find max() from above. How should I proceed.
When I tried the MAX() function, it gave me Q4-2016 as the output (the alphabetically largest value) instead of Q1-2018.
This is happening because it's giving you the max of the column which is in a string format. It's ordering it Alpha-numerically and that's the max value when you sort the data. If you want to order it as you expect, you need to use some string manipulation to break the values down for ordering.
CREATE TABLE #quarters
(
[quarter] NVARCHAR(10)
);
INSERT INTO #quarters ( quarter )
VALUES ( 'Q3-2017' ) ,
( 'Q2-2017' ) ,
( 'Q1-2017' ) ,
( 'Q4-2016' ) ,
( 'Q3-2016' ) ,
( 'Q2-2016' ) ,
( 'Q1-2016' ) ,
( 'Q1-2018' );
SELECT q.quarter Original ,
CAST(RIGHT(q.quarter, 4) AS INT) AS TheYear , -- extracts the year
CAST(SUBSTRING(q.quarter, 2, 1) AS INT) AS TheQuarter -- extracts the quarter
FROM #quarters AS q
ORDER BY CAST(RIGHT(q.quarter, 4) AS INT) DESC ,
CAST(SUBSTRING(q.quarter, 2, 1) AS INT) DESC;
DROP TABLE #quarters;
Produces:
Original TheYear TheQuarter
---------- ----------- -----------
Q1-2018 2018 1
Q3-2017 2017 3
Q2-2017 2017 2
Q1-2017 2017 1
Q4-2016 2016 4
Q3-2016 2016 3
Q2-2016 2016 2
Q1-2016 2016 1
The above solution would also work without the casting: CAST((XXX) AS INT), but it's safer to do that in case an unexpected value appears.
And to get the top value, use TOP:
SELECT TOP 1 q.quarter Original
FROM #quarters AS q
ORDER BY CAST(RIGHT(q.quarter, 4) AS INT) DESC ,
CAST(SUBSTRING(q.quarter, 2, 1) AS INT) DESC;
You can use query like :
-- Latest quarter: restrict to the most recent year (last four characters), then
-- take the highest 'Qn-YYYY' label within that year.
SELECT MAX(quarter) AS quarter
FROM QuarterTable
WHERE RIGHT(quarter, 4) = (SELECT MAX(RIGHT(quarter, 4)) FROM QuarterTable);
Split quarter field into year (numeric!) and quarter (string), and order by at your taste.
Get first row (top 1) to get only the max value:
SELECT TOP 1 quarter,
CAST(SUBSTRING(quarter, 4, 4) AS INTEGER) AS y,
SUBSTRING(quarter, 1, 2) AS q FROM quarter
ORDER BY y desc, q desc

SQL query to find all timestamps covered by an interval in A but not covered by an interval in B ("subtract" or "except" between multiple intervals)

I have multiple tables in a PostgreSQL 9.4 database, where each row contains an interval as two columns "start" (inclusive) and "stop" (exclusive).
Consider the following pseudo-code (the tables are more complicated).
CREATE TABLE left (
start TIMESTAMP,
stop TIMESTAMP,
[...]
);
CREATE TABLE right (
start TIMESTAMP,
stop TIMESTAMP,
[...]
);
The intervals are inclusive of the start, but exclusive of the stop.
I now need a query to find all possible intervals of time where there is a row in "left" covering the interval, but not simultaneously a row in "right" covering the same interval.
One interval in "left" can be cut up into any number of intervals in the result, be shortened, or be entirely absent. Consider the following graph, with time progressing from left to right:
left [-----row 1------------------) [--row 2--) [--row 3----)
right [--row1--) [--row2--) [--row3--)
result [----) [----) [-------) [-----------)
In this tiny example, "left" has three rows, each representing one interval, and "right" has three rows, each representing one other interval.
The result has four rows of intervals, which together cover all possible timestamps where there is a row/interval in "left" covering that timestamp, but not a row/interval in "right" covering the same timestamp.
The tables are of course in reality very much larger than three rows each - in fact I will frequently be wanting to perform the algorithm between two subqueries that have the "start" and "stop" columns.
I have hit a dead end (multiple dead ends, in fact), and am on the verge of just fetching all records into memory and applying some procedural programming to the problem...
Any solutions or suggestions of what thinking to apply is greatly appreciated.
Change the types of columns to tsrange (or create an appropriate views):
CREATE TABLE leftr (
duration tsrange
);
CREATE TABLE rightr (
duration tsrange
);
insert into leftr values
('[2015-01-03, 2015-01-20)'),
('[2015-01-25, 2015-02-01)'),
('[2015-02-08, 2015-02-15)');
insert into rightr values
('[2015-01-01, 2015-01-06)'),
('[2015-01-10, 2015-01-15)'),
('[2015-01-18, 2015-01-26)');
The query:
-- Every part of a leftr range that is not covered by any rightr range.
-- "inv" lists the gaps between the rightr ranges; each leftr range is then
-- intersected (*) with every gap it overlaps (&&).
-- Assumes the rightr ranges are bounded and do not overlap each other.
select l.duration * inv.gap as result
from (
    -- the open-ended gap before the earliest rightr range; when rightr is empty
    -- min() is NULL and the gap is the whole timeline, so leftr is returned as is
    select tsrange(null::timestamp, min(lower(duration))) as gap
    from rightr
    union all
    -- the gap after each rightr range up to the next one; lead() is NULL for the
    -- last range, which leaves that gap unbounded above
    select tsrange(upper(duration), lower(lead(duration) over (order by duration))) as gap
    from rightr
) inv
join leftr l
    on l.duration && inv.gap
order by result;
result
-----------------------------------------------
["2015-01-06 00:00:00","2015-01-10 00:00:00")
["2015-01-15 00:00:00","2015-01-18 00:00:00")
["2015-01-26 00:00:00","2015-02-01 00:00:00")
["2015-02-08 00:00:00","2015-02-15 00:00:00")
(4 rows)
The idea:
l [-----row 1------------------) [--row 2--) [--row 3----)
r [--row1--) [--row2--) [--row3--)
inv(r) [----) [----) [------------------------->
l*inv(r) [----) [----) [-------) [-----------)
If the type change to tsrange is not an option, here an alternative solution using window function.
The important idea is to realize that only the start and end points of the intervals are relevant. In the first step the intervals are transformed into a sequence of starting and ending timestamps. (I use numbers to simplify the example.)
insert into t_left
select 1,4 from dual union all
select 6,9 from dual union all
select 12,13 from dual
;
insert into t_right
select 2,3 from dual union all
select 5,7 from dual union all
select 8,10 from dual union all
select 11,14 from dual
;
-- Turn every interval into two events: +1 at its start and -1 at its stop,
-- kept in separate columns for the left and the right table.
with event as (
select i_start tst, 1 left_change, 0 right_change from t_left union all
select i_stop tst, -1 left_change, 0 right_change from t_left union all
select i_start tst, 0 left_change, 1 right_change from t_right union all
select i_stop tst, 0 left_change, -1 right_change from t_right
)
select tst, left_change, right_change,
-- running totals of the changes in tst order
sum(left_change) over (order by tst) as is_left,
sum(right_change) over (order by tst) as is_right,
-- the (sub)interval from this event to the next one; the last row has no next event
'['||tst||','||lead(tst) over (order by tst) ||')' intrvl
from event
order by tst;
This yields two records for each interval: one for its start (+1) and one for its end (-1 in the CHANGE column).
TST LEFT_CHANGE RIGHT_CHANGE IS_LEFT IS_RIGHT INTRVL
1 1 0 1 0 [1,2)
2 0 1 1 1 [2,3)
3 0 -1 1 0 [3,4)
4 -1 0 0 0 [4,5)
5 0 1 0 1 [5,6)
6 1 0 1 1 [6,7)
7 0 -1 1 0 [7,8)
8 0 1 1 1 [8,9)
9 -1 0 0 1 [9,10)
10 0 -1 0 0 [10,11)
11 0 1 0 1 [11,12)
12 1 0 1 1 [12,13)
13 -1 0 0 1 [13,14)
14 0 -1 0 0 [14,)
The window SUM function
sum(left_change) over (order by tst)
adds up all changes so far, yielding 1 for being inside an interval and 0 for being outside of it.
The filter to get all (sub)intervals that are covered by left only is therefore trivial:
is_left = 1 and is_right = 0
Each (sub)interval starts at the timestamp of the current row and ends at the timestamp of the next row.
Final notes:
You may need to add logic to ignore intervals of length 0.
I tested this in Oracle, so please re-check the behaviour in Postgres.
For completeness: the naive method, without using interval types.
[I used the same sample data as @klin ]
CREATE TABLE tleft (
start TIMESTAMP,
stop TIMESTAMP,
payload text
);
INSERT INTO tleft(start,stop) VALUES
-- ('2015-01-08', '2015-03-07'), ('2015-03-21', '2015-04-14'), ('2015-05-01', '2015-05-15') ;
('2015-01-03', '2015-01-20'), ('2015-01-25', '2015-02-01'), ('2015-02-08', '2015-02-15');
CREATE TABLE tright (
start TIMESTAMP,
stop TIMESTAMP,
payload text
);
INSERT INTO tright(start,stop) VALUES
-- ('2015-01-01', '2015-01-15'), ('2015-02-01', '2015-02-14'), ('2015-03-01', '2015-04-07') ;
('2015-01-01', '2015-01-06'), ('2015-01-10', '2015-01-15'), ('2015-01-18', '2015-01-26');
-- Combine all {start,stop} events into one time series
-- , encoding the event-type into a state change.
-- Note: this assumes non-overlapping intervals in both
-- left and right tables.
WITH zzz AS (
SELECT stamp, SUM(state) AS state
FROM (
SELECT 1 AS state, start AS stamp FROM tleft
UNION ALL
SELECT -1 AS state, stop AS stamp FROM tleft
UNION ALL
SELECT 2 AS state, start AS stamp FROM tright
UNION ALL
SELECT -2 AS state, stop AS stamp FROM tright
) zz
GROUP BY stamp
)
-- Reconstruct *all* (sub)intervals
-- , and calculate a "running sum" over the state variable
SELECT * FROM (
SELECT zzz.stamp AS zstart
, LEAD(zzz.stamp) OVER (www) AS zstop
, zzz.state
, row_number() OVER(www) AS rn
, SUM(state) OVER(www) AS sstate
FROM zzz
WINDOW www AS (ORDER BY stamp)
) sub
-- extract only the (starting) state we are interested in
WHERE sub.sstate = 1
ORDER BY sub.zstart
;
Result:
DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 3
zstart | zstop | state | rn | sstate
---------------------+---------------------+-------+----+--------
2015-01-06 00:00:00 | 2015-01-10 00:00:00 | -2 | 3 | 1
2015-01-15 00:00:00 | 2015-01-18 00:00:00 | -2 | 5 | 1
2015-01-26 00:00:00 | 2015-02-01 00:00:00 | -2 | 9 | 1
2015-02-08 00:00:00 | 2015-02-15 00:00:00 | 1 | 11 | 1
(4 rows)
If tsrange is not an option maybe stored procedure is?
Something like this:
--create tables
drop table if exists tdate1;
drop table if exists tdate2;
create table tdate1(start timestamp, stop timestamp);
create table tdate2(start timestamp, stop timestamp);
--populate tables
insert into tdate1(start, stop) values('2015-01-01 00:10', '2015-01-01 01:00');
insert into tdate2(start, stop) values('2015-01-01 00:00', '2015-01-01 00:20');
insert into tdate2(start, stop) values('2015-01-01 00:30', '2015-01-01 00:40');
insert into tdate2(start, stop) values('2015-01-01 00:50', '2015-01-01 01:20');
insert into tdate1(start, stop) values('2015-01-01 01:10', '2015-01-01 02:00');
insert into tdate1(start, stop) values('2015-01-01 02:10', '2015-01-01 03:00');
--stored procedure itself
-- Returns every sub-interval of a tdate1 row that is not covered by a tdate2 row.
-- Each tdate1 row is joined to the tdate2 rows that overlap it; the loop walks
-- those pairs in (t1.start, t2.start) order and emits the uncovered pieces.
--   * the join predicate is a real overlap test, so a tdate2 row lying entirely
--     after a tdate1 row can no longer stretch an emitted piece past t1.stop;
--   * the explicit ORDER BY is required: the loop detects a new tdate1 row by a
--     change of t1.start and expects the tdate2 rows of one tdate1 row in
--     ascending start order;
--   * greatest() keeps the cursor from moving backwards when tdate2 rows overlap
--     each other.
create or replace function tdate_periods(out start timestamp, out stop timestamp)
returns setof record as
$$
declare
    rec record;
    laststart timestamp = null;  -- start of the tdate1 row currently being processed
    startdt timestamp = null;    -- where the still-uncovered remainder of that row begins
    stopdt timestamp = null;     -- stop of that row
begin
    for rec in
        select
            t1.start as t1start,
            t1.stop as t1stop,
            t2.start as t2start,
            t2.stop as t2stop
        from tdate1 t1
        left join tdate2 t2
            on t2.start < t1.stop
           and t2.stop > t1.start
        order by t1.start, t1.stop, t2.start
    loop
        if laststart <> rec.t1start or laststart is null then
            -- a new tdate1 row begins: flush what is left of the previous one
            if laststart is not null then
                if startdt < stopdt then
                    start = startdt;
                    stop = stopdt;
                    return next;
                    startdt = stopdt;
                end if;
            end if;
            startdt = rec.t1start;
            stopdt = rec.t1stop;
            laststart = startdt;
        end if;
        -- t2start is null when no tdate2 row overlaps this tdate1 row
        if rec.t2start is not null then
            if startdt < rec.t2start then
                -- the piece in front of this tdate2 row is uncovered
                start = startdt;
                stop = rec.t2start;
                return next;
            end if;
            startdt = greatest(startdt, rec.t2stop);
        end if;
    end loop;
    -- flush what is left of the last tdate1 row
    if startdt is not null and startdt < stopdt then
        start = startdt;
        stop = stopdt;
        return next;
    end if;
end
$$ language plpgsql;
--call
select * from tdate_periods();

SQL Server creating two tables and comparing them

I have a table with 3 columns (in SQL Server 2012). One of the columns is a date column. What I would like to do is split the table for two specified dates and merge them into one table with an extra field. Hopefully the example below will explain.
Example of what I currently have.
Company date no_employees
ABC 2014-05-30 35
DEF 2014-05-30 322
GHI 2014-05-30 65
JKL 2014-05-30 8
MNO 2014-05-30 30
ABC 2014-01-01 33
DEF 2014-01-01 301
GHI 2014-01-01 70
MNO 2014-01-01 30
What I would like a query to return for me (not sure if its possible),
Company start date no_employees end date no_employees diff
ABC 33 35 2
DEF 301 322 21
GHI 70 65 -5
JKL 0 8 8
MNO 30 30 0
PIVOT (and COALESCE to generate the 0s) seems to do it:
-- Sample data in a table variable. Table variables are named with a leading @;
-- "declare #t table" is not valid T-SQL.
declare @t table (Company char(3),[date] date,no_employees int)
insert into @t(Company,[date],no_employees) values
('ABC','2014-05-30',35 ),
('DEF','2014-05-30',322 ),
('GHI','2014-05-30',65 ),
('JKL','2014-05-30',8 ),
('MNO','2014-05-30',30 ),
('ABC','2014-01-01',33 ),
('DEF','2014-01-01',301 ),
('GHI','2014-01-01',70 ),
('MNO','2014-01-01',30 )

-- One row per company: head count on the start date, on the end date, and the
-- difference; a date with no row for the company counts as 0.
select Company,
    COALESCE(start,0) as start,
    COALESCE([end],0) as [end],
    COALESCE([end],0)-COALESCE(start,0) as diff
from
    (select
        Company,
        -- label each row with the period it belongs to so PIVOT can turn the labels into columns
        CASE WHEN [date]='20140530' THEN 'end'
             ELSE 'start' END as period,
        no_employees
     from @t
     where [date] in ('20140101','20140530')
    ) t
pivot (MAX(no_employees) for period in ([start],[end])) u
Result:
Company start end diff
------- ----------- ----------- -----------
ABC 33 35 2
DEF 301 322 21
GHI 70 65 -5
JKL 0 8 8
MNO 30 30 0
This could easily be parameterized for the specific start and end dates to use.
Also, at the moment I'm using MAX because we have to have an aggregate in PIVOT, even though here the sample data contains a maximum of one row. If there's a possibility of multiple rows existing for the start or end date, we'd need to know how you want that handled.
-- Head count per company on @lowdate and on @highdate, plus the difference.
-- Reads the table variable @t (Company, [date], no_employees), which has to be
-- declared and filled earlier in the same batch. Variables are prefixed with @.
declare @lowdate date = '2014-01-01'
declare @highdate date = '2014-05-30'
;with x as
(
    -- head count on the start date
    select company, min(no_employees) no_employees
    from @t records
    where [date] = @lowdate
    group by company
), y as
(
    -- head count on the end date
    select company, max(no_employees) no_employees
    from @t records
    where [date] = @highdate
    group by company
)
select coalesce(x.company, y.company) company,
    coalesce(x.no_employees, 0) start_no_employees,
    coalesce(y.no_employees, 0) end_no_employees,
    coalesce(y.no_employees, 0) - coalesce(x.no_employees, 0) diff
from
    -- the full outer join keeps companies that appear on only one of the two dates
    x full outer join y
on
    x.company = y.company
Create Table #temp(Company varchar(10), CDate date,emp int)

-- Pair every row (T1) with each later row of the same company (T2):
-- T1 is the start, T2 the end, so the change is end minus start.
-- NOTE: the inner join leaves out a company that has a row on one date only.
Select T1.Company,
    T1.emp start_emp,
    T2.emp end_emp,
    (T2.emp-T1.emp) Diff
from #temp T1
inner join #temp T2 On T1.Company=T2.Company and T1.CDate<T2.CDate
Order by T1.Company,T1.CDate
-- Sample data in a table variable (table variables are named with a leading @).
declare @t table (Company char(3),[date] date,no_employees int)
insert into @t(Company,[date],no_employees) values
('ABC','2014-05-30',35 ),
('DEF','2014-05-30',322 ),
('GHI','2014-05-30',65 ),
('JKL','2014-05-30',8 ),
('MNO','2014-05-30',30 ),
('ABC','2014-01-01',33 ),
('DEF','2014-01-01',301 ),
('GHI','2014-01-01',70 ),
('MNO','2014-01-01',30 )

-- Head count per company on the earliest and on the latest date in the table,
-- and the change between them (end minus start). The values are picked by date:
-- MIN/MAX over no_employees would swap start and end whenever the head count fell.
-- A company with no row on one of the two dates counts as 0 there.
select t.Company,
    COALESCE(MAX(CASE WHEN t.[date] = b.start_date THEN t.no_employees END), 0) as start_no_employees,
    COALESCE(MAX(CASE WHEN t.[date] = b.end_date THEN t.no_employees END), 0) as end_no_employees,
    COALESCE(MAX(CASE WHEN t.[date] = b.end_date THEN t.no_employees END), 0)
  - COALESCE(MAX(CASE WHEN t.[date] = b.start_date THEN t.no_employees END), 0) as cNT
from @t t
cross join (select MIN([date]) as start_date, MAX([date]) as end_date from @t) b
GROUP BY t.Company
Select the companies. Outer join the start-date records. Outer join the end-date records. Use coalesce to show 0 instead of null.
-- Head count per company on the start date and on the end date;
-- a company with no row on a date shows 0 for that date.
select
    companies.company,  -- qualified: all three derived tables expose a "company" column
    coalesce(rec20140101.no_employees, 0) as empno_start,
    coalesce(rec20140530.no_employees, 0) as empno_end
from
(
    select distinct company
    from records
) companies -- or use a company table if you have one
left join
(
    select company, no_employees
    from records
    where recorddate = '2014-01-01'
) rec20140101
    on rec20140101.company = companies.company
left join
(
    select company, no_employees
    from records
    where recorddate = '2014-05-30'
) rec20140530
    on rec20140530.company = companies.company;
EDIT: And here is a way to scan the table just once. It's even a little shorter ;-)
-- Same result with a single scan of the table: each CASE keeps only the rows of
-- one date and is NULL otherwise, and min() ignores those NULLs.
-- 2014-01-01 is the start date, 2014-05-30 the end date.
select
    company,
    coalesce(min( case when recorddate = '2014-01-01' then no_employees end ), 0) as empno_start,
    coalesce(min( case when recorddate = '2014-05-30' then no_employees end ), 0) as empno_end
from records
group by company;
Try this:
-- Pairs each company's row on the latest date (src) with its row on the earliest
-- date (tgt); the full outer join keeps companies that have only one of the two.
;with cte as
(select
    COALESCE(src.company, tgt.company) company,
    isnull(tgt.no_employees,0) 'start date no_employees',
    isnull(src.no_employees , 0) 'end date no_employees'
 from
    tbl src
    full outer join tbl tgt on src.company = tgt.company and src.date <> tgt.date
 -- src must be the latest-date row (or missing), tgt the earliest-date row (or missing)
 where (src.date = (select max(date) from tbl) or src.date is null)
 and (tgt.date = (select min(date) from tbl) or tgt.date is null)
)
select *, [end date no_employees] - [start date no_employees] diff
from cte