Related
I am attempting to build a procedure that will INSERT rows into the table emp_attendance.
I call a procedure that generates a list of dates based on a range. I then join that table with each employee_id.
Being a novice SQL developer, I am having difficulty trying to understand why the procedure create_emp_attendance is not being created.
Below is my test CASE. Once I get the rows working for the SELECT I will add the INSERT code as I am trying to take the one little piece at a time.
Thanks in advance for your help and expertise.
ALTER SESSION SET NLS_DATE_FORMAT = 'MMDDYYYY HH24:MI:SS';
CREATE OR REPLACE TYPE nt_date IS TABLE OF DATE;
CREATE OR REPLACE FUNCTION generate_dates_pipelined(
p_from IN DATE,
p_to IN DATE
)
RETURN nt_date PIPELINED DETERMINISTIC
IS
v_start DATE := TRUNC(LEAST(p_from, p_to));
v_end DATE := TRUNC(GREATEST(p_from, p_to));
BEGIN
LOOP
PIPE ROW (v_start);
EXIT WHEN v_start >= v_end;
v_start := v_start + INTERVAL '1' DAY;
END LOOP;
RETURN;
END generate_dates_pipelined;
CREATE SEQUENCE batch_seq
START WITH 1
MAXVALUE 999999999999999999999999999
MINVALUE 1
NOCYCLE
CACHE 20
NOORDER;
Create table employees(
employee_id NUMBER(6),
first_name VARCHAR2(20),
last_name VARCHAR2(20),
card_num VARCHAR2(10),
work_days VARCHAR2(7)
);
INSERT INTO employees (
employee_id,
first_name,
last_name,
card_num,
work_days
)
WITH names AS (
SELECT 1, 'John', 'Doe', 'D564311','YYYYYNN' FROM dual UNION ALL
SELECT 2, 'Justin', 'Case', 'C224311','YYYYYNN' FROM dual UNION ALL
SELECT 3, 'Mike', 'Jones', 'J288811','YYYYYNN' FROM dual UNION ALL
SELECT 4, 'Jane', 'Smith', 'S564661','YYYYYNN' FROM dual
) SELECT * FROM names;
CREATE TABLE emp_attendance(
seq_num integer GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
employee_id NUMBER(6),
start_date DATE,
end_date DATE,
week_number NUMBER(2),
create_date DATE DEFAULT SYSDATE
);
CREATE OR REPLACE PROCEDURE create_emp_attendance (
p_start_date IN DATE,
p_end_date IN DATE
)
IS l_batch_seq number;
BEGIN
SELECT get_batch_seq INTO l_batch_seq FROM dual;
SELECT
employee_id,
start_date,
start_date+NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(3600,43200)), 'SECOND') AS end_date,
to_char(start_date,'WW') AS week_number
FROM (
-- Need subquery to generate end_date based on start_date.
SELECT
e.employee_id, d.COLUMN_VALUE+ NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(0,86399)), 'SECOND') AS start_date
FROM employees e
INNER JOIN TABLE( generate_dates_pipelined(p_start_date, p_end_date)
) d
) ed
END;
EXEC create_emp_attendanc(DATE '2021-08-07', DATE '2021-08-14');
You didn't refer to the sequence, so I removed that. Your SELECT needs to have a target via INTO clause or be used within the context of an INSERT (or other) statement. The JOIN was missing an ON clause. But you appeared to want a CROSS JOIN.
If you really want to INSERT in one statement, here's the form:
CREATE OR REPLACE PROCEDURE create_emp_attendance (
p_start_date IN DATE,
p_end_date IN DATE
)
IS
BEGIN
INSERT INTO emp_attendance (employee_id, start_date, end_date, week_number)
SELECT employee_id
, start_date
, start_date+NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(3600,43200)), 'SECOND') AS end_date
, to_char(start_date,'WW') AS week_number
FROM ( -- Need subquery to generate end_date based on start_date.
SELECT e.employee_id, d.COLUMN_VALUE + NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(0,86399)), 'SECOND') AS start_date
FROM employees e
CROSS JOIN TABLE( generate_dates_pipelined(p_start_date, p_end_date) ) d
) ed
;
END;
/
EXEC create_emp_attendance(DATE '2021-08-07', DATE '2021-08-14');
/
-- Procedure CREATE_EMP_ATTENDANCE compiled
-- PL/SQL procedure successfully completed.
Read comments within code.
SQL> CREATE OR REPLACE PROCEDURE create_emp_attendance
2 (
3 p_start_date IN DATE,
4 p_end_date IN DATE
5 )
6 IS
7 l_batch_seq number;
8 BEGIN
9 -- there's no GET_BATCH_SEQ function (at least, you didn't post it)
10 -- SELECT get_batch_seq INTO l_batch_seq FROM dual;
11 l_batch_seq := batch_seq.nextval;
12
13 -- In order to avoid TOO_MANY_ROWS, switching to a cursor FOR loop
14 for cur_r in
15 (SELECT
16 employee_id,
17 start_date,
18 start_date+NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(3600,43200)), 'SECOND') AS end_date,
19 to_char(start_date,'WW') AS week_number
20 FROM (-- Need subquery to generate end_date based on start_date.
21 SELECT
22 e.employee_id,
23 d.COLUMN_VALUE + NUMTODSINTERVAL(FLOOR(DBMS_RANDOM.VALUE(0,86399)), 'SECOND') AS start_date
24 FROM employees e
25 -- not INNER, but CROSS join (or, if it were INNER, on which column(s)?)
26 CROSS JOIN TABLE(generate_dates_pipelined(p_start_date, p_end_date)) d
27 ) ed
28 ) loop
29 -- you'll probably have INSERT statement here, according to what you said
30 null;
31 end loop;
32 END;
33 /
Procedure created.
Testing:
SQL> EXEC create_emp_attendance(DATE '2021-08-07', DATE '2021-08-14');
PL/SQL procedure successfully completed.
SQL>
I am attempting to create a procedure that will INSERT multiple rows into a table from the results of a query in the procedure.
The setup below works fine but I am having difficulty inserting the employee_id, timeoff_date from the output of the query into the timeoff table.
Below is my test CASE. I'm testing in live sql so we can both have the same Oracle version. Any help would be greatly appreciated.
CREATE OR REPLACE TYPE obj_date IS OBJECT (
date_val DATE
);
CREATE OR REPLACE TYPE nt_date IS TABLE OF obj_date;
create or replace function generate_dates_pipelined(
p_from in date,
p_to in date
)
return nt_date
pipelined
is
begin
for c1 in (
with calendar (start_date, end_date ) as (
select trunc(p_from), trunc(p_to) from dual
union all
select start_date + 1, end_date
from calendar
where start_date + 1 <= end_date
)
select start_date as day
from calendar
) loop
pipe row (obj_date(c1.day));
end loop;
return;
end generate_dates_pipelined;
create table holidays(
holiday_date DATE not null,
holiday_name VARCHAR2(20),
constraint holidays_pk primary key (holiday_date),
constraint is_midnight check ( holiday_date = trunc ( holiday_date ) )
);
INSERT into holidays (HOLIDAY_DATE,HOLIDAY_NAME) WITH dts as (
select to_date('01-AUG-2021 00:00:00','DD-MON-YYYY HH24:MI:SS'), 'August 1st 2021' from dual union all
select to_date('05-AUG-2021 00:00:00','DD-MON-YYYY HH24:MI:SS'), 'August 5th 2021' from dual) SELECT * from dts;
Create table employees(
employee_id NUMBER(6),
first_name VARCHAR2(20),
last_name VARCHAR2(20),
card_num VARCHAR2(10),
work_days VARCHAR2(7)
);
ALTER TABLE employees
ADD ( CONSTRAINT employees_pk
PRIMARY KEY (employee_id));
INSERT INTO employees
(
EMPLOYEE_ID,
first_name,
last_name,
card_num,
work_days)
WITH names AS (
SELECT 1, 'Jane', 'Doe','F123456', 'NYYYYYN' FROM dual UNION ALL
SELECT 2, 'Madison', 'Smith','R33432','NYYYYYN'FROM dual UNION ALL
SELECT 3, 'Justin', 'Case','C765341','NYYYYYN'FROM dual UNION ALL
SELECT 4, 'Mike', 'Jones', 'D564311','NYYYYYN' FROM dual) SELECT * FROM names;
create table timeoff(
seq_num integer GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
employee_id NUMBER(6),
timeoff_date DATE,
timeoff_type VARCHAR2(1) DEFAULT 'V',
constraint timeoff_chk check (timeoff_date=trunc(timeoff_date, 'dd')),
constraint timeoff_pk primary key (employee_id, timeoff_date)
);
CREATE OR REPLACE PROCEDURE create_timeoff_requests (start_date DATE, end_date DATE)
IS
type t_date is table of date;
l_res t_date;
BEGIN
SELECT
c.date_val
BULK COLLECT INTO l_res
FROM employees e
INNER JOIN TABLE (generate_dates_pipelined (start_date, end_date))c
PARTITION BY ( e.employee_id )
ON (SUBSTR(e.work_days, TRUNC(c.date_val) - TRUNC(c.date_val, 'IW') + 1, 1) = 'Y')
WHERE NOT EXISTS (
SELECT 1
FROM holidays h
WHERE c.date_val = h.holiday_date
)
ORDER BY
e.employee_id,
c.date_val;
-- debug
-- for i in 1..l_res.count -- loop
--dbms_output.put_line(l_res(i));
-- end loop;
END;
EXEC create_timeoff_requests (DATE '2021-08-01', DATE '2021-08-10');
I think it might be easier to create a procedure which looks like this (Just replace the declared constants in the block with parameters in your procedure definition):
CREATE PROCEDURE create_timeoff_requests
(
p_dStart DATE,
p_dEnd DATE,
p_nEmployeeID INTEGER
p_sType VARCHAR2
)
IS
BEGIN
INSERT INTO timeoff (employee_id, timeoff_date, timeoff_type)
SELECT e.employee_id, do.day_off, p_sType
FROM employees e
CROSS JOIN (SELECT p_dStart+LEVEL AS DAY_OFF
FROM DUAL
CONNECT BY LEVEL <= p_dEnd - p_dStart) do
WHERE e.employee_id = p_nEmployeeID
AND SUBSTR(e.workdays, TO_CHAR(do.day_off, 'D'), 1) = 'Y'
AND NOT EXISTS (SELECT 'X' FROM holidays h WHERE h.holiday_date = do.day_off);
END;
/
The bottom line: I have 3 tables: sales, departments and products. Creation Code below:
CREATE TABLE dep
(
id NUMBER (10) NOT NULL,
city VARCHAR2 (100) NOT NULL,
name VARCHAR2 (100) NOT NULL,
CONSTRAINT dep_pk PRIMARY KEY (id)
);
--
CREATE TABLE prod
(
id NUMBER (10) NOT NULL,
price NUMBER (10) NOT NULL,
name VARCHAR2 (100) NOT NULL,
CONSTRAINT prod_pk PRIMARY KEY (id)
);
--
CREATE TABLE sales
(
time DATE NOT NULL,
dep_id NUMBER (10) NOT NULL,
prod_id NUMBER (10) NOT NULL,
cost NUMBER (10, 2) NOT NULL,
CONSTRAINT sales_fk1 FOREIGN KEY (dep_id) REFERENCES dep (id),
CONSTRAINT sales_fk2 FOREIGN KEY (prod_id) REFERENCES prod (id)
);
Filling code:
DECLARE
BEGIN
FOR i IN 1 .. 100 LOOP
INSERT INTO dep
VALUES (i,
CASE
WHEN i <= 50 THEN 'town1'
ELSE 'town2'
END,
'dep'
|| to_char (i));
INSERT INTO prod
VALUES (i,
i * 10,
'prod'
|| to_char (i));
END LOOP;
FOR i IN 1 .. 1000 LOOP
INSERT INTO sales
VALUES (trunc (to_date ('01.01.2016', 'dd.mm.yyyy')) + sys.DBMS_RANDOM.value (0, 0.99) - i,
mod (i, 100) + 1,
mod (i, 100) + 1,
i);
END LOOP;
COMMIT;
END;
I need to implement a query: to double the price of all goods that were sold last year in the 'dep10' department.
Here's what I managed to give birth:
UPDATE prod
SET prod.price = prod.price*2
WHERE EXISTS (SELECT *
FROM prod
JOIN sales ON prod.id = sales.prod_id
JOIN dep ON sales.dep_id = dep.id
WHERE (DATEPART(Year, sales.time) = DATEPART(Year,DATEADD(Month, -1, GETDATE()))) AND dep.name = 'dep10' );
But surprise - not working. Actually, I get stuck with an expression to fetch records from last year. Please tell me the options for implementing requests of this type? Is there a universal subquery for fetching records from last year? Didn't find working example for Oracle.
Your mistake is mainly that you are selecting another product in the subquery. Moreover, DATEPART is not an Oracle keyword.
UPDATE prod
SET price = price * 2
WHERE EXISTS
(
SELECT *
FROM sales
WHERE sales.prod_id = prod.id
AND EXTRACT(Year FROM sales.time) = EXTRACT(Year FROM SYSDATE) - 1
AND sales.dep_id = (SELECT id FROM dep WHERE name = 'dep10')
);
This is similar to Thorsten's query (which I'm upvoting), but it seems more natural to write this using JOIN and some other mechanisms:
UPDATE prod
SET price = price * 2
WHERE EXISTS (SELECT 1
FROM sales s JOIN
dep d
ON s.dep_id = d.id
WHERE s.prod_id = p.id AND
d.name = 'dep10' AND
s.time >= TRUNC(sysdate, 'YEAR') - INTERVAL '1' YEAR AND
s.time < TRUNC(sysdate, 'YEAR)
);
I think this formulation is more likely to use an index on sales(prod_id, time, dep_id).
I have a little DB with 3 tables: account, account_statements and account operations. I need to know:
all credit operations of the period
all debet operations of the perion
a balance on any date
My script is:
CREATE TABLE account
(
id number(10) NOT NULL,
account number(20) NOT NULL,
CONSTRAINT account_id PRIMARY KEY (id)
);
CREATE TABLE account_statements
(
id number(10) NOT NULL,
account_id number(10) NOT NULL,
statement_date date,
inbalance number(20,2),
outbalance number (20,2),
CONSTRAINT statement_id PRIMARY KEY (id),
CONSTRAINT account_statements_foreign_id FOREIGN KEY (account_id) REFERENCES account(id)
);
CREATE TABLE account_operations
(
id number(10) NOT NULL,
account_id number(10) NOT NULL,
operdate date,
summ number(20,2),
opertype char(6),
CONSTRAINT operations_id PRIMARY KEY (id),
CONSTRAINT account_operations_foreign_id FOREIGN KEY (account_id) REFERENCES account(id)
);
CREATE OR REPLACE FUNCTION Get_debet_on_period (
v_startdate IN date,
v_enddate IN date,
v_account IN account.account%TYPE)
RETURN number
IS
v_debet_summ number(20,2);
BEGIN
SELECT SUM(summ) INTO v_debet_summ
FROM account_operations ao,
account a
WHERE ao.operdate between v_startdate AND v_enddate
AND ao.opertype='DEBET'
AND a.account=v_account
AND ao.account_id=a.id;
RETURN v_debet_summ;
END;
CREATE OR REPLACE FUNCTION Get_credit_on_period (
v_startdate IN date,
v_enddate IN date,
v_account IN account.account%TYPE)
RETURN number
IS
v_credit_summ number(20,2);
BEGIN
SELECT SUM(summ) INTO v_credit_summ
FROM account_operations ao,
account a
WHERE ao.operdate between v_startdate AND v_enddate
AND ao.opertype='CREDIT'
AND a.account=v_account
AND ao.account_id=a.id;
RETURN v_credit_summ;
END;
CREATE OR REPLACE FUNCTION Get_balance_on_date (
v_date IN date,
v_account IN account.account%TYPE)
RETURN number
IS
v_balance_summ number(20,2);
v_startdate date;
v_startsumm number;
BEGIN
SELECT MAX(as.statement_date) INTO v_startdate
FROM account_statements as, account a
WHERE a.id=as.account_id
AND a.account=v_account
AND as.statement_date<v_date;
IF v_startdate IS NOT NULL THEN
SELECT as.outbalance INTO v_startsumm
FROM account_statement as, account a
WHERE a.id=as.account_id
AND a.account=v_account
AND as.statement_date=v_statement_date;
v_balance_summ:=v_startsumm+Get_credit_on_period(v_startdate, v_date, v_account)-Get_debet_on_period(v_startdate, v_date, v_account);
ELSE
v_startsumm:=0;
v_balance_summ:=Get_credit_on_period(v_startdate, v_date, v_account)-Get_debet_on_period(v_startdate, v_date, v_account);
END IF;
RETURN v_balance_summ;
END;
I have an error in function Get_balance_on_date:
Errors: FUNCTION GET_BALANCE_ON_DATE Line/Col: 10/4 PL/SQL: SQL
Statement ignored Line/Col: 10/15 PL/SQL: ORA-00936: missing
expression Line/Col: 16/7 PL/SQL: SQL Statement ignored Line/Col:
16/14 PL/SQL: ORA-00936: missing expression
Your function get_balance_on_date does not compile.
You're using table account_statement, however you created table named in plural form account_statements.
You're using reserved keyword AS as an alias "account_statements as". You should change the alias to something different.
You're using undeclared variable "v_statement_date".
Fixed function for you:
CREATE OR REPLACE FUNCTION get_balance_on_date
(
v_date IN DATE
,v_account IN account.account%TYPE
) RETURN NUMBER IS
v_balance_summ NUMBER(20, 2);
v_startdate DATE;
v_startsumm NUMBER;
v_statement_date DATE; -- Remove this if you don't need, created this for function to compile
BEGIN
SELECT MAX(stm.statement_date)
INTO v_startdate
FROM account_statements stm
,account a
WHERE a.id = stm.account_id
AND a.account = v_account
AND stm.statement_date < v_date;
IF v_startdate IS NOT NULL
THEN
SELECT stm.outbalance
INTO v_startsumm
FROM account_statements stm
,account a
WHERE a.id = stm.account_id
AND a.account = v_account
AND stm.statement_date = v_statement_date;
v_balance_summ := v_startsumm +
get_credit_on_period(v_startdate, v_date, v_account) -
get_debet_on_period(v_startdate, v_date, v_account);
ELSE
v_balance_summ := get_credit_on_period(v_startdate, v_date, v_account) -
get_debet_on_period(v_startdate, v_date, v_account);
END IF;
RETURN v_balance_summ;
END;
You are using a reserved keyword as an alias
FROM account_statements as, account a
as has to be changed to something else since it is part of the SQL language.
tl;dr: I want to generate a dates table in Redshift in order to make a report easier to generate. Preferable without needing large tables already in redshift, needing to upload a csv file.
long version:
I am working on a report where I have to average new items created per day of the week. The date range could span months or more, so there could be, say, 5 Mondays but only 4 Sundays, which can make the math a little tricky. Also, I am not guaranteed an instance of a single item per day, especially once a user starts slicing the data. Which, this is tripping up the BI tool.
The best way to tackle this problem is most likely a dates table. However, most of the tutorials for dates tables use SQL commands that are not available or not fully supported by Redshift (I'm looking at you, generate_series).
Is there an easy way to generate a dates table in Redshift?
The code I was attempting to use: (based on this also-not-working recommendation: http://elliot.land/post/building-a-date-dimension-table-in-redshift )
CREATE TABLE facts.dates (
"date_id" INTEGER NOT NULL PRIMARY KEY,
-- DATE
"full_date" DATE NOT NULL,
-- YEAR
"year_number" SMALLINT NOT NULL,
"year_week_number" SMALLINT NOT NULL,
"year_day_number" SMALLINT NOT NULL,
-- QUARTER
"qtr_number" SMALLINT NOT NULL,
-- MONTH
"month_number" SMALLINT NOT NULL,
"month_name" CHAR(9) NOT NULL,
"month_day_number" SMALLINT NOT NULL,
-- WEEK
"week_day_number" SMALLINT NOT NULL,
-- DAY
"day_name" CHAR(9) NOT NULL,
"day_is_weekday" SMALLINT NOT NULL,
"day_is_last_of_month" SMALLINT NOT NULL
) DISTSTYLE ALL SORTKEY (date_id)
;
INSERT INTO facts.dates
(
"date_id"
,"full_date"
,"year_number"
,"year_week_number"
,"year_day_number"
-- QUARTER
,"qtr_number"
-- MONTH
,"month_number"
,"month_name"
,"month_day_number"
-- WEEK
,"week_day_number"
-- DAY
,"day_name"
,"day_is_weekday"
,"day_is_last_of_month"
)
SELECT
cast(seq + 1 AS INTEGER) AS date_id,
-- DATE
datum AS full_date,
-- YEAR
cast(extract(YEAR FROM datum) AS SMALLINT) AS year_number,
cast(extract(WEEK FROM datum) AS SMALLINT) AS year_week_number,
cast(extract(DOY FROM datum) AS SMALLINT) AS year_day_number,
-- QUARTER
cast(to_char(datum, 'Q') AS SMALLINT) AS qtr_number,
-- MONTH
cast(extract(MONTH FROM datum) AS SMALLINT) AS month_number,
to_char(datum, 'Month') AS month_name,
cast(extract(DAY FROM datum) AS SMALLINT) AS month_day_number,
-- WEEK
cast(to_char(datum, 'D') AS SMALLINT) AS week_day_number,
-- DAY
to_char(datum, 'Day') AS day_name,
CASE WHEN to_char(datum, 'D') IN ('1', '7')
THEN 0
ELSE 1 END AS day_is_weekday,
CASE WHEN
extract(DAY FROM (datum + (1 - extract(DAY FROM datum)) :: INTEGER +
INTERVAL '1' MONTH) :: DATE -
INTERVAL '1' DAY) = extract(DAY FROM datum)
THEN 1
ELSE 0 END AS day_is_last_of_month
FROM
-- Generate days for 81 years starting from 2000.
(
SELECT
'2000-01-01' :: DATE + generate_series AS datum,
generate_series AS seq
FROM generate_series(0,81 * 365 + 20,1)
) DQ
ORDER BY 1;
Which throws this error
[Amazon](500310) Invalid operation: Specified types or functions (one per INFO message) not supported on Redshift tables.;
1 statement failed.
... because, I assume, INSERT and generate_series are not allowed in the same command in Redshift
In asking the question, I figured it out. Oops.
I started with a "facts" schema.
CREATE SCHEMA facts;
Run the following to start a numbers table:
create table facts.numbers
(
number int PRIMARY KEY
)
;
Use this to generate your number list. I used a million to get started
SELECT ',(' || generate_series(0,1000000,1) || ')'
;
Then copy-paste the numbers from your results in the query below, after VALUES:
INSERT INTO facts.numbers
VALUES
(0)
,(1)
,(2)
,(3)
,(4)
,(5)
,(6)
,(7)
,(8)
,(9)
-- etc
^ Make sure to remove the leading comma from the copy-pasted list of numbers
Once you have a numbers table, then you can generate a dates table (again, stealing code from elliot land http://elliot.land/post/building-a-date-dimension-table-in-redshift ) :
CREATE TABLE facts.dates (
"date_id" INTEGER NOT NULL PRIMARY KEY,
-- DATE
"full_date" DATE NOT NULL,
-- YEAR
"year_number" SMALLINT NOT NULL,
"year_week_number" SMALLINT NOT NULL,
"year_day_number" SMALLINT NOT NULL,
-- QUARTER
"qtr_number" SMALLINT NOT NULL,
-- MONTH
"month_number" SMALLINT NOT NULL,
"month_name" CHAR(9) NOT NULL,
"month_day_number" SMALLINT NOT NULL,
-- WEEK
"week_day_number" SMALLINT NOT NULL,
-- DAY
"day_name" CHAR(9) NOT NULL,
"day_is_weekday" SMALLINT NOT NULL,
"day_is_last_of_month" SMALLINT NOT NULL
) DISTSTYLE ALL SORTKEY (date_id)
;
INSERT INTO facts.dates
(
"date_id"
,"full_date"
,"year_number"
,"year_week_number"
,"year_day_number"
-- QUARTER
,"qtr_number"
-- MONTH
,"month_number"
,"month_name"
,"month_day_number"
-- WEEK
,"week_day_number"
-- DAY
,"day_name"
,"day_is_weekday"
,"day_is_last_of_month"
)
SELECT
cast(seq + 1 AS INTEGER) AS date_id,
-- DATE
datum AS full_date,
-- YEAR
cast(extract(YEAR FROM datum) AS SMALLINT) AS year_number,
cast(extract(WEEK FROM datum) AS SMALLINT) AS year_week_number,
cast(extract(DOY FROM datum) AS SMALLINT) AS year_day_number,
-- QUARTER
cast(to_char(datum, 'Q') AS SMALLINT) AS qtr_number,
-- MONTH
cast(extract(MONTH FROM datum) AS SMALLINT) AS month_number,
to_char(datum, 'Month') AS month_name,
cast(extract(DAY FROM datum) AS SMALLINT) AS month_day_number,
-- WEEK
cast(to_char(datum, 'D') AS SMALLINT) AS week_day_number,
-- DAY
to_char(datum, 'Day') AS day_name,
CASE WHEN to_char(datum, 'D') IN ('1', '7')
THEN 0
ELSE 1 END AS day_is_weekday,
CASE WHEN
extract(DAY FROM (datum + (1 - extract(DAY FROM datum)) :: INTEGER +
INTERVAL '1' MONTH) :: DATE -
INTERVAL '1' DAY) = extract(DAY FROM datum)
THEN 1
ELSE 0 END AS day_is_last_of_month
FROM
-- Generate days for 81 years starting from 2000.
(
SELECT
'2000-01-01' :: DATE + number AS datum,
number AS seq
FROM facts.numbers
WHERE number between 0 and 81 * 365 + 20
) DQ
ORDER BY 1;
^ Be sure to set the numbers at the end for the date range you need
As a workaround, you can spin Postgres instance on your local machine, run the code there, export to CSV, then run CREATE TABLE portion only in Redshift and load data from CSV. Since this is a one-time operation it's ok to do, this is what I'm actually doing for new Redshift deployments.
Here is a different suggestion for building the facts.numbers that does not require manual intervention:
Take a system table (guaranteed to exist) of a known or stable size
Cross join that table to itself enough times to get the desired number of rows
Select the row_number() over (order by 1) to turn those created records into an ascending set of numbers
Example using the Redshift system table pg_catalog.pg_operator (which as of Oct 2020 has 659 records):
-- Prep, so that you can copy/paste the code sample
create schema if not exists facts; -- Make sure the schema exists
drop table if exists facts.numbers; -- Avoid an error if that table already exists;
create table facts.numbers -- Create the table definition
(
number int primary key
);
-- The bit you care about
insert into facts.numbers
select row_number() over (order by 1) -- return 1..n in place of the original record
from pg_catalog.pg_operator a -- 659 records
cross join pg_catalog.pg_operator b -- to get 659^2=434k records
cross join pg_catalog.pg_operator c -- to get 659^3=286M records
limit 2000000 -- to limit the result to a reasonable size
;
Extending great ideas above - small fixes for starting from 2nd day of the year instead of 1st (BI tools should not be happy with this miss) + simplification and fix for flag is_last_day_of_month:
CREATE SCHEMA IF NOT EXISTS dimensions; -- Make sure the schema exists
DROP TABLE IF EXISTS dimensions.numbers; -- Avoid an error if that table already exists;
CREATE TABLE dimensions.numbers -- Create the table definition
(
number INT PRIMARY KEY
);
-- Work around for Generate_series() and INSERT INTO by Sam Davey
INSERT INTO dimensions.numbers
SELECT row_number() over (order by 1) -- return 1..n in place of the original record
FROM pg_catalog.pg_operator a -- 659 records
CROSS JOIN pg_catalog.pg_operator b -- to get 659^2=434k records
CROSS JOIN pg_catalog.pg_operator c -- to get 659^3=286M records
LIMIT 1000000 -- to limit the result to a reasonable size
;
-- Elliot solution http://elliot.land/post/building-a-date-dimension-table-in-redshift
CREATE TABLE dimensions.dates (
"date_id" INTEGER NOT NULL PRIMARY KEY,
-- DATE
"full_date" DATE NOT NULL,
-- YEAR
"year_number" SMALLINT NOT NULL,
"year_week_number" SMALLINT NOT NULL,
"year_day_number" SMALLINT NOT NULL,
-- QUARTER
"qtr_number" SMALLINT NOT NULL,
-- MONTH
"month_number" SMALLINT NOT NULL,
"month_name" CHAR(9) NOT NULL,
"month_day_number" SMALLINT NOT NULL,
-- WEEK
"week_day_number" SMALLINT NOT NULL,
-- DAY
"day_name" CHAR(9) NOT NULL,
"day_is_weekday" SMALLINT NOT NULL,
"day_is_last_of_month" SMALLINT NOT NULL
) DISTSTYLE ALL SORTKEY (date_id);
INSERT INTO dimensions.dates
(
"date_id"
,"full_date"
,"year_number"
,"year_week_number"
,"year_day_number"
-- QUARTER
,"qtr_number"
-- MONTH
,"month_number"
,"month_name"
,"month_day_number"
-- WEEK
,"week_day_number"
-- DAY
,"day_name"
,"day_is_weekday"
,"day_is_last_of_month"
)
SELECT
CAST(seq + 0 AS INTEGER) AS date_id,
-- DATE
datum AS full_date,
-- YEAR
CAST(EXTRACT(YEAR FROM datum) AS SMALLINT) AS year_number,
CAST(EXTRACT(WEEK FROM datum) AS SMALLINT) AS year_week_number,
CAST(EXTRACT(DOY FROM datum) AS SMALLINT) AS year_day_number,
-- QUARTER
CAST(TO_CHAR(datum, 'Q') AS SMALLINT) AS qtr_number,
-- MONTH
CAST(EXTRACT(MONTH FROM datum) AS SMALLINT) AS month_number,
TO_CHAR(datum, 'Month') AS month_name,
CAST(EXTRACT(DAY FROM datum) AS SMALLINT) AS month_day_number,
-- WEEK
CAST(TO_CHAR(datum, 'D') AS SMALLINT) AS week_day_number,
-- DAY
TO_CHAR(datum, 'Day') AS day_name,
CASE WHEN TO_CHAR(datum, 'D') IN ('1', '7')
THEN 0
ELSE 1 END AS day_is_weekday,
CASE WHEN LAST_DAY(datum) = datum THEN 1 ELSE 0 END AS day_is_last_of_month
FROM
-- Generate days for 81 years starting from 2000.
(
SELECT
('2000-01-01' :: DATE - interval '1 day')::DATE + number AS datum,
number AS seq
FROM dimensions.numbers
WHERE number between 0 and 81 * 365 + 20
) DQ
ORDER BY 1;
DROP TABLE dimensions.numbers;