I want to count certain values in all tables of a schema that contain a column that can contain those values.
Was hoping to use a LATERAL join to loop over all tables, but I'm running into issues:
-- Attempted query from the question: list every table in schema 'datastore'
-- that has a column named dss_current_flag, then try to count the rows of each
-- listed table through CROSS JOIN LATERAL.
-- NOTE(review): this attempt is reported as not working. The inner FROM treats
-- the output column fully_qualified_table_name as if it were a relation name,
-- and the answer below states that such dynamic queries are not possible in
-- pure SQL.
select
fully_qualified_table_name,
cnt
from (
select
'datastore.' || table_name as fully_qualified_table_name
from
information_schema.columns
where
table_schema = 'datastore'
and column_name = 'dss_current_flag'
cross join lateral
select
count(*) as cnt
from
information_schema.fully_qualified_table_name
);
Is this possible?
I'm afraid it is not possible to run dynamic queries using pure SQL. You might want to use PL/pgSQL instead, e.g.
-- Returns the total number of rows across every table in schema "datastore"
-- that has a column named dss_current_flag.
--
-- Returns: bigint, 0 when no table matches.
CREATE OR REPLACE FUNCTION count_records()
  RETURNS bigint
  LANGUAGE plpgsql  -- unquoted: enclosing the language name in quotes is deprecated
AS $$
DECLARE
  rec record;       -- one (schema, table) pair per matching column
  ct  bigint := 0;  -- row count of the table currently being inspected
  res bigint := 0;  -- running total over all matching tables
BEGIN
  FOR rec IN
    SELECT table_schema AS sch, table_name AS tb
    FROM information_schema.columns
    WHERE table_schema = 'datastore'
      AND column_name = 'dss_current_flag'
  LOOP
    -- A table name cannot be bound as a parameter, so the statement is built
    -- dynamically; %I quotes each identifier safely.
    EXECUTE format('SELECT count(*) FROM %I.%I', rec.sch, rec.tb)
      INTO ct;
    res := res + ct;
  END LOOP;

  RETURN res;
END
$$;
The more flexible approach would be to provide schema and table names as parameters in the function call instead of hard coding it in the function body, such as CREATE FUNCTION count_records(_schema_name text, _table_name text) .., or even the fully qualified table name as a single parameter: CREATE FUNCTION count_records(_qualified_table_name text) ... .
Demo: db<>fiddle
Based on the answer by #jim-jones my final solution was
-- Composite row type used as the result row of
-- datastore.count_records_in_schema_where_column_has_value.
create type datastore.schema_table_column_counts_type as (
    schema_name  text,   -- schema of the inspected table
    table_name   text,   -- name of the inspected table
    column_name  text,   -- column that was searched
    value        text,   -- value that was searched for
    count_p      bigint  -- count(*) produced for that table
);
-- For every table in schema _schema_name that has a column _column_name,
-- count the rows where that column equals _value.
--
-- Parameters:
--   _schema_name  schema whose tables are inspected
--   _column_name  column that must exist in a table for it to be inspected
--   _value        value to look for; it is embedded as an untyped literal so
--                 it is coerced to the column's own data type
-- Returns: one datastore.schema_table_column_counts_type row per matching table.
CREATE OR REPLACE FUNCTION datastore.count_records_in_schema_where_column_has_value(_schema_name text, _column_name text, _value text)
  RETURNS SETOF datastore.schema_table_column_counts_type
  LANGUAGE plpgsql AS
$$
DECLARE
  rec           record;
  result_record datastore.schema_table_column_counts_type;
BEGIN
  FOR rec IN
    SELECT c.table_schema AS sch,
           c.table_name   AS tb
    FROM information_schema.columns AS c
    WHERE c.table_schema = _schema_name
      AND c.column_name = _column_name
  LOOP
    -- The descriptive columns are plain values; assigning them directly keeps
    -- them out of the dynamic statement, so quotes in a name or in _value
    -- cannot break it.
    result_record.schema_name := rec.sch;
    result_record.table_name  := rec.tb;
    result_record.column_name := _column_name;
    result_record.value       := _value;

    -- Only the count needs dynamic SQL. %I quotes identifiers and %L quotes
    -- the literal.
    EXECUTE format('SELECT count(*) FROM %I.%I WHERE %I = %L',
                   rec.sch, rec.tb, _column_name, _value)
      INTO result_record.count_p;

    RETURN NEXT result_record;
  END LOOP;
END
$$;

SELECT schema_name, table_name, column_name, value, count_p
FROM datastore.count_records_in_schema_where_column_has_value('datastore', 'dss_current_flag', 'P');
I have developed a function to UNION ALL tables from a list of table names (a table called tablelist below) inspired by this SO post.
The initial function just returns a selection, but now I'd like to write a new table with a name taken from a parameter new_table_name.
I'm struggling with the syntax to insert the parameter into the DROP TABLE AND CREATE TABLE statements. Here's one of the attempts which returns ERROR: mismatched parentheses at or near ";"
-- Attempt from the question: f_multi_union(new_tab_name) is meant to drop and
-- re-create working.<new_tab_name> as the UNION ALL of every table listed in
-- working.tablelist.
-- NOTE(review): this version is the one reported to fail with
-- "ERROR: mismatched parentheses at or near ";"". The DROP/CREATE statements
-- are written as bare SQL inside the EXECUTE parentheses rather than as a
-- string, and the function is still declared as returning a table.
DROP FUNCTION IF EXISTS f_multi_union(text);
CREATE OR REPLACE FUNCTION f_multi_union(new_tab_name text)
RETURNS Table (my_id int, metric double precision, geom geometry)
LANGUAGE plpgsql AS
$func$
BEGIN
RETURN QUERY EXECUTE
(
DROP TABLE IF EXISTS working.'' || new_tab_name || '';
CREATE TABLE working.'' || new_tab_name || '' AS (
SELECT string_agg(format('SELECT * FROM %s', tbl), ' UNION ALL ')
FROM (SELECT tbl FROM working.tablelist) sub
)
);
END
$func$;
Something like this?
DROP FUNCTION IF EXISTS f_multi_union(text);

-- (Re)creates table working.<new_tab_name> as the UNION ALL of every table
-- named in working.tablelist.tbl.
--
-- Parameters:
--   new_tab_name  name of the table to create in schema "working"
-- Raises: an exception when working.tablelist yields no table names, before
--         anything is dropped.
CREATE OR REPLACE FUNCTION f_multi_union(new_tab_name text)
  RETURNS void  -- nothing to return
  LANGUAGE plpgsql AS
$func$
DECLARE
   _union_sql text;
BEGIN
   -- The list query is static, so it needs no EXECUTE. %I quotes each table name.
   SELECT string_agg(format('SELECT * FROM %I', tbl), ' UNION ALL ')
   INTO   _union_sql
   FROM   working.tablelist;

   -- string_agg() returns NULL for zero input rows; without this guard the
   -- CREATE below would be built as "CREATE TABLE ... AS " and fail with an
   -- opaque syntax error.
   IF _union_sql IS NULL THEN
      RAISE EXCEPTION 'working.tablelist contains no table names; nothing to union';
   END IF;

   EXECUTE format('DROP TABLE IF EXISTS working.%I', new_tab_name);  -- %I avoids SQL injection
   EXECUTE format('CREATE TABLE working.%I AS %s', new_tab_name, _union_sql);
END
$func$;
I would replace the * in the SELECT statement with the columns that you need.
Using Postgres 13.1, I want to apply a forward fill function to all columns of a table. The forward fill function is explained in my earlier question:
How to do forward fill as a PL/PGSQL function
However, in that case the columns and table are specified. I want to take that code and apply it to an arbitrary table, ie. specify a table and the forward fill is applied to each of the columns.
Using this table as an example:
-- Sample data for the forward-fill question: row_num orders the rows, id is
-- the group, str/val contain the gaps (NULLs) to be filled.
CREATE TABLE example (
    row_num int,
    id      int,
    str     text,
    val     integer
);

-- Explicit column list so the statement keeps working if columns are added
-- or reordered.
INSERT INTO example (row_num, id, str, val) VALUES
  (1,  1, '1a', NULL)
, (2,  1, NULL, 1)
, (3,  2, '2a', 2)
, (4,  2, NULL, NULL)
, (5,  3, NULL, NULL)
, (6,  3, '3a', 31)
, (7,  3, NULL, NULL)
, (8,  3, NULL, 32)
, (9,  3, '3b', NULL)
, (10, 3, NULL, NULL)
;
I start with the following working base for the function. I call it passing in some variable names. Note the first is a table name not a column name. The function takes the table name and creates an array of all the column names and then outputs the names.
-- Collects the column names that information_schema lists for table "tbl",
-- emits one NOTICE per name and returns the names as an array.
-- The id and row_num arguments are accepted but not used by this version.
create or replace function col_collect(tbl text, id text, row_num text)
  returns text[]
  language plpgsql as
$func$
declare
   col_names text[];
   col_name  text;
begin
   -- ARRAY(subquery) yields an empty array (not NULL) when nothing matches,
   -- so the FOREACH below is simply skipped in that case.
   col_names := array(
      select c.column_name
      from information_schema."columns" c
      where c.table_name = tbl
   );

   foreach col_name in array col_names
   loop
      raise notice 'col: %', col_name;
   end loop;

   return col_names;
end
$func$;
I want to apply the "forward fill" function I got from my earlier question to each column of a table. UPDATE seems to be the correct approach. So this is the preceding function where I replace raise notice by an update using execute so I can pass in the table name:
-- Second version from the question: collects the column names of "tbl" as
-- before, then for each column builds an UPDATE statement by string
-- concatenation and runs it with EXECUTE.
-- NOTE(review): the question reports that this fails with a syntax error.
-- The generated statement places "OVER w AS <col>" and a WINDOW clause
-- directly inside UPDATE ... SET, and it calls gapfill() whereas the aggregate
-- defined further down is named gap_fill. Identifiers are concatenated
-- without quoting.
create or replace function col_collect(tbl text, id text, row_num text)
returns void
language plpgsql as
$func$
declare
tmp text[];
col text;
begin
select array (
select column_name
from information_schema."columns" c
where table_name = tbl
) into tmp;
foreach col in array tmp
loop
execute 'update '||tbl||'
set '||col||' = gapfill('||col||') OVER w AS '||col||'
where '||tbl||'.row_num = '||col||'.row_num
window w as (PARTITION BY '||id||' ORDER BY '||row_num||')
returning *;';
end loop;
end
$func$;
-- call the function
select col_collect('example','id','row_num')
The preceding errors out with a syntax error. I have tried many variations on this but they all fail. Helpful answers on SO were here and here. The aggregate function I'm trying to apply (as window function) is:
-- State transition function for the gap_fill aggregate.
--
-- Parameters:
--   s  current state (the last non-null value seen so far, or NULL)
--   v  value of the current row
-- Returns: v when it is not NULL, otherwise the carried state s.
--
-- The result depends only on the arguments, so the function is declared
-- IMMUTABLE and PARALLEL SAFE instead of defaulting to VOLATILE / UNSAFE.
CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
  RETURNS anyelement
  LANGUAGE plpgsql
  IMMUTABLE
  PARALLEL SAFE
AS
$func$
BEGIN
   RETURN COALESCE(v, s);  -- that's all!
END
$func$;
-- Aggregate built on gap_fill_internal; its state has the same type as its
-- input.
create aggregate gap_fill(anyelement) (
    stype = anyelement,
    sfunc = gap_fill_internal
);
My questions are:
is this a good approach and if so what am I doing wrong; or
is there a better way to do this?
What you ask is not a trivial task. You should be comfortable with PL/pgSQL. I do not advise this kind of dynamic SQL queries for beginners, too powerful.
That said, let's dive in. Buckle up!
-- Forward-fills every nullable column of table _tbl in place, using the
-- gap_fill aggregate as a window function.
--
-- Parameters:
--   _tbl      target table (regclass, so it is validated and safely quoted)
--   _id       name of the column to PARTITION BY
--   _row_num  name of the column to ORDER BY; it is also used to join the
--             computed rows back to the table
-- Returns (OUT):
--   nullable_columns  number of columns that were considered for filling
--   updated_rows      number of rows actually changed
-- Raises: an exception when there is no nullable column to fill.
--
-- The _id and _row_num columns are never filled, even when they are nullable.
-- Otherwise the derived table would contain the join column twice, which makes
-- "u.<row_num>" ambiguous, and the partition key would be rewritten.
CREATE OR REPLACE FUNCTION f_gap_fill_update(_tbl regclass, _id text, _row_num text, OUT nullable_columns int, OUT updated_rows int)
  LANGUAGE plpgsql AS
$func$
DECLARE
   _pk  text := quote_ident(_row_num);
   _sql text;
BEGIN
   SELECT INTO _sql, nullable_columns
          concat_ws(E'\n'
          , 'UPDATE ' || _tbl || ' t'
          , 'SET   (' || string_agg(       quote_ident(a.attname), ', ') || ')'
          , '    = (' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
          , 'FROM  ('
          , '   SELECT ' || _pk
          , '        , ' || string_agg(format('gap_fill(%1$I) OVER w AS %1$I', a.attname), ', ')
          , '   FROM   ' || _tbl
          , format('   WINDOW w AS (PARTITION BY %I ORDER BY %s)', _id, _pk)
          , '   ) u'
          , format('WHERE t.%1$s = u.%1$s', _pk)
          -- skip rows that would not change, to avoid empty updates
          , 'AND  (' || string_agg('t.' || quote_ident(a.attname), ', ') || ') IS DISTINCT FROM'
          , '     (' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
          )
        , count(*)  -- AS _col_ct
   FROM  (
      SELECT a.attname
      FROM   pg_attribute a
      WHERE  a.attrelid = _tbl
      AND    a.attnum > 0
      AND    NOT a.attisdropped
      AND    NOT a.attnotnull
      AND    a.attname::text NOT IN (_id, _row_num)  -- never fill the key columns
      ORDER  BY a.attnum
      ) a;

   IF nullable_columns = 0 THEN
      RAISE EXCEPTION 'No nullable columns found in table >>%<<', _tbl;
   ELSIF _sql IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _sql;  -- debug
   EXECUTE _sql;               -- execute
   GET DIAGNOSTICS updated_rows = ROW_COUNT;
END
$func$;
Example call:
SELECT * FROM f_gap_fill_update('example', 'id', 'row_num');
db<>fiddle here
The function is state of the art.
Generates and executes a query of the form:
-- Illustration of the statement that f_gap_fill_update generates, shown here
-- for a table "tbl" whose nullable columns are str, val and col1.
UPDATE tbl t
SET (str, val, col1)
= (u.str, u.val, u.col1)
FROM (
SELECT row_num
, gap_fill(str) OVER w AS str, gap_fill(val) OVER w AS val
, gap_fill(col1) OVER w AS col1
FROM tbl
WINDOW w AS (PARTITION BY id ORDER BY row_num)
) u
WHERE t.row_num = u.row_num
AND (t.str, t.val, t.col1) IS DISTINCT FROM
(u.str, u.val, u.col1)
Using pg_catalog.pg_attribute instead of the information schema. See:
"Information schema vs. system catalogs"
Note the final WHERE clause to prevent (possibly expensive) empty updates. Only rows that actually change will be written. See:
How do I (or can I) SELECT DISTINCT on multiple columns?
Moreover, only nullable columns (not defined NOT NULL) will even be considered, to avoid unnecessary work.
Using ROW syntax in UPDATE to keep the code simple. See:
SQL update fields of one table from fields of another one
The function returns two integer values: nullable_columns and updated_rows, reporting what the names suggest.
The function defends against SQL injection properly. See:
Table name as a PostgreSQL function parameter
SQL injection in Postgres functions vs prepared queries
About GET DIAGNOSTICS:
Calculate number of rows affected by batch query in PostgreSQL
The above function updates, but does not return rows. Here is a basic demo how to return rows of varying type:
-- Returns the rows of a table with every nullable column forward-filled by
-- the gap_fill aggregate; NOT NULL columns are passed through unchanged.
--
-- Parameters:
--   _tbl_type  a (typically NULL) value of the table's row type; it supplies
--              both the table to read and the polymorphic return type
--   _id        name of the column to PARTITION BY
--   _row_num   name of the column to ORDER BY
-- Returns: SETOF the table's row type.
CREATE OR REPLACE FUNCTION f_gap_fill_select(_tbl_type anyelement, _id text, _row_num text)
  RETURNS SETOF anyelement
  LANGUAGE plpgsql AS
$func$
DECLARE
   _rel   regclass := pg_typeof(_tbl_type)::text::regclass;  -- table behind the row type
   _query text;
BEGIN
   SELECT INTO _query
          'SELECT '
       || string_agg(
             CASE WHEN NOT a.attnotnull
                  THEN format('gap_fill(%1$I) OVER w AS %1$I', a.attname)
                  ELSE quote_ident(a.attname)
             END
           , ', ' ORDER BY a.attnum)
       || E'\nFROM ' || _rel
       || format(E'\nWINDOW w AS (PARTITION BY %I ORDER BY %I)', _id, _row_num)
   FROM   pg_attribute a
   WHERE  a.attrelid = _rel
   AND    a.attnum > 0
   AND    NOT a.attisdropped;

   IF _query IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _query;  -- debug
   RETURN QUERY EXECUTE _query;
END
$func$;
Call (note special syntax!):
SELECT * FROM f_gap_fill_select(NULL::example, 'id', 'row_num');
db<>fiddle here
About returning a polymorphic row type:
Refactor a PL/pgSQL function to return the output of various SELECT queries
Hello, I am having trouble querying when there is an apostrophe in my WHERE clause in PostgreSQL, using a PL/pgSQL function. I know that manually I could do something like:
select 'author''s'
however my word is stored in a variable, here is my function:
-- Registers a document and indexes its words.
--
-- Parameters:
--   caminho_arqv  path of the document; inserted into documento(caminho)
--                 unless a row with that path already exists
--   conteudo      document text; split on whitespace, and every word of at
--                 least 3 characters is inserted into termo(descricao) if
--                 missing and counted in documento_termo(frequencia)
--
-- All values are passed to the dynamic statements with EXECUTE ... USING
-- instead of being concatenated into the SQL text, so words or paths that
-- contain an apostrophe (e.g. author's) no longer break the statement and
-- cannot inject SQL.
CREATE OR REPLACE FUNCTION public.fn_inserir_doc(caminho_arqv text, conteudo text)
  RETURNS void
  LANGUAGE plpgsql
AS $function$
declare
    conteudo_array text array;
    palavra        text;
begin
    execute 'insert into documento(caminho)
             select $1
             where not exists(select id
                              from documento
                              where caminho = $1)'
    using caminho_arqv;

    conteudo_array := regexp_split_to_array(conteudo, E'\\s+');

    foreach palavra in array conteudo_array
    loop
        if length(palavra) >= 3 then
            raise notice 'palavra: %', palavra;

            execute 'insert into termo(descricao)
                     select $1
                     where not exists(select id
                                      from termo
                                      where descricao = $1)'
            using palavra;

            -- one more occurrence of this word in this document
            execute 'insert into documento_termo(id_termo, id_documento, frequencia)
                     select t.id, d.id, 1
                     from termo t
                     cross join documento d
                     where t.descricao = $1
                     and d.caminho = $2
                     on conflict (id_termo, id_documento)
                     do update set frequencia = documento_termo.frequencia + 1'
            using palavra, caminho_arqv;
        end if;
    end loop;
end;
$function$;
The following sample is the one that has the problem:
select id from termo
where descricao='''||palavra||'''
because palavra contains single quote
Use dollar quoting and the function format(). Example:
-- Demonstrates safe quoting in dynamic SQL: returns str as a one-row set by
-- embedding it as a literal through format() and %L.
CREATE OR REPLACE FUNCTION test(str text)
  RETURNS SETOF text
  LANGUAGE plpgsql AS
$$
BEGIN
   -- Do not concatenate the value by hand, as in
   --    return query execute 'select '''||str||'''::text';
   -- Let format() quote it instead:
   RETURN QUERY EXECUTE format(
      $fmt$
      select %L::text
      $fmt$
    , str);
END
$$;
select * from test('O''Brian');
test
---------
O'Brian
(1 row)
In the PostgreSQL function below I am trying to get results from two different tables, but it throws the error ERROR: 42601: a column definition list is required for functions returning "record". Can anyone please help me?
-- Aggregates the (user_id, user_name) rows of "user" and the
-- (client_id, client_name) rows of "clients" into one array each and returns
-- both arrays as a single anonymous record.
-- The _session argument is accepted but not used.
CREATE OR REPLACE FUNCTION load_page_record(IN _session int)
  RETURNS record
  LANGUAGE plpgsql AS
$$
DECLARE
   users_rec   record;
   clients_rec record;
   page_rec    record;
BEGIN
   SELECT array_agg(u.*) AS arr
   INTO   users_rec
   FROM  (SELECT user_id, user_name
          FROM   "user") u;

   SELECT array_agg(c.*) AS arr
   INTO   clients_rec
   FROM  (SELECT client_id, client_name
          FROM   "clients") c;

   SELECT users_rec.arr, clients_rec.arr
   INTO   page_rec;

   RETURN page_rec;
END;
$$;
It returns a record,
so you should call the function as below,
select load_page_record(5);
The error occurs if you call it as a table:
select * from load_page_record(5);
If you want to return a table, place your query with the join inside the body as follows:
-- Returns column1/column2 from table1 joined to table2 on id.
-- The _session argument is accepted but not used.
--
-- Declared LANGUAGE sql because the body is a single bare SQL statement; a
-- PL/pgSQL body would need BEGIN ... RETURN QUERY ... END and would not
-- compile as written.
-- NOTE(review): column1 and column2 are left unqualified because it is not
-- visible here which of the two tables owns each one. Qualify them with a. or
-- b. if both tables have a column of that name.
CREATE OR REPLACE FUNCTION load_page_record1(IN _session int)
  RETURNS TABLE (column1 integer, column2 integer)
  LANGUAGE sql AS
$BODY$
   SELECT column1, column2
   FROM   table1 a
   JOIN   table2 b ON a.id = b.id
$BODY$;
Try this; the function returns a table:
-- Variant that declares a table result with two record[] columns and returns
-- the aggregated "user" rows and the aggregated "clients" rows in one row.
-- The _session argument is accepted but not used.
-- NOTE(review): record[] is a pseudo-type, so PL/pgSQL may reject it as a
-- column type here; to be confirmed. The edited variant that follows casts
-- both arrays to text instead.
CREATE OR REPLACE FUNCTION load_page_record(IN _session INT)
RETURNS table(col1 record[],col2 record[]) AS
$BODY$
BEGIN
RETURN QUERY
select
(SELECT array_agg(sq.*)
FROM (SELECT user_id, user_name
FROM "user"
) sq
),
(SELECT array_agg(sq.*)
FROM (SELECT client_id, client_name
FROM "clients"
) sq
);
END;
$BODY$ LANGUAGE plpgsql stable;
edit: convert to text, try it
-- Returns one row with two text columns: the aggregated (user_id, user_name)
-- rows of "user" and the aggregated (client_id, client_name) rows of
-- "clients", each array cast to its text representation.
-- The _session argument is accepted but not used.
CREATE OR REPLACE FUNCTION load_page_record(IN _session int)
  RETURNS TABLE (col1 text, col2 text)
  LANGUAGE plpgsql
  STABLE AS
$BODY$
BEGIN
   RETURN QUERY
   SELECT (SELECT array_agg(u.*)
           FROM  (SELECT user_id, user_name
                  FROM   "user") u)::text
        , (SELECT array_agg(c.*)
           FROM  (SELECT client_id, client_name
                  FROM   "clients") c)::text;
END;
$BODY$;
try with text:
-- Demo variant with a text result: builds two one-row arrays from literal
-- values and selects both into a single text variable.
-- NOTE: the final SELECT ... INTO has two source columns but a single scalar
-- target, so only the first column is stored in the returned text.
-- The _session argument is accepted but not used.
CREATE OR REPLACE FUNCTION load_page_record(IN _session int)
  RETURNS text
  LANGUAGE plpgsql AS
$$
DECLARE
   first_rec  record;
   second_rec record;
   page_text  text;
BEGIN
   SELECT array_agg(a.*) AS arr
   INTO   first_rec
   FROM  (SELECT 'fdfdfd','fdfdd') a;

   SELECT array_agg(b.*) AS arr
   INTO   second_rec
   FROM  (SELECT 'dsds','sdsd') b;

   SELECT first_rec.arr, second_rec.arr
   INTO   page_text;

   RETURN page_text;
END;
$$;
and then simply:
select * from load_page_record(8);
but I hope you are aware of the fact that this instruction SELECT r1.arr, r2.arr INTO RESULT; will only assign the first column to RESULT?