Use lateral join to loop over all tables from schema - sql

I want to count certain values in all tables of a schema that contain a column that can contain those values.
Was hoping to use a LATERAL join to loop over all tables, but I'm running into issues:
-- Attempted query (does not run as written). Two things stand in its way:
--   * "cross join lateral" is placed after the WHERE clause, and its subquery
--     is neither parenthesized nor aliased;
--   * the last FROM item uses the alias fully_qualified_table_name as if it
--     were a relation name, but a relation name cannot be supplied by a column
--     value in plain SQL (that requires dynamic SQL).
select
fully_qualified_table_name,
cnt
from (
select
'datastore.' || table_name as fully_qualified_table_name
from
information_schema.columns
where
table_schema = 'datastore'
and column_name = 'dss_current_flag'
cross join lateral
select
count(*) as cnt
from
information_schema.fully_qualified_table_name
);
Is this possible?

I'm afraid it is not possible to run dynamic queries using pure SQL. You might wanna check PL/pgSQL instead, e.g.
-- count_records(): total number of rows across every relation in schema
-- "datastore" that has a column named dss_current_flag.
-- Returns 0 when no relation qualifies.
CREATE OR REPLACE FUNCTION count_records()
RETURNS bigint
LANGUAGE plpgsql
AS $func$
DECLARE
    target      record;       -- current (schema, table) pair
    grand_total bigint := 0;  -- running sum over all tables
    table_rows  bigint := 0;  -- row count of the current table
BEGIN
    FOR target IN
        SELECT c.table_schema AS sch,
               c.table_name   AS tb
        FROM information_schema.columns AS c
        WHERE c.table_schema = 'datastore'
          AND c.column_name = 'dss_current_flag'
    LOOP
        -- The table name is only known at run time, so the count has to be
        -- issued as dynamic SQL; %I quotes each identifier safely.
        EXECUTE format($ex$ SELECT count(*) FROM %I.%I $ex$, target.sch, target.tb)
        INTO table_rows;

        grand_total := grand_total + table_rows;
    END LOOP;

    RETURN grand_total;
END
$func$;
The more flexible approach would be to provide schema and table names as parameters in the function call instead of hard coding it in the function body, such as CREATE FUNCTION count_records(_schema_name text, _table_name text) .., or even the fully qualified table name as a single parameter: CREATE FUNCTION count_records(_qualified_table_name text) ... .
Demo: db<>fiddle

Based on the answer by @jim-jones, my final solution was:
-- Row type returned by datastore.count_records_in_schema_where_column_has_value:
-- one row per table that was counted.
CREATE TYPE datastore.schema_table_column_counts_type AS (
schema_name text, -- schema of the counted table
table_name text, -- name of the counted table
column_name text, -- column that was compared against "value"
value text, -- value the column was compared to
count_p bigint); -- number of rows in the table where column_name = value
-- For every table in _schema_name that has a column called _column_name,
-- count the rows where that column equals _value.
--
-- Parameters:
--   _schema_name  schema whose tables are inspected
--   _column_name  column that must exist in a table for it to be counted
--   _value        value the column is compared to (passed as a literal)
-- Returns: one datastore.schema_table_column_counts_type row per matching
--          table; no rows when nothing in the schema has that column.
CREATE OR REPLACE FUNCTION datastore.count_records_in_schema_where_column_has_value(_schema_name text, _column_name text, _value text)
RETURNS SETOF datastore.schema_table_column_counts_type
LANGUAGE plpgsql
AS $$
DECLARE
    rec record;
    result_record datastore.schema_table_column_counts_type;
BEGIN
    FOR rec IN
        SELECT c.table_schema AS sch,
               c.table_name   AS tb
        FROM information_schema.columns AS c
        WHERE c.table_schema = _schema_name
          AND c.column_name = _column_name
    LOOP
        -- %I quotes identifiers and %L quotes literals, so mixed-case names
        -- and values containing quotes neither break nor inject into the
        -- generated statement.
        EXECUTE format($ex$
            SELECT
                %1$L::text AS schema_name,
                %2$L::text AS table_name,
                %3$L::text AS column_name,
                %4$L::text AS value,
                count(*)
            FROM
                %1$I.%2$I
            WHERE
                %3$I = %4$L
            $ex$
            , rec.sch, rec.tb, _column_name, _value)
        INTO result_record;

        RETURN NEXT result_record;
    END LOOP;
END
$$;
SELECT * from datastore.count_records_in_schema_where_column_has_value('datastore', 'dss_current_flag', 'P');

Related

dynamic SQL ERROR: column "age" does not exist

postgres 12
I am trying to loop through a table which has schema , table_names and columns
I want to do various things like finding nulls ,row count etc. I failed at the first hurdle trying to update the col records.
table i am using
-- Work table: one row per (schema, table, column) that should be profiled.
-- Only the first three columns are filled by the initial insert; "records" is
-- the one the procedure below tries to fill with the table's row count.
-- NOTE(review): the intended content of No_Nulls, No_Blanks and per_pop is not
-- shown here — presumably null count, blank count and percent populated; confirm.
CREATE TABLE test.table_study (
table_schema text,
table_name text,
column_name text,
records int,
No_Nulls int,
No_Blanks int,
per_pop int
);
I populate the table with some schema names ,tables and columns from information_schema.columns
-- Seed the work table with every column of every schema whose name ends in
-- "white". The target columns are listed explicitly so the statement keeps
-- working if test.table_study gains or reorders columns.
INSERT INTO test.table_study (table_schema, table_name, column_name)
SELECT
    c.table_schema,
    c.table_name,
    c.column_name
FROM information_schema.columns AS c
WHERE c.table_schema LIKE '%white'
ORDER BY c.table_schema, c.table_name, c.ordinal_position;
I want to populate the rest with a function
function :-
-- Tries to fill test.table_study.records with the row count of each listed
-- table. It loops over schemas, then over the tables of that schema, then
-- over columns, and runs one dynamically built UPDATE per innermost iteration.
CREATE OR REPLACE PROCEDURE test.insert_data_population()
as $$
declare s record;
declare t record;
declare c record;
BEGIN
FOR s IN SELECT distinct table_schema FROM test.table_study
LOOP
FOR t IN SELECT distinct table_name FROM test.table_study where table_schema = s.table_schema
loop
-- NOTE(review): this filter uses table_name only, so rows for a same-named
-- table in a different schema are iterated as well; c itself is never used
-- in the statement below.
FOR c IN SELECT column_name FROM test.table_study where table_name = t.table_name
LOOP
-- NOTE(review): t.table_name is concatenated without quotes, so for a table
-- called age the generated statement ends in "... table_name = age" and age is
-- parsed as a column reference instead of a string; that matches the reported
-- 42703 error.
execute 'update test.table_study set records = (select count(*) from ' || s.table_schema || '.' || t.table_name || ') where table_study.table_name = '|| t.table_name ||';';
END LOOP;
END LOOP;
END LOOP;
END;
$$
LANGUAGE plpgsql;
I get this error SQL Error [42703]: ERROR: column "age" does not exist. the table age does exist.
when I take out the where clause
execute 'update referralunion.testinsert ti set records = (select count(*) from ' || s.table_schema || '.' || t.table_name || ') ;';
it works, I just cant figure out whats wrong?
Your procedure is structured entirely wrong. What it results in is an attempt to get every column name for every table name in every schema. I would guess that this is what results in your "column does not exist" error. Further, it shows procedural thinking; SQL requires thinking in terms of sets. Below I use basically your query to demonstrate, and then a revised version which uses a single loop.
-- setup (dropping schema references)
-- Work table with the same shape as in the question.
CREATE TABLE table_study (
    table_schema text,
    table_name   text,
    column_name  text,
    records      int,
    no_nulls     int,
    no_blanks    int,
    per_pop      int
);

-- Two schemas that each contain a table named t1, with different columns.
INSERT INTO table_study (table_schema, table_name, column_name)
VALUES
    ('s1', 't1', 'age'),
    ('s2', 't1', 'xyz');
-- procedure replacing EXECUTE with Raise Notice.
-- Walks table_study with the same three nested loops as the question
-- (schema -> table -> column) and prints, as a NOTICE, the statement each
-- innermost iteration would have produced, prefixed with a running number.
CREATE OR REPLACE PROCEDURE insert_data_population()
LANGUAGE plpgsql
AS $proc$
DECLARE
    schema_rec record;
    table_rec  record;
    column_rec record;
    line_no    int := 0;  -- running number printed in front of each notice
BEGIN
    FOR schema_rec IN SELECT DISTINCT table_schema FROM table_study
    LOOP
        FOR table_rec IN SELECT DISTINCT table_name FROM table_study WHERE table_schema = schema_rec.table_schema
        LOOP
            -- Filters on table_name only, exactly like the question's code.
            FOR column_rec IN SELECT column_name FROM table_study WHERE table_name = table_rec.table_name
            LOOP
                line_no := line_no + 1;
                RAISE NOTICE '%: update table_study set records = (select count(*) from %.% where table_study.table_name = %;'
                    , line_no
                    , schema_rec.table_schema
                    , table_rec.table_name
                    , column_rec.column_name;
            END LOOP;
        END LOOP;
    END LOOP;
END;
$proc$;
Run procedure
-- Invoke the procedure from an anonymous block; its output arrives as NOTICE
-- messages rather than as a result set.
do $$
begin
call insert_data_population();
end;
$$;
RESULTS
1: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = age; 2: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = xyz; 3: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = age; 4: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = xyz;
Notice lines 2 and 3. Each references a column name that does not exist in the table. This results from the FOR structure with the same table name in different schema.
Revision for Single Select statement with Single For loop.
-- Single-loop revision: one pass over the distinct
-- (table_schema, table_name, column_name) combinations of table_study,
-- printing one numbered NOTICE per combination.
CREATE OR REPLACE PROCEDURE insert_data_population()
LANGUAGE plpgsql
AS $proc$
DECLARE
    entry   record;    -- current (schema, table, column) combination
    line_no int := 0;  -- running number printed in front of each notice
BEGIN
    FOR entry IN
        SELECT DISTINCT table_schema, table_name, column_name
        FROM table_study
    LOOP
        line_no := line_no + 1;
        RAISE NOTICE '%: update table_study set records = (select count(*) from %.% where table_study.table_name = %;'
            , line_no
            , entry.table_schema
            , entry.table_name
            , entry.column_name;
    END LOOP;
END;
$proc$;
-- Run the procedure from an anonymous block; each loop iteration is
-- reported as a NOTICE message.
do $$
begin
call insert_data_population();
end;
$$;
RESULTS
1: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = xyz;
2: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = age;
Note: In Postgres, DECLARE begins a block. It is not necessary to repeat DECLARE for each variable; I would actually consider that bad practice. In theory it could require an END for each DECLARE, as each could be considered a nested block. Fortunately Postgres does not require this.

Extract specific Columns from PostgreSQL table and Do an update on its values

I have a PostgreSQL database and I need to do an update over values of specific Columns. The number of columns is so big and I need to do the same operation to different table So better to extract them dynamically.
More specifically I want to extract from the table all the columns whose names ends with "_suffix" and do an update on their values.
I started trying to make a script but I don't know if it is the right road!
-- Lists the columns of public.myInitialTable whose name contains "_suffix".
-- The backslash makes the underscore a literal character instead of the
-- single-character LIKE wildcard.
-- NOTE(review): the trailing % lets "_suffix" match anywhere in the name, not
-- only at the end; drop it if only names ending in "_suffix" are wanted.
SELECT columns.column_name
FROM information_schema.columns
WHERE columns.table_name = 'myInitialTable' AND columns.column_name like '%\_suffix%' AND columns.table_schema = 'public';
I created a view of this query and I used it in the following function :
-- Attempt to normalize the values of every column listed by the view
-- v_reduced_table, one UPDATE per column. Always returns 1.
-- NOTE(review): r.column_name is a PL/pgSQL variable, so inside this static
-- UPDATE it is substituted as a value parameter ($1, $2, ...) rather than used
-- as a column identifier; that is what the reported 'syntax error at or near
-- "$1"' shows. A column name that is only known at run time requires dynamic
-- SQL (EXECUTE).
CREATE OR REPLACE FUNCTION updatetable() RETURNS int4 AS
$BODY$
DECLARE r RECORD;
BEGIN
FOR r IN SELECT * from v_reduced_table LOOP
update myInitialTable
set r.column_name = case
when r.column_name = '' then NULL
when r.column_name = 'value1' or r.column_name = 'value2' then 'xxxxx'
else r.column_name end;
END LOOP;
return 1;
END;
$BODY$
LANGUAGE plpgsql;
SELECT updatetable() as output;
This function loops over every column ending with the suffix and updates its values, but when I run it I get:
ERROR: syntax error at or near "$1"
LINE 1: update myInitialTable set $1 = case when $2 = '' then NULL when ...
Any help is appreciated :)
In your function you need to use dynamic commands.
The function format() is often very helpful.
Example data:
-- Example table with three "_suffix" columns and a single row.
CREATE TABLE my_table (
    col1_suffix text,
    col2_suffix text,
    col3_suffix text
);

-- Target columns are named explicitly so the insert does not depend on the
-- physical column order.
INSERT INTO my_table (col1_suffix, col2_suffix, col3_suffix)
VALUES ('a', 'b', 'c');
Example function:
-- update_my_table(): rewrites every "_suffix" column of public.my_table.
--
-- For each matching column one dynamic UPDATE is executed; the new value
-- depends on the column's name:
--   col1_suffix                -> 'col1'
--   col2_suffix / col3_suffix  -> 'xxxxx'
--   any other matching column  -> NULL (the CASE has no ELSE branch)
-- The UPDATE has no WHERE clause on purpose: every row of my_table is changed.
-- Returns nothing.
CREATE OR REPLACE FUNCTION update_my_table() RETURNS void AS
$BODY$
DECLARE
    r RECORD;
BEGIN
    FOR r IN
        SELECT columns.column_name
        FROM information_schema.columns
        WHERE columns.table_name = 'my_table'
          -- "\_" is a literal underscore; the trailing % means the name only
          -- has to contain "_suffix", not end with it.
          AND columns.column_name LIKE '%\_suffix%'
          AND columns.table_schema = 'public'
    LOOP
        -- %1$I inserts the column name as a safely quoted identifier and
        -- %1$L as a safely quoted string literal, so unusual column names
        -- cannot break or alter the generated statement.
        EXECUTE format($f$
            UPDATE my_table
            SET %1$I = CASE
                WHEN %1$L = 'col1_suffix' THEN 'col1'
                WHEN %1$L = 'col2_suffix' OR %1$L = 'col3_suffix' THEN 'xxxxx'
            END;$f$, r.column_name);
    END LOOP;
END;
$BODY$
LANGUAGE plpgsql;
Usage:
select update_my_table();
select * from my_table;
col1_suffix | col2_suffix | col3_suffix
-------------+-------------+-------------
col1 | xxxxx | xxxxx
(1 row)

CREATE UNIQUE INDEX IF NOT EXISTS in postgreSQL

Please, I would like to do something like this in PostgreSQL:
CREATE UNIQUE INDEX IF NOT EXISTS
Any idea?
You can check, if an index with a given name exists with this statement.
If your index name is some_table_some_field_idx
-- Returns a single boolean: true when an index named
-- some_table_some_field_idx exists (in any schema), false otherwise.
-- EXISTS stops at the first match instead of counting every match, and
-- relkind 'I' also covers partitioned indexes.
SELECT EXISTS (
    SELECT 1
    FROM pg_class AS c
    WHERE c.relname = 'some_table_some_field_idx'
      AND c.relkind IN ('i', 'I')
) AS index_exists;
Starting from Postgres 9.5 you can even use
CREATE INDEX IF NOT EXISTS
Just another ready-to-use solution.
PostgreSQL v9.0+:
-- Create the index and downgrade the "relation already exists" error
-- (condition duplicate_table) to a NOTICE, so the script can be re-run.
-- Any other error still propagates.
DO $BLOCK$
BEGIN
    CREATE INDEX index_name ON table_name( column_name );
EXCEPTION
    WHEN duplicate_table THEN
        RAISE NOTICE 'index ''index_name '' on table_name already exists, skipping';
END;
$BLOCK$;
PostgreSQL v9.5+:
CREATE INDEX IF NOT EXISTS index_name ON table_name( column_name );
I have wrapped a_horse_with_no_name's code with PLSQL function for more convenient usage. I hope somebody will find it useful.
-- create_index(table_name, index_name, column_name)
--
-- Creates a single-column index unless pg_indexes already lists an index with
-- that name on that table in schema "public".
--
-- Parameters:
--   table_name   table to index (case-insensitive, folded to lower case)
--   index_name   name of the index (case-insensitive, folded to lower case)
--   column_name  the one column to index (folded to lower case)
-- Returns nothing.
--
-- The names are folded to lower case because the original statement used them
-- unquoted; they are then inserted with %I so that arbitrary input cannot
-- alter the generated DDL.
-- NOTE: check-then-create is not atomic; on PostgreSQL 9.5+ prefer
-- CREATE INDEX IF NOT EXISTS.
CREATE OR REPLACE FUNCTION create_index(table_name text, index_name text, column_name text) RETURNS void AS $$
declare
    l_table  text := lower(table_name);
    l_index  text := lower(index_name);
    l_column text := lower(column_name);
begin
    if not exists (
        select 1
        from pg_indexes as i
        where i.schemaname = 'public'
          and i.tablename = l_table
          and i.indexname = l_index
    ) then
        execute format('create index %I on %I (%I)', l_index, l_table, l_column);
    end if;
end;
$$ LANGUAGE plpgsql;
usage:
select create_index('my_table', 'my_index_name', 'id');
You need some procedural code for this, something like this (untested!):
-- Creates the unique index public.your_table(id) only when pg_indexes does
-- not list an index of that name on that table yet.
-- CREATE INDEX does not accept a schema-qualified index name (the index is
-- always created in the schema of its table), so the name is given bare.
do
$$
begin
    if not exists (
        select 1
        from pg_indexes as i
        where i.schemaname = 'public'
          and i.tablename = 'your_table'
          and i.indexname = 'your_index_name'
    ) then
        execute 'create unique index your_index_name on public.your_table(id)';
    end if;
end;
$$;
If you are still stuck in previous versions, I would recommend not using count, but just the query directly in your if condition. Makes the code simpler. You can try something like this:
-- Template for servers older than 9.5: create the unique index only when
-- pg_indexes has no entry for it. Replace schemaname, tablename, indexname
-- and column_name with the real names.
do
$$
begin
    if not exists (
        select 1
        from pg_indexes as i
        where i.schemaname = 'schemaname'
          and i.tablename = 'tablename'
          and i.indexname = 'indexname'
    )
    then
        -- The statement needs the INDEX keyword and an ON <table> clause.
        create unique index indexname on schemaname.tablename (column_name);
    end if;
end
$$;
Another solution that supports multi-column indexes, based on @Kragh's answer:
-- create_index(_index, _table, VARIADIC columns...)
--
-- Creates an index named _index on _table over the given columns unless
-- pg_indexes already lists that index on that table in schema "public".
--
-- Parameters:
--   _index      index name, used exactly as given (it is quoted with %I)
--   _table      table name, used exactly as given (it is quoted with %I)
--   param_args  one or more column names / index expressions
-- Returns nothing.
--
-- The existence check compares the names exactly as given, because %I
-- creates them case-preserved; lower-casing them for the lookup would miss
-- an existing mixed-case index and fail on the second call.
-- NOTE(review): the column list is interpolated verbatim (%s) so that
-- expressions keep working; only pass trusted values in param_args.
CREATE OR REPLACE FUNCTION create_index(_index text, _table text, VARIADIC param_args text[]) RETURNS void AS
$$
begin
    if not exists (
        select 1
        from pg_indexes as i
        where i.schemaname = 'public'
          and i.tablename = _table
          and i.indexname = _index
    ) then
        EXECUTE format('create index %I on %I (%s)', _index, _table, array_to_string(param_args, ','));
    end if;
END;
$$
LANGUAGE plpgsql;
and then you can use it like any other pg function:
select create_index('events_timestamp_type_idx', 'events', 'timestamp', 'type');

How can I show all tables in data output tab for plpgsql function?

-- Fragment of a PL/pgSQL function body (header and END are not shown).
-- Each table name is reported with RAISE NOTICE, but only the last value
-- assigned to table_all is returned, which is why a single value appears in
-- the data output.
DECLARE
    alltables record;
    table_all varchar;
BEGIN
    for alltables in
        select distinct table_name, column_name
        from information_schema.columns
    loop
        table_all = alltables.table_name;
        raise notice 'TAB_Name:%', table_all;
    end loop;
    return table_all;
In here, I can see all tables in (raise notice 'TAB_Name:% , table_all;) message tab in PgAdmin
but Data output tab (return table_all;) return only one column
How can I show all the tables in the data output tab?
I am not sure if I understand your question. You probably want to write a table function.
-- xxx(): returns one (table_name, column_name) row for every column visible
-- in information_schema.columns.
--
-- table_name and column_name are the function's output columns; the loop
-- assigns each query row to them and RETURN NEXT emits the current pair.
-- The query columns are qualified with the alias c so they cannot be confused
-- with the output columns of the same name.
CREATE OR REPLACE FUNCTION xxx()
RETURNS TABLE(table_name text, column_name text)
AS $$
BEGIN
    FOR table_name, column_name IN
        SELECT c.table_name::text, c.column_name::text
        FROM information_schema.columns AS c
    LOOP
        RETURN NEXT;
    END LOOP;
    RETURN;
END;
$$ LANGUAGE plpgsql;
or a little bit simpler and a little bit faster:
-- xxx(): returns one (table_name, column_name) row for every column visible
-- in information_schema.columns, streaming the query result directly.
--
-- The casts to text make the query's column types match the declared result
-- columns, and the alias c keeps the catalog columns apart from the output
-- columns of the same name.
CREATE OR REPLACE FUNCTION xxx()
RETURNS TABLE(table_name text, column_name text)
AS $$
BEGIN
    RETURN QUERY
        SELECT c.table_name::text, c.column_name::text
        FROM information_schema.columns AS c;
    RETURN;
END;
$$ LANGUAGE plpgsql;
you can call it
SELECT * FROM xxx();

Store select query's output in one array in postgres

My code is:
-- Column names of table "aean" (in any schema), one row per column.
-- The catalog schema is information_schema; written with dots it would be
-- parsed as database.schema.table.
SELECT c.column_name
FROM information_schema.columns AS c
WHERE c.table_name = 'aean'
It returns column names of table aean.
Now I have declared an array:
DECLARE colnames text[]
How can I store select's output in colnames array.
Is there any need to initialize colnames?
There are two ways. One is to aggregate:
-- All column names of table "aean" aggregated into a single text[] value
-- (NULL when the table has no visible columns). The schema name is
-- information_schema, not information.schema.
SELECT array_agg(c.column_name::TEXT)
FROM information_schema.columns AS c
WHERE c.table_name = 'aean'
The other is to use an array constructor:
-- Builds an array from the subquery's single output column: the column names
-- of table "aean".
-- NOTE(review): a later answer reports that the element type must be given
-- explicitly (column_name::text) for this to work — confirm on your version.
SELECT ARRAY(
SELECT column_name
FROM information_schema.columns
WHERE table_name = 'aean'
)
I'm presuming this is for plpgsql. In that case you can assign it like this:
-- PL/pgSQL assignment: store the column names of table "aean" in the array
-- variable colnames.
-- NOTE(review): if the assignment is rejected because of the element type,
-- cast the column with column_name::text — confirm on your version.
colnames := ARRAY(
SELECT column_name
FROM information_schema.columns
WHERE table_name='aean'
);
I had exactly the same problem. Just one more working modification of the solution given by Denis (the type must be specified):
-- Same array constructor, with the column cast to text so the array has an
-- explicit element type (text[]).
SELECT ARRAY(
SELECT column_name::text
FROM information_schema.columns
WHERE table_name='aean'
)
Regular:
SELECT post_id FROM posts WHERE(poster_name='John');
output: [
{'post_id': 1},
{'post_id': 2},
{'post_id': 3},
]
Using ARRAY_AGG:
SELECT ARRAY_AGG(post_id) FROM posts WHERE(poster_name='John');
output: [
{[1, 2, 3]}
]
Casting to the datatype "TEXT" will ensure that your queries will run without any problem.
In plpgsql when we assign to a array variable, we need not use the type casting. My requirement was to get a CSV of all the column names of a particular table. I'd used the following code in plpgsql.
-- Fragment of a PL/pgSQL function body: collects t.name from frm_columns for
-- tname 'emp_mstr' into an array, then joins the elements with commas and
-- returns the resulting string.
Declare col_list varchar[]:=NULL;
cols varchar:=NULL;
Begin
col_list := ARRAY(select t.name from frm_columns t where t.tname='emp_mstr');
-- array_to_string skips NULL elements when no null-replacement is given
cols := array_to_string(col_list,',');
return cols;
End;
-- f_test_array(_colname): reports whether a table named "customer" (in any
-- schema) has a column called _colname.
--
-- Parameters:
--   _colname  column name to look for (exact, case-sensitive match)
-- Returns: '<_colname> it exits.' when the column is found, otherwise
--          '<_colname> not exits.' (message texts kept as originally written).
--
-- The membership test is used directly as the IF condition. Wrapping it in
-- EXISTS (SELECT ...) would always be true, because that SELECT returns
-- exactly one row whether the comparison is true, false or NULL.
CREATE OR REPLACE FUNCTION f_test_array(in _colname text)
returns text as $body$
DECLARE
    colnames text[];
begin
    colnames := ARRAY(
        SELECT c.column_name::text
        FROM information_schema.columns AS c
        WHERE c.table_name = 'customer'
    );

    -- A NULL _colname yields NULL here, which IF treats as false.
    if _colname = any(colnames) then
        return format('%s it exits.', _colname);
    else
        return format('%s not exits.', _colname);
    end if;
end
$body$
LANGUAGE plpgsql;
This checks whether the column exists or not.
Key point: the membership test `_colname = any(colnames)`.
We can also use string_agg.
string_agg usage:
-- f_test_array1(_colname): string_agg variant of the column-existence check
-- for a table named "customer" (in any schema).
--
-- Parameters:
--   _colname  column name to look for (compared case-insensitively)
-- Returns: 'column <_colname> exits.' when found, otherwise
--          'column <_colname> does not exits.' (texts kept as originally
--          written).
--
-- The comma-separated list is split back into whole names before comparing:
-- a substring/LIKE test would also match parts of longer names and would
-- treat "_" in a column name as a wildcard. The comparison is used directly
-- as the IF condition, because EXISTS (SELECT <boolean>) is always true.
CREATE OR REPLACE FUNCTION f_test_array1(in _colname text)
returns text as $body$
DECLARE
    colnames text;
begin
    colnames := (
        SELECT string_agg(c.column_name::text, ',')
        FROM information_schema.columns AS c
        WHERE c.table_name = 'customer'
    );

    -- colnames is NULL when no such table exists; the condition is then NULL
    -- and the ELSE branch is taken.
    if lower(_colname) = any(string_to_array(lower(colnames), ',')) then
        return format('column %s exits.', _colname);
    else
        return format('column %s does not exits.', _colname);
    end if;
end
$body$
LANGUAGE plpgsql;