postgres 12
I am trying to loop through a table that holds schema names, table names and column names.
I want to do various things such as counting nulls, counting rows, etc. I failed at the first hurdle: updating the records column.
table i am using
-- One row per (schema, table, column) to be profiled.
CREATE TABLE test.table_study (
    table_schema text,
    table_name   text,
    column_name  text,
    records      integer,  -- filled later with count(*) of table_schema.table_name
    no_nulls     integer,  -- presumably the number of NULL values in the column; TODO confirm
    no_blanks    integer,  -- presumably the number of blank values in the column; TODO confirm
    per_pop      integer   -- presumably the percentage populated; TODO confirm
);
I populate the table with some schema names ,tables and columns from information_schema.columns
-- Seed the study table with one row per column of every schema whose name ends in 'white'.
-- The target columns are listed explicitly so the statement does not depend on the
-- physical column order of test.table_study; the unlisted metric columns stay NULL.
INSERT INTO test.table_study (table_schema, table_name, column_name)
SELECT
    c.table_schema,
    c.table_name,
    c.column_name
FROM information_schema.columns AS c
WHERE c.table_schema LIKE '%white'
ORDER BY c.table_schema, c.table_name, c.ordinal_position;
I want to populate the rest with a function
function :-
-- Procedure meant to set test.table_study.records to the row count of each listed table.
-- Takes no parameters; builds and runs one UPDATE per loop iteration via EXECUTE.
CREATE OR REPLACE PROCEDURE test.insert_data_population()
as $$
declare s record;
declare t record;
declare c record;
BEGIN
-- outer loop: each distinct schema named in the study table
FOR s IN SELECT distinct table_schema FROM test.table_study
LOOP
-- middle loop: each distinct table listed under that schema
FOR t IN SELECT distinct table_name FROM test.table_study where table_schema = s.table_schema
loop
-- inner loop: one pass per study row with this table_name (not filtered by schema);
-- c is never read below, so the same UPDATE is simply repeated for every such row
FOR c IN SELECT column_name FROM test.table_study where table_name = t.table_name
LOOP
-- NOTE(review): t.table_name is concatenated into the WHERE clause without quoting, so the
-- generated text ends in "where table_study.table_name = <name>" and <name> is parsed as a
-- column reference instead of a string literal. The schema and table names in the FROM clause
-- are also spliced in without identifier quoting, and the UPDATE matches on table_name only,
-- not on table_schema.
execute 'update test.table_study set records = (select count(*) from ' || s.table_schema || '.' || t.table_name || ') where table_study.table_name = '|| t.table_name ||';';
END LOOP;
END LOOP;
END LOOP;
END;
$$
LANGUAGE plpgsql;
I get this error: SQL Error [42703]: ERROR: column "age" does not exist. The table age does exist.
when I take out the where clause
execute 'update referralunion.testinsert ti set records = (select count(*) from ' || s.table_schema || '.' || t.table_name || ') ;';
it works. I just can't figure out what's wrong.
Your procedure is structured incorrectly. The nested loops attempt to pair every column name with every table name in every schema, which I would guess is what produces your "column does not exist" error. It also shows procedural thinking, whereas SQL requires thinking in terms of sets. Below I use essentially your query to demonstrate the problem, followed by a revised version that uses a single loop.
-- setup (schema references dropped for the demo)
CREATE TABLE table_study (
    table_schema text,
    table_name   text,
    column_name  text,
    records      integer,
    no_nulls     integer,
    no_blanks    integer,
    per_pop      integer
);

-- two schemas that each contain a table named t1, with different columns
INSERT INTO table_study (table_schema, table_name, column_name)
VALUES
    ('s1', 't1', 'age'),
    ('s2', 't1', 'xyz');
-- procedure replacing EXECUTE with Raise Notice.
-- Dry-run variant of the three-level loop: walks schema -> table -> column rows of
-- table_study and only prints the statement text with RAISE NOTICE; nothing is executed.
CREATE OR REPLACE PROCEDURE insert_data_population()
LANGUAGE plpgsql
AS $$
DECLARE
    schema_row record;
    table_row  record;
    column_row record;
    seq        integer := 0;  -- running number prefixed to each notice
BEGIN
    FOR schema_row IN
        SELECT DISTINCT table_schema
        FROM table_study
    LOOP
        FOR table_row IN
            SELECT DISTINCT table_name
            FROM table_study
            WHERE table_schema = schema_row.table_schema
        LOOP
            -- filters on table_name alone, so rows from every schema that shares
            -- the table name are visited here
            FOR column_row IN
                SELECT column_name
                FROM table_study
                WHERE table_name = table_row.table_name
            LOOP
                seq := seq + 1;
                RAISE NOTICE '%: update table_study set records = (select count(*) from %.% where table_study.table_name = %;'
                    , seq, schema_row.table_schema, table_row.table_name, column_row.column_name;
            END LOOP;
        END LOOP;
    END LOOP;
END;
$$;
Run procedure
-- Invoke the procedure from an anonymous PL/pgSQL block.
DO LANGUAGE plpgsql $run$
BEGIN
    CALL insert_data_population();
END;
$run$;
RESULTS
1: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = age; 2: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = xyz; 3: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = age; 4: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = xyz;
Notice lines 2 and 3. Each references a column name that does not exist in the table. This results from the FOR structure with the same table name in different schema.
Revision for Single Select statement with Single For loop.
-- Single-loop dry run: iterates once over the distinct (schema, table, column)
-- triples of table_study and prints one statement text per triple.
CREATE OR REPLACE PROCEDURE insert_data_population()
AS $$
DECLARE
    target record;
    seq    integer := 0;  -- running number prefixed to each notice
BEGIN
    FOR target IN
        SELECT DISTINCT
            table_schema,
            table_name,
            column_name
        FROM table_study
    LOOP
        seq := seq + 1;
        RAISE NOTICE '%: update table_study set records = (select count(*) from %.% where table_study.table_name = %;'
            , seq, target.table_schema, target.table_name, target.column_name;
    END LOOP;
END;
$$
LANGUAGE plpgsql;
-- Invoke the revised procedure from an anonymous PL/pgSQL block.
DO LANGUAGE plpgsql $run$
BEGIN
    CALL insert_data_population();
END;
$run$;
RESULTS
1: update table_study set records = (select count(*) from s2.t1 where table_study.table_name = xyz;
2: update table_study set records = (select count(*) from s1.t1 where table_study.table_name = age;
Note: In Postgres, DECLARE begins a block's declaration section. It is not necessary to repeat DECLARE for each variable; I would actually consider that bad practice. In theory, each DECLARE could be treated as opening a nested block that requires its own END. Fortunately, Postgres does not require this.
Related
I want to count certain values in all tables of a schema that contain a column that can contain those values.
Was hoping to use a LATERAL join to loop over all tables, but I'm running into issues:
-- Attempted query: for each table in schema 'datastore' that has a dss_current_flag
-- column, return the qualified table name and a row count.
-- NOTE(review): not valid SQL as written. The lateral subquery after "cross join lateral"
-- has no parentheses or alias, the derived table has no alias, and
-- information_schema.fully_qualified_table_name uses a column alias as if it were a
-- relation name; a plain SQL statement cannot take the table it reads from a column value.
select
fully_qualified_table_name,
cnt
from (
select
'datastore.' || table_name as fully_qualified_table_name
from
information_schema.columns
where
table_schema = 'datastore'
and column_name = 'dss_current_flag'
cross join lateral
select
count(*) as cnt
from
information_schema.fully_qualified_table_name
);
Is this possible?
I'm afraid it is not possible to run dynamic queries using pure SQL. You might want to use PL/pgSQL instead, e.g.:
-- Returns the combined row count of every table in schema 'datastore' that has a
-- column named dss_current_flag. Each table is counted with its own dynamic query.
CREATE OR REPLACE FUNCTION count_records()
RETURNS bigint
LANGUAGE plpgsql
AS $$
DECLARE
    tbl         record;
    total       bigint := 0;  -- running sum over all matching tables
    table_count bigint := 0;  -- count(*) of the table currently visited
BEGIN
    FOR tbl IN
        SELECT
            c.table_schema AS sch,
            c.table_name   AS tb
        FROM information_schema.columns AS c
        WHERE c.table_schema = 'datastore'
          AND c.column_name = 'dss_current_flag'
    LOOP
        -- %I quotes schema and table as identifiers
        EXECUTE format($ex$ SELECT count(*) FROM %I.%I $ex$, tbl.sch, tbl.tb)
        INTO table_count;
        total := total + table_count;
    END LOOP;

    RETURN total;
END
$$;
The more flexible approach would be to provide schema and table names as parameters in the function call instead of hard coding it in the function body, such as CREATE FUNCTION count_records(_schema_name text, _table_name text) .., or even the fully qualified table name as a single parameter: CREATE FUNCTION count_records(_qualified_table_name text) ... .
Demo: db<>fiddle
Based on the answer by #jim-jones my final solution was
-- Row shape returned by datastore.count_records_in_schema_where_column_has_value:
-- which schema/table/column was inspected, the value searched for, and the row count.
CREATE TYPE datastore.schema_table_column_counts_type AS (
    schema_name text,
    table_name  text,
    column_name text,
    value       text,
    count_p     bigint  -- receives count(*) from the per-table query
);
-- For every table in schema _schema_name that has a column named _column_name,
-- count the rows where that column equals _value.
--
-- Parameters:
--   _schema_name  schema whose tables are inspected
--   _column_name  column that must exist in a table for it to be inspected
--   _value        value compared against the column (passed as an untyped literal,
--                 so it is coerced to the column's type by the per-table query)
-- Returns: one datastore.schema_table_column_counts_type row per matching table,
--          ordered by table name.
CREATE OR REPLACE FUNCTION datastore.count_records_in_schema_where_column_has_value(_schema_name text, _column_name text, _value text)
RETURNS SETOF datastore.schema_table_column_counts_type
LANGUAGE plpgsql
AS $$
DECLARE
    rec           record;
    result_record datastore.schema_table_column_counts_type;
BEGIN
    FOR rec IN
        SELECT
            c.table_schema AS sch,
            c.table_name   AS tb
        FROM information_schema.columns AS c
        WHERE c.table_schema = _schema_name
          AND c.column_name = _column_name
        ORDER BY c.table_name
    LOOP
        -- %I quotes identifiers and %L quotes literals. Interpolating with %s inside
        -- hand-written quotes breaks (or allows injection) as soon as a name needs
        -- identifier quoting or a value contains a single quote.
        EXECUTE format($ex$
            SELECT
                %1$L::text AS schema_name,
                %2$L::text AS table_name,
                %3$L::text AS column_name,
                %4$L::text AS value,
                count(*)
            FROM
                %1$I.%2$I
            WHERE
                %3$I = %4$L
            $ex$
            , rec.sch, rec.tb, _column_name, _value)
        INTO result_record;

        RETURN NEXT result_record;
    END LOOP;
END
$$;

SELECT * from datastore.count_records_in_schema_where_column_has_value('datastore', 'dss_current_flag', 'P');
I have a requirement to translate it to an SQL script.
I am using the information schema to get all the columns of a table and print their distinct count.
I was able to get the count, but not able to print the column name properly,
PFA the below code.
I have to pass the value of "colum_lbl" into my select clause, but when I do so I get a GROUP BY error.
So I passed "colum_lbl" within quotes. Now every row of the result has the hardcoded text 'colum_lbl' as its value; I need to replace it with the actual value read in the FOR loop.
Any other efficient method for this requirement will be very much appreciated. Thanks in advance
-- Anonymous block: recreates tmp_table, then for each selected column of
-- cva_aggr.employee runs a dynamic INSERT storing a label and count(distinct <column>).
do $$
DECLARE
colum_lbl text;
BEGIN
DROP TABLE IF EXISTS tmp_table;
CREATE TABLE tmp_table
(
colnm varchar(50),
cnt integer
);
-- loop variable receives each matching column name in turn
FOR colum_lbl IN
SELECT distinct column_name
FROM information_schema.columns
WHERE table_schema = 'cva_aggr'
AND table_name = 'employee' AND column_name in ('empid','empnm')
LOOP
-- NOTE(review): inside a single-quoted string, '' is an escaped quote, so the first
-- "'' || colum_lbl || ''" below does not concatenate anything: it stays inside the literal and
-- the generated statement selects the constant text ' || colum_lbl || '. Only the second
-- "|| colum_lbl ||" (inside count(distinct ...)) is a real concatenation.
-- NOTE(review): the dynamic statement reads FROM employee without the cva_aggr schema
-- used in the catalog filter above.
EXECUTE
'Insert into tmp_table
SELECT '' || colum_lbl || '',count(distinct ' || colum_lbl || ')
FROM employee ';
END LOOP;
END; $$
I have a timescale db with multiple tables having the same structure.
I want to retrieve the recent row from each table where a value is true.
My logic is to
retrieve all the tablenames for the tables where this condition can be true
loop over list of tablenames and select the rows where the condition is met
I get a syntax error on the FOR loop, but I expect that I am doing more things wrong.
Can someone suggest a solution please? Thank you in advance.
-- Attempted loop over all tables named like 'ohlc%' to fetch the latest active row of each.
-- NOTE(review): the snippet starts at DECLARE; no enclosing DO block or function header is shown.
DECLARE
tablename text;
BEGIN
-- tablename receives each matching table name as text
FOR tablename IN
SELECT table_name FROM information_schema.tables
WHERE table_name LIKE 'ohlc%'
LOOP
-- NOTE(review): this SELECT has no select list, no FROM clause and no target for its result,
-- and tablename.is_active treats the text variable as if it were a table. Reading from a
-- table whose name is held in a variable requires a dynamic statement (EXECUTE).
SELECT WHERE tablename.is_active is TRUE
ORDER BY time_stamp DESC
Limit 1
-- NOTE(review): the statement above is not terminated with a semicolon before END LOOP
END LOOP;
END;
translate your problem
find table that have specific column name in schema.
How to find a table having a specific column in postgresql
first condition meet then loop. Function to loop through and select data from multiple tables
most tricky issue is quote_ident.
-- Returns (is_active, id) for every row with is_active = true, taken from each table in
-- schema public whose name starts with 'ohlc' (case-insensitive) and that has an
-- is_active column.
-- Returns: set of (_is_active boolean, id int); tables are visited in name order.
create or replace function test0()
returns table (_is_active boolean, id int)
language plpgsql
as $$
declare
    qualified_tbl text;
begin
    for qualified_tbl in
        -- Build a schema-qualified, quoted name. The catalog lookup is limited to
        -- 'public', so the dynamic query must name that schema too instead of relying
        -- on search_path to resolve a bare table name.
        select quote_ident(c.table_schema) || '.' || quote_ident(c.table_name)
        from information_schema.columns as c
        where c.table_schema = 'public'
          and c.table_name ilike 'ohlc%'
          and c.column_name = 'is_active'
        order by c.table_name
    loop
        return query execute
            'select is_active, id from ' || qualified_tbl || ' where is_active is true';
    end loop;
end
$$;
For all tables in X, while X is
SELECT table_name
FROM all_tab_cols
WHERE owner = 'ADMIN'
  AND column_name = 'MY_COLUMN'
I need to check, if the column MY_COLUMN has other values than 'Y' or 'N' and if it does, print out the table name.
Pseudo code:
for table in X:
if MY_COLUMN !='Y' or MY_COLUMN !='N':
print table
How to implement that in PL/SQL, with cursors I guess?
Following should work:
-- Prints the name of every ADMIN-owned table whose MY_COLUMN holds a value other than
-- 'Y' or 'N'. Rows where MY_COLUMN is NULL are not counted, because both comparisons
-- evaluate to unknown for NULL.
DECLARE
  counter NUMBER;
  CURSOR c1 IS
    SELECT owner, table_name
    FROM all_tab_cols
    WHERE column_name = 'MY_COLUMN'
      AND owner = 'ADMIN';
BEGIN
  FOR rec IN c1 LOOP
    -- The INTO clause belongs to EXECUTE IMMEDIATE itself; placed inside the dynamic
    -- text it never populates the PL/SQL variable. The table is owner-qualified and
    -- double-quoted so it resolves regardless of the current schema.
    EXECUTE IMMEDIATE
      'select count(*) from "' || rec.owner || '"."' || rec.table_name
      || '" where MY_COLUMN != ''Y'' and MY_COLUMN != ''N'' '
      INTO counter;
    -- print only tables that actually contain an unexpected value
    IF counter > 0 THEN
      DBMS_OUTPUT.PUT_LINE(rec.table_name);
    END IF;
  END LOOP;
END;
Basically we open a cursor with all tables containing that column, do a count for rows that have different values than Y or N, and if that count > 0, print the table.
Wouter's version does not work for me.
I had to remove the semicolon (Oracle Database version 11.2.0.4.0):
-- Counts the rows of LASTID and prints the number via DBMS_OUTPUT.
DECLARE
  row_total NUMBER;
BEGIN
  SELECT COUNT(*)
    INTO row_total
    FROM LASTID;

  DBMS_OUTPUT.PUT_LINE(row_total);
END;
/
I am pulling all of the column_names (cname1) from a crosstab table that I made. There are thousands of these column names so I combined them into an array. I then want to use dynamic sql (or whatever works) to use those column_names to make an array based off of the records of that same crosstab table. I keep getting the error:
ERROR: missing "LOOP" at end of SQL expression
.
-- Function meant to collect the column names of crosstab_183 (all but the first) and
-- use them in a dynamic query against mffcu.crosstab_183.
-- NOTE(review): declared RETURNS setof record, but no RETURN statement appears in the body.
CREATE OR REPLACE FUNCTION mffcu.test_ty_hey()
RETURNS setof record
LANGUAGE plpgsql
AS $function$
Declare
cname1 text;
Begin
-- cname1 (text) is the loop target for the array_agg(...) result
for cname1 in select array_agg(column_name) as useme
from(
select column_name::text
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'crosstab_183'
and ordinal_position != 1
) as fin
-- NOTE(review): the subquery fin selects only column_name, so fin.id is not produced by it.
-- NOTE(review): the semicolon ending the next line terminates the query before the LOOP
-- keyword is reached.
join mffcu.crosstab_183 a on fin.id = a.id;
loop
-- NOTE(review): sql2 and columnname are not declared in the Declare section above;
-- the loop variable is named cname1.
sql2 ='select distinct array['|| columnname ||'] from mffcu.crosstab_183';
execute sql2;
end loop;
END;
$function$
I cannot for the life of me figure out why I'm getting this error.
for cname1 in select array_agg(column_name) as useme
from(
select column_name::text
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'crosstab_183'
and ordinal_position != 1
) as fin
join mffcu.crosstab_183 a on fin.id = a.id; --here should not be semicolon!
loop