Multiple joins sql - sql

I have a query which must contain 3 joins, however I get this error:
Syntax error (Missing operator)
The SQL:
SELECT
Agents.[PF],
Agents.[User_ID],
Agents.[First_Name],
Agents.[Second_Name],
Agents.[Third_Name],
Agents.[Family_Name],
Agents.[Gender],
Agents.[Contract_Type],
Agents.Area,
Teams.Team_Name,
Agents.Hiring_Date,
Resignation_Pool.Resignation_Date,
Resignation_Pool.Effective_Date,
Replace(
IIf(Skills.Skill_Directory IS NULL, '', 'Directory, ')
+ IIf(Skills.Skill_TRC IS NULL, '', 'TRC, ')
+ IIf(Skills.Skill_Prepaid IS NULL, '', 'Prepaid, ')
+ IIf(Skills.Skill_Postpaid IS NULL, '', 'Postpaid, ')
+ IIf(Skills.Skill_KeyAccount IS NULL, '', 'KeyAccount, ')
+ IIf(Skills.Skill_Blackberry IS NULL, '', 'Blackberry, ')
+ IIf(Skills.Skill_Broadband IS NULL, '', 'Broadband, ')
+ IIf(Skills.Skill_Concierge IS NULL, '', 'Concierge, ')
+ IIf(Skills.Skill_ISP IS NULL, '', 'ISP, ')
+ IIf(Skills.Skill_Mada IS NULL, '', 'Mada, ')
+ IIf(Skills.Skill_CSCS IS NULL, '', 'CSCS, ')
+ '$', ', $', ''
) AS Skills
FROM Agents
LEFT JOIN Resignation_Pool
ON Agents.PF = Resignation_Pool.PF
LEFT JOIN Teams
ON Agents.Team = Teams.ID
LEFT JOIN Skills
ON Agents.PF = Skills.PF
WHERE Agents.Contract_Status = 'Active'
What is causing this error?

MS Access requires parentheses around JOIN syntax with multiple tables. You will need to use something similar to this:
FROM ((Agents
LEFT JOIN Resignation_Pool
ON Agents.PF = Resignation_Pool.PF)
LEFT JOIN Teams
ON Agents.Team = Teams.ID)
LEFT JOIN Skills
ON Agents.PF = Skills.PF

Related

I need the code to only show me F, and C in the LAB_ORD.Order_Status unfortunately the code isn't recognizing this command and showing me F, C, X, P

SELECT
ISNULL(LAB_ORD.Accession_No, '') AS [Accession Number],
MPI.Medical_Record_Number AS [MRN],
RIGHT(('0000'+ CONVERT(VARCHAR(50), EP.Episode_No)), 4) AS [Episode Number],
ISNULL(EP.Patient_Name, '') AS [Patient Name],
ISNULL(PEO.Birth_Sex, '') AS [Sex],
CAST(PEO.DOB AS varchar) AS [DOB],
ISNULL(PEO.Address1, '') AS [Address 1],
ISNULL(PEO.Address2, '') AS [Address 2],
ISNULL(PEO.City, '') AS [City],
ISNULL(PEO.State_Code, '') AS [State],
ISNULL(PEO.Zip, '') AS [Zip Code],
ISNULL(PEO.Home_Phone, '') AS [Home Phone],
ISNULL(PEO.Work_Phone, '') AS [Work Phone],
ISNULL(PEO.Other_Phone, '') as [Cell Phone],
ISNULL(PEO.Email, '') as [Email],
ISNULL(STAFF.Staff_Name, ISNULL(PPL3.Person_Name, '')) AS [Ordering Provider],
CAST(LAB_ORD.Collection_Date AS varchar) AS [Collection Date],
ISNULL(SPC.Description, '') AS Description, ---Patient Class Description,
ISNULL(LAB_ORD.Location, '') AS [Order Location],
ISNULL(LAB_ORD.Order_Status, '') [Order Status],
ISNULL(LT.Test_Code, '') AS [Test Code],
ISNULL(LT.Report_Name, '') AS [Report Name],
ISNULL(LR.Result, '') AS [Result],
ISNULL(cast(LR.Result_DT as varchar), '') AS [Result Date & Time],
ISNULL(CAST(LAB_ORD.Final_Date AS varchar), '')[Final Date]
FROM
LIS_Orders AS LAB_ORD
LEFT JOIN
LIS_Order_Tests AS OT ON LAB_ORD.OrderID = OT.OrderID
LEFT JOIN
LIS_Facilities AS FAC ON LAB_ORD.FacilityID = FAC.FacilityID
LEFT JOIN
shr_Episodes AS EP ON LAB_ORD.EpisodeID = EP.EpisodeID
LEFT JOIN
shr_Patient_Classes AS SPC ON EP.Patient_ClassID = SPC.Patient_ClassID
LEFT JOIN
shr_MPI AS MPI ON EP.MPIID = MPI.MPIID
LEFT JOIN
glb_People AS PEO ON MPI.Peopleid = PEO.PeopleID
LEFT JOIN
LIS_Results AS LR ON LR.Order_TestID = OT.Order_TestID
LEFT JOIN
LIS_Tests AS LT ON LT.TestID = LR.TestID
LEFT JOIN
shr_Staffs AS STAFF ON STAFF.StaffID = LAB_ORD.Ordering_PhysicianID
LEFT JOIN
glb_People AS PPL3 ON PPL3.PeopleID = STAFF.PersonId
WHERE
Test_Code = '139901'
OR (Test_Code LIKE '164073')
OR (Test_Code LIKE '%sars%')
OR (Test_Code LIKE '%covid%')
AND LAB_ORD.Order_Status IN ('F', 'C')
AND (Result NOT LIKE '.')
I need the code to only show me F, and C in the LAB_ORD.Order_Status unfortunately the code isn't recognizing this command and showing me F, C, X, P
When you user ORs and ANDs always is a good idea to use parentesis to state the order in which they have to be evaluated.
In your case, the Where clause probably should be written this way:
WHERE
(
Test_Code='139901'
or (Test_Code like '164073')
or (Test_Code like '%sars%')
or (Test_Code like '%covid%')
)
and LAB_ORD.Order_Status IN ('F','C')
and (Result not like '.')

Formula to Divide Total of Columns and Format Result Not Valid

I have the following SQL code below that is returning an error message stating that the AvgPYs column is not valid. This is also impacting my Auth column. Is there an issue with my formula? Any help is appreciated.
SELECT [tblCeiling].[Proj Code], [tblCeiling].[Act Code], [tblCeiling].[Cost Ctr], [tblCeiling].[Date], [tblCeiling].[Ref2], [tblCeiling].[Analyst], [tblCeiling].[Type], [tblCeiling].[B or O], [tblCeiling].[Jul], [tblCeiling].[Aug],
[tblCeiling].[Sep], [tblCeiling].[Oct], [tblCeiling].[Nov], [tblCeiling].[Dec], [tblCeiling].[Jan], [tblCeiling].[Feb], [tblCeiling].[Mar], [tblCeiling].[Apr], [tblCeiling].[May], [tblCeiling].[Jun], [tblCeiling].[Perm], [tblCeiling].[Temp], [tblCeiling].[LimitedTerm],
[tblCeiling].[LTDate], [tblCeiling].[Sal_Rate], [tblCeiling].[New], [Perm] + [Temp] + [LimitedTerm] AS Monthly, Format(([tblCeiling].[Jul] + [tblCeiling].[Aug] + [tblCeiling].[Sep] + [tblCeiling].[Oct] + [tblCeiling].[Nov] + [tblCeiling].[Dec] + [tblCeiling].[Jan] +
[tblCeiling].[Feb] + [tblCeiling].[Mar] + [tblCeiling].[Apr] + [tblCeiling].[May] + [tblCeiling].[Jun]) / 12, '0.0####') AS AvgPYs,
((([Sal_Rate] * [AvgPYs]) * 1000) / 1000) AS Auth, [Dollar Adj] + [Auth] AS Budget, [tblCeiling].[Import], [tblCeiling].[Dollar Adj], [tblCeiling].[OngoingOrOneTime], [tblCeiling].[OneTimeEndingDate]
FROM (SELECT DISTINCT *
FROM [tblactcode]) AS [tblactcode] RIGHT JOIN
(SELECT DISTINCT *
FROM [tblCeiling]) AS [tblCeiling] ON [tblactcode].[Act Code] = [tblCeiling].[Act Code]
WHERE ((([tblCeiling].[Jul]) = iif([jul] IS NULL, 0, [jul])) AND (([tblCeiling].[Aug]) = iif([aug] IS NULL, 0, [aug])) AND (([tblCeiling].[Sep]) = iif([sep] IS NULL, 0, [sep])) AND (([tblCeiling].[Oct]) = iif([oct] IS NULL, 0, [oct])) AND (([tblCeiling].[Nov]) = iif([nov] IS NULL,
0, [nov])) AND (([tblCeiling].[Dec]) = iif([dec] IS NULL, 0, [dec])) AND (([tblCeiling].[Jan]) = iif([jan] IS NULL, 0, [jan])) AND (([tblCeiling].[Feb]) = iif([feb] IS NULL, 0, [feb])) AND (([tblCeiling].[Mar]) = iif([mar] IS NULL, 0, [mar])) AND (([tblCeiling].[Apr])
= iif([apr] IS NULL, 0, [apr])) AND (([tblCeiling].[May]) = iif([may] IS NULL, 0, [may])) AND (([tblCeiling].[Jun]) = iif([jun] IS NULL, 0, [jun])) AND (([tblCeiling].[Import]) = 0))
ORDER BY [tblCeiling].[Proj Code], [tblCeiling].[Cost Ctr], [tblCeiling].[Date]
Here is the specific line I'm referring to:
Format(([tblCeiling].[Jul] + [tblCeiling].[Aug] + [tblCeiling].[Sep] + [tblCeiling].[Oct] + [tblCeiling].[Nov] + [tblCeiling].[Dec] + [tblCeiling].[Jan] +
[tblCeiling].[Feb] + [tblCeiling].[Mar] + [tblCeiling].[Apr] + [tblCeiling].[May] + [tblCeiling].[Jun]) / 12, '0.0####') AS AvgPYs,
The specific issue you have run into is attempting to use a calculation in the same scope you are calculating it - which isn't possible. You can only access a calculated value in an outer query.
Or a neat solution is to use CROSS APPLY which allows you to reuse a calculation as follows. In general this is done as:
select -- existing columns before AvgPYs
, AvgPYs
-- , some formula which depends on AvgPYs
from (
-- existing query
) C -- C is an acceptable short alias for Ceiling
cross apply (
values (formula)
) X (AvgPYs)
In your case I think the following is correct:
SELECT C.[Proj Code], C.[Act Code], C.[Cost Ctr], C.[Date], C.[Ref2], C.[Analyst], C.[Type], C.[B or O], C.[Jul], C.[Aug]
, C.[Sep], C.[Oct], C.[Nov], C.[Dec], C.[Jan], C.[Feb], C.[Mar], C.[Apr], C.[May], C.[Jun], C.[Perm], C.[Temp], C.[LimitedTerm]
, C.[LTDate], C.[Sal_Rate], C.[New], [Perm] + [Temp] + [LimitedTerm] AS Monthly
, X.AvgPYs
, Y.Auth
, [Dollar Adj] + Y.Auth AS Budget, C.[Import], C.[Dollar Adj], C.[OngoingOrOneTime], C.[OneTimeEndingDate]
FROM (
SELECT DISTINCT *
FROM [tblactcode]
) AS AC
RIGHT JOIN (
SELECT DISTINCT *
FROM [tblCeiling]
) AS C ON AC.[Act Code] = C.[Act Code]
CROSS APPLY (
VALUES (Format((C.[Jul] + C.[Aug] + C.[Sep] + C.[Oct] + C.[Nov] + C.[Dec] + C.[Jan] +
C.[Feb] + C.[Mar] + C.[Apr] + C.[May] + C.[Jun]) / 12, '0.0####'))
) AS X (AvgPYs)
CROSS APPLY (
VALUES (((([Sal_Rate] * X.AvgPYs) * 1000) / 1000))
) Y (Auth)
WHERE (((C.[Jul]) = iif([jul] IS NULL, 0, [jul])) AND ((C.[Aug]) = iif([aug] IS NULL, 0, [aug])) AND ((C.[Sep]) = iif([sep] IS NULL, 0, [sep])) AND ((C.[Oct]) = iif([oct] IS NULL, 0, [oct])) AND ((C.[Nov]) = iif([nov] IS NULL,
0, [nov])) AND ((C.[Dec]) = iif([dec] IS NULL, 0, [dec])) AND ((C.[Jan]) = iif([jan] IS NULL, 0, [jan])) AND ((C.[Feb]) = iif([feb] IS NULL, 0, [feb])) AND ((C.[Mar]) = iif([mar] IS NULL, 0, [mar])) AND ((C.[Apr])
= iif([apr] IS NULL, 0, [apr])) AND ((C.[May]) = iif([may] IS NULL, 0, [may])) AND ((C.[Jun]) = iif([jun] IS NULL, 0, [jun])) AND ((C.[Import]) = 0)
)
ORDER BY C.[Proj Code], C.[Cost Ctr], C.[Date];
Note: A key purpose of a table alias is to have a short reference to the table. See how much easier it is to read with shorter aliases.

Duplicates in SQL when using DISTINCT

Not sure why I keep getting duplicates with this query. It should be easy, but for some reason I just cannot figure it out.
This is my query:
SELECT DISTINCT
STUFF((SELECT
'; ' +
CASE
WHEN staff.lastname IS NOT NULL
THEN UPPER(REPLACE(RTRIM(staff.lastname), ' ', '') + ', ' + RTRIM(staff.firstname))
ELSE UPPER('Not Assigned')
END
FROM
ca_case_assign ca
JOIN
staff ON staff.username = ca.staffusername
JOIN
tbl_case c on ca.appid = c.col_caseid
WHERE
ca.clientusername = c.col_username
FOR XML PATH('')), 1, 1, '') [CaseManager]
and this is the result that I get:
LOCALSTAFF, THERESA; LOCALSTAFF, THERESA; O'MALLEY, ELLEN; STAFF, STATE; STAFF, STATE; STAFF, STATE; STAFF, STATE; STAFF, STATE; STAFF, BC; STAFF, BC; STAFF, BC; STAFF, BC; STAFF, BC; STAFF, BC; STAFF, BC;
Which is obviously incorrect.
Please help, thank you.
The inner query should have the distinct instead:
SELECT
STUFF((SELECT DISTINCT
'; ' +
CASE
WHEN staff.lastname IS NOT NULL
THEN UPPER(REPLACE(RTRIM(staff.lastname), ' ', '') + ', ' + RTRIM(staff.firstname))
ELSE UPPER('Not Assigned')
END
FROM
ca_case_assign ca
JOIN
staff ON staff.username = ca.staffusername
JOIN
tbl_case c on ca.appid = c.col_caseid
WHERE
ca.clientusername = c.col_username
FOR XML PATH('')), 1, 1, '') [CaseManager]

Get row counts and distinct row counts for every table in a Snowflake database?

I am trying to create a data quality dashboard, showing every table in my Snowflake database, the row count, the distinct row count, and the number of duplicates. The table I want should look like this:
table_name | row_count | distinct_row_count | duplicates
————————————————————————————————————————————————————————
table_a | 1,372 | 1,370 | 2
table_b | 4,735 | 4,735 | 0
I've been able to get the table name and row count using information_schema.tables. I'm trying to figure out how to get distinct counts for all of these tables. The primary key column for every table is different. On some tables it will be a user_id, on others a session_id, etc.
I've looked through the snowflake documentation for built in functions that could help. I've explored the information/usage schemas, etc. I'm not sure if a stored procedure would help here (I haven't used a lot of those).
In python or another language, I'd loop through every table and calculate what I need. Is there a way to do this in SQL?
create or replace TABLE DEMO_DB.PUBLIC.SNOWBALL (
TABLE_NAME VARCHAR(314),
TOTAL_ROWS NUMBER(18,0),
TABLE_LAST_ALTERED TIMESTAMP_LTZ(9),
TABLE_CREATED TIMESTAMP_LTZ(9),
TABLE_BYTES NUMBER(18,0),
COL_NAME ARRAY,
COL_DATA_TYPE ARRAY,
COL_HLL ARRAY,
COL_NULL_CNT ARRAY,
COL_MIN ARRAY,
COL_MAX ARRAY,
COL_TOP ARRAY,
COL_AVG ARRAY,
COL_MODE ARRAY,
COL_STDDEV ARRAY,
COL_VAR_POP ARRAY,
COL_AVG_LENGTH ARRAY,
STATS_RUN_DATE_TIME TIMESTAMP_LTZ(9)
);
create or replace view SNOWBALL_COLUMNS as
select
concat_ws('.', table_catalog, table_schema, table_name) as full_table_name,
*
from (
select * from demo_db.information_schema.columns
union
select * from snowflake_sample_data.information_schema.columns
union
select * from util_db.information_schema.columns
);
create or replace view SNOWBALL_TABLES as
select
concat_ws('.', table_catalog, table_schema, table_name) as full_table_name,
*
from (
select * from demo_db.information_schema.tables
union
select * from snowflake_sample_data.information_schema.tables
union
select * from util_db.information_schema.tables
);
CREATE OR REPLACE PROCEDURE DEMO_DB.PUBLIC.SNOWBALL(
db_name STRING,
schema_name STRING,
snowball_table STRING,
max_age_days FLOAT,
limit FLOAT
)
RETURNS VARIANT
LANGUAGE JAVASCRIPT
COMMENT = 'Collects table and column stats.'
EXECUTE AS OWNER
AS
$$
var validLimit = Math.max(LIMIT, 0); // prevent SQL syntax error caused by negative numbers
var sqlGenerateInserts = `
WITH snowball_tables AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.TABLES
),
snowball_columns AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.COLUMNS
),
snowball AS (
SELECT table_name, MAX(stats_run_date_time) AS stats_run_date_time
FROM IDENTIFIER(?) -- <<SNOWBALL_TABLE>> table
GROUP BY table_name
)
SELECT full_table_name, aprox_row_count,
CONCAT (
'INSERT INTO IDENTIFIER(''', ?, ''') ', -- SNOWBALL table
'(table_name,total_rows,table_last_altered,table_created,table_bytes,col_name,',
'col_data_type,col_hll,col_avg_length,col_null_cnt,col_min,col_max,col_top,col_mode,col_avg,stats_run_date_time)',
'SELECT ''', full_table_name, ''' AS table_name, ',
table_stats_sql,
', ARRAY_CONSTRUCT( ', col_name, ') AS col_name',
', ARRAY_CONSTRUCT( ', col_data_type, ') AS col_data_type',
', ARRAY_CONSTRUCT( ', col_hll, ') AS col_hll',
', ARRAY_CONSTRUCT( ', col_avg_length, ') AS col_avg_length',
', ARRAY_CONSTRUCT( ', col_null_cnt, ') AS col_null_cnt',
', ARRAY_CONSTRUCT( ', col_min, ') AS col_min',
', ARRAY_CONSTRUCT( ', col_max, ') AS col_max',
', ARRAY_CONSTRUCT( ', col_top, ') AS col_top',
', ARRAY_CONSTRUCT( ', col_MODE, ') AS col_MODE',
', ARRAY_CONSTRUCT( ', col_AVG, ') AS col_AVG',
', CURRENT_TIMESTAMP() AS stats_run_date_time ',
' FROM ', quoted_table_name
) AS insert_sql
FROM (
SELECT
tbl.full_table_name,
tbl.row_count AS aprox_row_count,
CONCAT ( '"', col.table_catalog, '"."', col.table_schema, '"."', col.table_name, '"' ) AS quoted_table_name,
CONCAT (
'COUNT(1) AS total_rows,''',
IFNULL( tbl.last_altered::VARCHAR, 'NULL'), ''' AS table_last_altered,''',
IFNULL( tbl.created::VARCHAR, 'NULL'), ''' AS table_created,',
IFNULL( tbl.bytes::VARCHAR, 'NULL'), ' AS table_bytes' ) AS table_stats_sql,
LISTAGG (
CONCAT ('''', col.full_table_name, '.', col.column_name, '''' ), ', '
) AS col_name,
LISTAGG ( CONCAT('''', col.data_type, '''' ), ', ' ) AS col_data_type,
LISTAGG ( CONCAT( ' HLL(', '"', col.column_name, '"',') ' ), ', ' ) AS col_hll,
LISTAGG ( CONCAT( ' AVG(ZEROIFNULL(LENGTH(', '"', col.column_name, '"','))) ' ), ', ' ) AS col_avg_length,
LISTAGG ( CONCAT( ' SUM( IFF( ', '"', col.column_name, '"',' IS NULL, 1, 0) ) ' ), ', ') AS col_null_cnt,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MODE(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_MODE,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MIN(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_min,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MAX(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_max,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' AVG(', '"', col.column_name,'"',') ' ), 'NULL' ), ', ' ) AS col_AVG,
LISTAGG ( CONCAT ( ' APPROX_TOP_K(', '"', col.column_name, '"', ', 100, 10000)' ), ', ' ) AS col_top
FROM snowball_tables tbl JOIN snowball_columns col ON col.full_table_name = tbl.full_table_name
LEFT OUTER JOIN snowball sb ON sb.table_name = tbl.full_table_name
WHERE (tbl.table_catalog, tbl.table_schema) = (?, ?)
AND ( sb.table_name IS NULL OR sb.stats_run_date_time < TIMESTAMPADD(DAY, - FLOOR(?), CURRENT_TIMESTAMP()) )
--AND tbl.row_count > 0 -- NB: also excludes views (table_type = 'VIEW')
GROUP BY tbl.full_table_name, aprox_row_count, quoted_table_name, table_stats_sql, stats_run_date_time
ORDER BY stats_run_date_time NULLS FIRST )
LIMIT ` + validLimit;
var tablesAnalysed = [];
var currentSql;
try {
currentSql = sqlGenerateInserts;
var generateInserts = snowflake.createStatement( {
sqlText: currentSql,
binds: [
`"${DB_NAME}".information_schema.tables`,
`"${DB_NAME}".information_schema.columns`,
SNOWBALL_TABLE, SNOWBALL_TABLE,
DB_NAME, SCHEMA_NAME, MAX_AGE_DAYS, LIMIT
]
} );
var insertStatements = generateInserts.execute();
// loop over generated INSERT statements and execute them
while (insertStatements.next()) {
var tableName = insertStatements.getColumnValue('FULL_TABLE_NAME');
currentSql = insertStatements.getColumnValue('INSERT_SQL');
var insertStatement = snowflake.createStatement( {
sqlText: currentSql,
binds: [ SNOWBALL_TABLE ]
} );
var insertResult = insertStatement.execute();
tablesAnalysed.push(tableName);
}
return { result: "SUCCESS", analysedTables: tablesAnalysed };
}
catch (err) {
return {
error: err,
analysedTables: tablesAnalysed,
sql: currentSql
};
}
$$;
call DEMO_DB.PUBLIC.SNOWBALL(
'SNOWFLAKE_SAMPLE_DATA',
'TPCH_SF1',
'DEMO_DB.PUBLIC.SNOWBALL',
1, -- evals tables not analysed for x days -- first time you run this doesn't matter.
1000 -- limits # of tables analysed
);
CREATE OR REPLACE PROCEDURE DEMO_DB.PUBLIC.SNOWBALL(
db_name STRING,
schema_name STRING,
snowball_table STRING,
max_age_days FLOAT,
limit FLOAT
)
RETURNS VARIANT
LANGUAGE JAVASCRIPT
COMMENT = 'Collects table and column stats.'
EXECUTE AS OWNER
AS
$$
var validLimit = Math.max(LIMIT, 0); // prevent SQL syntax error caused by negative numbers
var sqlGenerateInserts = `
WITH snowball_tables AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.TABLES
),
snowball_columns AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.COLUMNS
),
snowball AS (
SELECT table_name, MAX(stats_run_date_time) AS stats_run_date_time
FROM IDENTIFIER(?) -- <<SNOWBALL_TABLE>> table
GROUP BY table_name
)
SELECT full_table_name, aprox_row_count,
CONCAT (
'INSERT INTO IDENTIFIER(''', ?, ''') ', -- SNOWBALL table
'(table_name,total_rows,table_last_altered,table_created,table_bytes,col_name,',
'col_data_type,col_hll,col_avg_length,col_null_cnt,col_min,col_max,col_top,col_mode,col_avg,stats_run_date_time)',
'SELECT ''', full_table_name, ''' AS table_name, ',
table_stats_sql,
', ARRAY_CONSTRUCT( ', col_name, ') AS col_name',
', ARRAY_CONSTRUCT( ', col_data_type, ') AS col_data_type',
', ARRAY_CONSTRUCT( ', col_hll, ') AS col_hll',
', ARRAY_CONSTRUCT( ', col_avg_length, ') AS col_avg_length',
', ARRAY_CONSTRUCT( ', col_null_cnt, ') AS col_null_cnt',
', ARRAY_CONSTRUCT( ', col_min, ') AS col_min',
', ARRAY_CONSTRUCT( ', col_max, ') AS col_max',
', ARRAY_CONSTRUCT( ', col_top, ') AS col_top',
', ARRAY_CONSTRUCT( ', col_MODE, ') AS col_MODE',
', ARRAY_CONSTRUCT( ', col_AVG, ') AS col_AVG',
', CURRENT_TIMESTAMP() AS stats_run_date_time ',
' FROM ', quoted_table_name
) AS insert_sql
FROM (
SELECT
tbl.full_table_name,
tbl.row_count AS aprox_row_count,
CONCAT ( '"', col.table_catalog, '"."', col.table_schema, '"."', col.table_name, '"' ) AS quoted_table_name,
CONCAT (
'COUNT(1) AS total_rows,''',
IFNULL( tbl.last_altered::VARCHAR, 'NULL'), ''' AS table_last_altered,''',
IFNULL( tbl.created::VARCHAR, 'NULL'), ''' AS table_created,',
IFNULL( tbl.bytes::VARCHAR, 'NULL'), ' AS table_bytes' ) AS table_stats_sql,
LISTAGG (
CONCAT ('''', col.full_table_name, '.', col.column_name, '''' ), ', '
) AS col_name,
LISTAGG ( CONCAT('''', col.data_type, '''' ), ', ' ) AS col_data_type,
LISTAGG ( CONCAT( ' HLL(', '"', col.column_name, '"',') ' ), ', ' ) AS col_hll,
LISTAGG ( CONCAT( ' AVG(ZEROIFNULL(LENGTH(', '"', col.column_name, '"','))) ' ), ', ' ) AS col_avg_length,
LISTAGG ( CONCAT( ' SUM( IFF( ', '"', col.column_name, '"',' IS NULL, 1, 0) ) ' ), ', ') AS col_null_cnt,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MODE(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_MODE,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MIN(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_min,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MAX(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_max,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' AVG(', '"', col.column_name,'"',') ' ), 'NULL' ), ', ' ) AS col_AVG,
LISTAGG ( CONCAT ( ' APPROX_TOP_K(', '"', col.column_name, '"', ', 100, 10000)' ), ', ' ) AS col_top
FROM snowball_tables tbl JOIN snowball_columns col ON col.full_table_name = tbl.full_table_name
LEFT OUTER JOIN snowball sb ON sb.table_name = tbl.full_table_name
WHERE (tbl.table_catalog, tbl.table_schema) = (?, ?)
AND ( sb.table_name IS NULL OR sb.stats_run_date_time < TIMESTAMPADD(DAY, - FLOOR(?), CURRENT_TIMESTAMP()) )
--AND tbl.row_count > 0 -- NB: also excludes views (table_type = 'VIEW')
GROUP BY tbl.full_table_name, aprox_row_count, quoted_table_name, table_stats_sql, stats_run_date_time
ORDER BY stats_run_date_time NULLS FIRST )
LIMIT ` + validLimit;
var tablesAnalysed = [];
var currentSql;
try {
currentSql = sqlGenerateInserts;
var generateInserts = snowflake.createStatement( {
sqlText: currentSql,
binds: [
`"${DB_NAME}".information_schema.tables`,
`"${DB_NAME}".information_schema.columns`,
SNOWBALL_TABLE, SNOWBALL_TABLE,
DB_NAME, SCHEMA_NAME, MAX_AGE_DAYS, LIMIT
]
} );
var insertStatements = generateInserts.execute();
// loop over generated INSERT statements and execute them
while (insertStatements.next()) {
var tableName = insertStatements.getColumnValue('FULL_TABLE_NAME');
currentSql = insertStatements.getColumnValue('INSERT_SQL');
var insertStatement = snowflake.createStatement( {
sqlText: currentSql,
binds: [ SNOWBALL_TABLE ]
} );
var insertResult = insertStatement.execute();
tablesAnalysed.push(tableName);
}
return { result: "SUCCESS", analysedTables: tablesAnalysed };
}
catch (err) {
return {
error: err,
analysedTables: tablesAnalysed,
sql: currentSql
};
}
$$;
I've done somewhat of an overkill solution solving this.
SQL used supplied ... basically does everything you've asked for plus top 100 values, min,max, stddev, avg, null % to the column level for every table in ALL databases.
Oh yes and works out ALL PK/FK's returning not just the PK but the description instead.
Runs in seconds ... All sql available from a post in the community snowflake. Hit me up if you want the really smart stuff :-)
SQL here :
https://community.snowflake.com/s/group/0F90Z000000IOX5SAO/general-snowflake-community-help

Error "ORA-00936: missing expression" for below SQL statement

Please help to check why below SQL statement got error of "ORA-00936: missing expression" when executed on Oracle 11g R2.
select part.*,
decode(pv.dspuom, null, part.untqty, part.untqty / prtftp_dtl.untqty) dsp_untqty,
decode(pv.dspuom, null, pv.stkuom, pv.dspuom) untqty_uom,
decode(pv.dspuom, null, cast(null as int), pv.stkuom, cast(null as int), mod(part.untqty, prtftp_dtl.untqty)) rem_untqty,
decode(pv.dspuom, null, null, pv.stkuom, null, pv.stkuom) rem_untqty_uom
from (select #select_on:raw sum(untqty) untqty,
sum(catch_qty) catch_qty,
invadj.prtnum fixed_prtnum,
invadj.prt_client_id fixed_prt_client_id,
invadj.wh_id fixed_wh_id,
invadj.play_prc_id
from invadj
where exists(select 'x'
from prtmst_view
where prtmst_view.prtnum = invadj.prtnum
and prtmst_view.prt_client_id = invadj.prt_client_id
and prtmst_view.wh_id = invadj.wh_id
and prtmst_view.prdflg = 1)
and #+moddte:date
and #+invadj.play_prc_id
and #+invadj.prtnum
and #+invadj.prt_client_id
and wh_id = #wh_id
and #*
group by #result_string:raw,
invadj.prtnum,
invadj.prt_client_id,
invadj.wh_id,
invadj.play_prc_id
having sum(untqty) <> 0
or sum(catch_qty) <> 0) part left
join prtmst_view pv
on pv.prtnum = part.fixed_prtnum
and pv.prt_client_id = part.fixed_prt_client_id
and pv.wh_id = part.fixed_wh_id left
join prtftp
on prtftp.prtnum = pv.prtnum
and prtftp.prt_client_id = pv.prt_client_id
and prtftp.wh_id = pv.wh_id
and prtftp.defftp_flg = 1 left
join prtftp_dtl
on prtftp.prtnum = prtftp_dtl.prtnum
and prtftp.ftpcod = prtftp_dtl.ftpcod
and prtftp.prt_client_id = prtftp_dtl.prt_client_id
and prtftp.wh_id = prtftp_dtl.wh_id
and prtftp_dtl.uomcod = nvl(pv.dspuom, pv.stkuom)
I think the syntax may be off a bit.
Look at this line below
FROM (SELECT # select_on:raw sum(untqty) untqty,
The # character seems odd