I'm trying to simplify a column in BigQuery by using BigQuery extract on it but I am having a bit of an issue.
Here are two examples of the data I'm extracting from:
dc_pre=CLXk_aigyOMCFQb2dwod4dYCZw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=OVERDRFT;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.bank.co.za/onlineContent/ga_bridge.html
dc_pre=COztt4-tyOMCFcji7Qod440PCw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=DDA13;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.support.co.za/onlineContent/ga_bridge.html
I want to extract the portion between ;u1= and ;u2
Running the following legacy SQL Query
-- Legacy SQL query from the question: returns the event date, the activity id
-- and an attempted extraction of the u1 value from Other_Data.
SELECT
Date(Event_Time),
Activity_ID,
-- The pattern relies on a lookbehind group "(?<=...)" and a lookahead group
-- "(?=...)"; the "(?<" operator is what the regex parser rejects.
REGEXP_EXTRACT(Other_Data, r'(?<=u1=)(.*\n?)(?=;u2)')
FROM
[sprt-data-transfer:dtftv2_sprt.p_activity_166401]
WHERE
Activity_ID in ('8179851')
AND Site_ID_DCM NOT IN ('2134603','2136502','2539719','2136304','2134604','2134602','2136701','2378406')
-- Raw integer bounds, both inclusive; presumably microseconds since the Unix
-- epoch -- TODO confirm against the table schema.
AND Event_Time BETWEEN 1563746400000000 AND 1563832799000000
I get the error...
Failed to parse regular expression "(?<=u1=)(.*\n?)(?=;u2)": invalid
perl operator: (?<
And this is where my talent runs out: is the error caused by my using legacy SQL, or is this an unsupported format for the regular expression?
Just tried this, and it worked, but with "Standard SQL" enabled.
-- Standard SQL: capture the text between ';u1=' and the following ';u2'.
-- The two sample strings are inlined so the query runs without any table.
WITH samples AS (
    SELECT other_data
    FROM UNNEST([
        'dc_pre=CLXk_aigyOMCFQb2dwod4dYCZw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=OVERDRFT;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.bank.co.za/onlineContent/ga_bridge.html',
        'dc_pre=COztt4-tyOMCFcji7Qod440PCw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=DDA13;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.support.co.za/onlineContent/ga_bridge.html'
    ]) AS other_data
)
SELECT
    other_data,
    -- Lazy ".+?" stops at the first ';u2' after ';u1='.
    REGEXP_EXTRACT(other_data, ';u1=(.+?);u2') AS some_part
FROM samples
Not using regex but it still works...
-- Regex-free alternative: break each string into ';'-separated items, split
-- every item on '=' and keep the pair whose key is 'u1'.
WITH test AS (
    SELECT 1 AS id, 'dc_pre=CLXk_aigyOMCFQb2dwod4dYCZw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=OVERDRFT;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.bank.co.za/onlineContent/ga_bridge.html' AS my_str
    UNION ALL
    SELECT 2 AS id, 'dc_pre=COztt4-tyOMCFcji7Qod440PCw;gtm=2wg7f1;gcldc=;gclaw=;gac=UA-5815571-8:;auiddc=;u1=DDA13;u2=undefined;u3=undefined;u4=undefined;u5=SSA;u6=undefined;u7=na;u8=undefined;u9=undefined;u10=undefined;u11=undefined;~oref=https://www.online.support.co.za/onlineContent/ga_bridge.html'
),
-- One row per "key=value" item, with the item already split into its parts.
pairs AS (
    SELECT
        t.id,
        SPLIT(item, '=') AS kv
    FROM test AS t
    LEFT JOIN UNNEST(SPLIT(t.my_str, ';')) AS item
)
SELECT
    id,
    -- SAFE_OFFSET yields NULL instead of an error when a part is missing.
    kv[SAFE_OFFSET(0)] AS left_side,
    kv[SAFE_OFFSET(1)] AS right_side
FROM pairs
WHERE kv[SAFE_OFFSET(0)] = 'u1'
I am using SQL Developer (and it must be SQL Developer). I need to take a string that looks like XML data, but really is just a string, and display the data in a table. The data is from a large table in which some rows have no numbers in the user IDs and some have no numbers in the job IDs, but the XML-like tags are still there. Again, it is not XML, just made to look like XML, so no XML commands will work.
Data:
<UserId>1234567</UserId><JobId>1234567890123</JobId><Date>Wed May 09 13:08:24 EDT 2018</Date>
Here is what I have so far:
-- Query from the question: tries to pull the numeric ids out of the XML-like
-- text column for a single company/location.
select company_id, location_id,
-- No subexpression argument is passed, so regexp_substr returns the whole
-- match, i.e. the 'UserId>' prefix and the optional trailing '<' as well.
regexp_substr(xml_provision_responses,'UserId>([[:digit:]]+<?)') as USER_Id,
-- NOTE(review): this pattern looks for 'UserPitId>' while the sample data and
-- the output shown in the question use 'JobId>' -- confirm the intended tag.
regexp_substr(xml_provision_responses,'UserPitId>([[:digit:]]+<?)') as JOB_ID
From Entitymgr.Cr_Response_Matrix
-- Takes the 4 characters starting 24 from the end of the string, strips every
-- non-digit and compares the remainder with '2018'.
Where CAST(regexp_replace(SUBSTR(xml_provision_responses,-24,4), '[^0-9]','')as varchar(20))='2018'
and company_Id = 9876543 and location_Id = 9876543210987;
The first Where condition was just to limit the data pull to just this year and the second Where condition was just added to just focus on one data point so that part does not matter.
Expected output:
COMPANY_ID: 9876543
LOCATION_Id: 9876543210987
USER_ID: 1234567
JOB_ID: 1234567890123
========================================
Actual Output:
COMPANY_ID: 9876543
LOCATION_Id: 9876543210987
USER_ID: UserId>1234567<
JOB_ID: JobId>1234567890123<
========================================
I want to display only the numeric portion of the string that is between the > and < of the tags. And if there is no number between the tags, put null or the word "missing" into that specific column.
You need to use some more parameters of regexp_substr:
-- The last argument selects which parenthesised subexpression is returned;
-- 2 is the content between the opening and the closing tag.
select regexp_substr(
           xml_provision_responses,
           '(<JobId>)(.*)(</JobId>)',
           1,    -- position: start at the first character
           1,    -- occurrence: first match
           'i',  -- match parameter: case-insensitive
           2     -- subexpression: the tag content only
       ),
       regexp_substr(
           xml_provision_responses,
           '(<UserId>)(.*)(</UserId>)',
           1,
           1,
           'i',
           2
       )
...
The idea is to divide the matching string into 3 parts:
the tag opening ((<UserId>))
the content ((.*))
the tag closure ((</UserId>))
and then only get the second matching subexpression (see the parameter 2 in the function calls).
I'm aware that you said (and stressed) that the string is not XML, but what you've shown does seem to be enough like XML to let you use the XML functions in the database anyway:
-- Stand-in for the real table: one sample row built from DUAL.
WITH cr_response_matrix (company_id, location_id, xml_provision_responses) AS (
    SELECT
        9876543,
        9876543210987,
        '<UserId>1234567</UserId><JobId>1234567890123</JobId><Date>Wed May 09 13:08:24 EDT 2018</Date>'
    FROM dual
)
-- Wrap the fragment in a dummy <root> element so it parses as one XML
-- document, then project the child elements as relational columns.
SELECT
    crm.company_id,
    crm.location_id,
    xml.user_id,
    xml.job_id
FROM cr_response_matrix crm
CROSS JOIN XMLTABLE(
    '/root'
    PASSING XMLTYPE('<root>' || xml_provision_responses || '</root>')
    COLUMNS
        user_id NUMBER       PATH 'UserId',
        job_id  NUMBER       PATH 'JobId',
        tsz_str VARCHAR2(28) PATH 'Date'
) xml
-- The year is the last four characters of the Date text.
WHERE SUBSTR(tsz_str, -4) = '2018';
COMPANY_ID LOCATION_ID USER_ID JOB_ID
--------------- --------------- --------------- ---------------
9876543 9876543210987 1234567 1234567890123
Of course, your actual strings might have other stuff that makes this approach invalid.
You could add an XML header as well as a dummy root node:
-- Same extraction, but the fragment is wrapped in an XML declaration plus a
-- dummy <root>, and the year comes from a real timestamp instead of SUBSTR.
SELECT
    crm.company_id,
    crm.location_id,
    xml.user_id,
    xml.job_id
FROM cr_response_matrix crm
CROSS JOIN XMLTABLE(
    '/root'
    PASSING XMLTYPE(
        '<?xml version="1.0" encoding="UTF-8" standalone="no" ?><root>'
        || xml_provision_responses
        || '</root>'
    )
    COLUMNS
        user_id NUMBER       PATH 'UserId',
        job_id  NUMBER       PATH 'JobId',
        tsz_str VARCHAR2(28) PATH 'Date'
) xml
WHERE EXTRACT(
          YEAR FROM TO_TIMESTAMP_TZ(
              -- Insert a region name in front of the ' E..' abbreviation so
              -- the TZR and TZD format elements both have something to read.
              REPLACE(xml.tsz_str, ' E', ' US/Eastern E'),
              'Dy Mon DD HH24:MI:SS TZR TZD YYYY',
              'NLS_DATE_LANGUAGE=ENGLISH'
          )
      ) = 2018;
Just for fun I've also converted the Date value to a full timestamp with time zone to extract the actual year instead of using substr().
But this is all academic if the data isn't consistently as close to XML as your example suggested, and the adjusted regular expressions are reliable.
Can't say you didn't warn me...
I am trying to setup a CTE table with a series of quarterly dates.
The query returns [42601] ERROR: syntax error at or near "values" Position: 38
with q(qqyy, firstday, lastday) as (
values
-- Quarter lookup rows: (label, first day, last day). The dates are plain
-- strings in month-day-year order and mix '-' and '/' separators. Each
-- quarter's first day equals the previous quarter's last day.
('Q4_10', '09-30-2010', '12-31-2010'),
('Q1_11', '12-31-2010', '03-31-2011'),
('Q2_11', '03-31-2011', '06/30/2011'),
('Q3_11', '06/30/2011', '09/30/2011'),
('Q4_11', '09/30/2011', '12/31/2011'),
('Q1_12', '12/31/2011', '03/31/2012'),
('Q2_12', '03/31/2012', '06/30/2012'),
('Q3_12', '06/30/2012', '09/30/2012'),
('Q4_12', '09/30/2012', '12/31/2012'),
('Q1_13', '12/31/2012', '03/31/2013'),
('Q2_13', '03/31/2013', '06/30/2013'),
('Q3_13', '06/30/2013', '09/30/2013'),
('Q4_13', '09/30/2013', '12/31/2013'),
('Q1_14', '12/31/2013', '03/31/2014'),
('Q2_14', '03/31/2014', '06/30/2014'),
('Q3_14', '06/30/2014', '09/30/2014'),
('Q4_14', '09/30/2014', '12/31/2014'),
('Q1_15', '12/31/2014', '03/31/2015'),
('Q2_15', '03/31/2015', '06/30/2015'),
('Q3_15', '06/30/2015', '09/30/2015'),
('Q4_15', '09/30/2015', '12/31/2015'),
('Q1_16', '12/31/2015', '03/31/2016'),
('Q2_16', '03/31/2016', '06/30/2016'),
('Q3_16', '06/30/2016', '09/30/2016'),
('Q4_16', '09/30/2016', '12/31/2016')
)
-- Sum calc per quarter label and cobrand_id and write the result into
-- temp_08.cmg_calc.
SELECT q.qqyy, cobrand_id, sum(calc)
into temp_08.cmg_calc
from temp_08.cmg s
join q on
-- Both bounds are inclusive, so a transaction_date equal to a shared boundary
-- date joins to two adjacent quarters.
-- NOTE(review): firstday/lastday are string literals; the type of
-- transaction_date is not visible here -- confirm how the comparison is cast.
s.transaction_date >= q.firstday
and s.transaction_date <= q.lastday
GROUP BY q.qqyy, cobrand_id;
It appears that the above query is getting stuck on "values" due to Redshift using an older version of postgresql (http://docs.aws.amazon.com/redshift/latest/dg/c_unsupported-postgresql-features.html). But for some reason the below query that also uses "values" works fine. Any idea how I can get the above query to work using redshift?
-- Lookup table of quarters: a label plus the first and last date of each one.
create table temp_08.cmgquarters (
quarter_col text
, date_from date
, date_to date
);
-- The column list is spelled out so the insert does not depend on the table's
-- column order. Dates are written as ISO 8601 (YYYY-MM-DD) so they parse the
-- same way regardless of the session's date-style setting; the values are the
-- same calendar dates as the original month-day-year literals.
insert into temp_08.cmgquarters (quarter_col, date_from, date_to)
values
('Q4_10', '2010-09-30', '2010-12-31'),
('Q1_11', '2010-12-31', '2011-03-31'),
('Q2_11', '2011-03-31', '2011-06-30'),
('Q3_11', '2011-06-30', '2011-09-30'),
('Q4_11', '2011-09-30', '2011-12-31'),
('Q1_12', '2011-12-31', '2012-03-31'),
('Q2_12', '2012-03-31', '2012-06-30'),
('Q3_12', '2012-06-30', '2012-09-30'),
('Q4_12', '2012-09-30', '2012-12-31'),
('Q1_13', '2012-12-31', '2013-03-31'),
('Q2_13', '2013-03-31', '2013-06-30'),
('Q3_13', '2013-06-30', '2013-09-30'),
('Q4_13', '2013-09-30', '2013-12-31'),
('Q1_14', '2013-12-31', '2014-03-31'),
('Q2_14', '2014-03-31', '2014-06-30'),
('Q3_14', '2014-06-30', '2014-09-30'),
('Q4_14', '2014-09-30', '2014-12-31'),
('Q1_15', '2014-12-31', '2015-03-31'),
('Q2_15', '2015-03-31', '2015-06-30'),
('Q3_15', '2015-06-30', '2015-09-30'),
('Q4_15', '2015-09-30', '2015-12-31'),
('Q1_16', '2015-12-31', '2016-03-31'),
('Q2_16', '2016-03-31', '2016-06-30'),
('Q3_16', '2016-06-30', '2016-09-30'),
('Q4_16', '2016-09-30', '2016-12-31');
With Redshift not supporting the values() as a "table replacement" you need to re-write that as a union:
-- Each quarter is its own SELECT; UNION ALL stacks them into one row set.
WITH q (qqyy, firstday, lastday) AS (
    SELECT 'Q4_10', '09-30-2010', '12-31-2010'
    UNION ALL
    SELECT 'Q1_11', '12-31-2010', '03-31-2011'
    UNION ALL
    ....
)
SELECT ...;
You should, however, use proper DATE literals:
-- Same shape as the UNION ALL form, but the bounds are typed DATE literals
-- in YYYY-MM-DD order rather than strings.
WITH q (qqyy, firstday, lastday) AS (
    SELECT 'Q4_10', DATE '2010-09-30', DATE '2010-12-31'
    UNION ALL
    SELECT 'Q1_11', DATE '2010-12-31', DATE '2011-03-31'
    UNION ALL
    ....
)
SELECT ...;
I don't know Postgres well enough, but with SQL-Server you cannot use the VALUES like a table directly. You must use parenthesis around and provide a table alias with column names to define the derived table.
This would be something like this:
-- The VALUES constructor is wrapped in parentheses and given a table alias
-- with column names, which turns it into a derived table.
WITH q AS (
    SELECT
        tbl.qqyy,
        tbl.firstday,
        tbl.lastday
    FROM (
        VALUES
            ('Q4_10', '09-30-2010', '12-31-2010'),
            ('Q1_11', '12-31-2010', '03-31-2011'),
            ('Q2_11', '03-31-2011', '06/30/2011'),
            ('Q3_11', '06/30/2011', '09/30/2011'),
            ('Q4_11', '09/30/2011', '12/31/2011'),
            ('Q1_12', '12/31/2011', '03/31/2012'),
            ('Q2_12', '03/31/2012', '06/30/2012'),
            ('Q3_12', '06/30/2012', '09/30/2012'),
            ('Q4_12', '09/30/2012', '12/31/2012'),
            ('Q1_13', '12/31/2012', '03/31/2013'),
            ('Q2_13', '03/31/2013', '06/30/2013'),
            ('Q3_13', '06/30/2013', '09/30/2013'),
            ('Q4_13', '09/30/2013', '12/31/2013'),
            ('Q1_14', '12/31/2013', '03/31/2014'),
            ('Q2_14', '03/31/2014', '06/30/2014'),
            ('Q3_14', '06/30/2014', '09/30/2014'),
            ('Q4_14', '09/30/2014', '12/31/2014'),
            ('Q1_15', '12/31/2014', '03/31/2015'),
            ('Q2_15', '03/31/2015', '06/30/2015'),
            ('Q3_15', '06/30/2015', '09/30/2015'),
            ('Q4_15', '09/30/2015', '12/31/2015'),
            ('Q1_16', '12/31/2015', '03/31/2016'),
            ('Q2_16', '03/31/2016', '06/30/2016'),
            ('Q3_16', '06/30/2016', '09/30/2016'),
            ('Q4_16', '09/30/2016', '12/31/2016')
    ) AS tbl (qqyy, firstday, lastday)
)
SELECT
    q.qqyy,
    q.firstday,
    q.lastday
FROM q
Attention
You are in high danger!
You are using culture-dependent date formats. This might work on your system, but break on another one...
Furthermore, you are not even consistent!
Your VALUES provide your date values as string.
In SQL-Server I'd suggest to use ISO8601, unseparated or - my favourite - ODBC. But I'm sure there are culture independent formats for literal dates in Postgres too.
And I would suggest to let the CTE come back with typed values or use a temp table with typed columns.
I have an SNMP message column (formatted as VARCHAR(MAX)) in a SQL table like the one below. Is there a way to convert each message OID into a column/value format?
Message column content sample:
community=PUBLIC, enterprise=1.1.1.1.1.1.1.1.1.1.1, uptime=42170345, agent_ip=1.1.1.1, version=Ver2, ...
Desired result:
community enterprise uptime agent_ip
--------- ---------- ------ --------
PUBLIC 1.1.1.1.1.1.1.1.1.1.1 42170345 1.1.1.1 ...
So basically it would need to split the string by ", " and then return INI values as columns. Note this is on one row (not creating or splitting to multiple rows, just multiple columns)
This is SQL Server 2008 R2.
Thank you.
You can find a splitstring function on the web in many places. Here is how you would use it in a query to do what you want:
-- Pivot the "key=value" tokens of each SNMP message into one column per key.
-- dbo.SplitString is a user-supplied table-valued function (not built in);
-- it is assumed to return one (idx, token) row per delimited piece.
-- "table" and "snmp" are placeholders for the real table and column names.
select t.*, cols.*
from table t cross apply
     (select max(case when tok.token like 'community=%'
                      then substring(tok.token, 11, len(tok.token))
                 end) as community,
             max(case when tok.token like 'enterprise=%'
                      then substring(tok.token, 12, len(tok.token))
                 end) as enterprise,
             max(case when tok.token like 'uptime=%'
                      then substring(tok.token, 8, len(tok.token))
                 end) as uptime,
             max(case when tok.token like 'agent_ip=%'
                      then substring(tok.token, 10, len(tok.token))
                 end) as agent_ip
      -- The message is delimited by ", ", so splitting on ',' leaves a leading
      -- space on every token after the first; ltrim removes it so the LIKE
      -- prefixes and the substring offsets line up.
      from (select ltrim(s.token) as token
            -- A column list on a function call requires a table alias.
            from dbo.SplitString(t.snmp, ',') as s(idx, token)
           ) as tok
     ) cols;
Probably not the most efficient way to do this, but this works:
-- For each key: cut the text from "key=" up to the ", nextkey=" marker, then
-- strip the "key=" prefix. Relies on the keys appearing in this fixed order.
SELECT
    REPLACE(
        SUBSTRING(
            MsgText,
            CHARINDEX('community=', MsgText),
            CHARINDEX(', enterprise=', MsgText) - CHARINDEX('community=', MsgText)
        ),
        'community=',
        ''
    ) AS community,
    REPLACE(
        SUBSTRING(
            MsgText,
            CHARINDEX('enterprise=', MsgText),
            CHARINDEX(', uptime=', MsgText) - CHARINDEX('enterprise=', MsgText)
        ),
        'enterprise=',
        ''
    ) AS enterprise,
    REPLACE(
        SUBSTRING(
            MsgText,
            CHARINDEX('uptime=', MsgText),
            CHARINDEX(', agent_ip=', MsgText) - CHARINDEX('uptime=', MsgText)
        ),
        'uptime=',
        ''
    ) AS uptime,
    REPLACE(
        SUBSTRING(
            MsgText,
            CHARINDEX('agent_ip=', MsgText),
            CHARINDEX(', version=', MsgText) - CHARINDEX('agent_ip=', MsgText)
        ),
        'agent_ip=',
        ''
    ) AS agent_ip,
    MsgText
FROM Database.dbo.Table
In case anyone needs a method to parse SNMP messages
Here is a solution that transforms the string to XML, which gives more freedom when processing the result:
-- Prepare data for solution testing.
-- Table variables are declared with an "@" prefix.
DECLARE @srctable TABLE (
    Id INT,
    SnmpMessage VARCHAR(MAX),
    SnmpMessageXml XML
);

-- Only the source columns are inserted; SnmpMessageXml stays NULL until the
-- UPDATE below fills it in.
INSERT INTO @srctable (Id, SnmpMessage)
VALUES
    (1, 'community=PUBLIC, enterprise=1.1.1.1.1.1.1.1.1.1.1, uptime=42170345, agent_ip=1.1.1.1, version=Ver2');

-- Transform the formatted source string into an XML string: every
-- "key=value" pair becomes <data key="value"/> inside a single <row>.
-- Intentionally no WHERE clause: every row of the table variable is converted.
-- NOTE(review): values that themselves contain ',', '=', '"', '<' or '&'
-- would produce malformed XML and make the CAST fail.
UPDATE @srctable
SET SnmpMessageXml = CAST('<row><data ' + REPLACE(REPLACE(SnmpMessage, ',', '"/><data '), '=', '="') + '"/></row>' AS XML);

-- Final select from the XML data. "@name" is the XPath attribute axis; "[1]"
-- picks the first match, since value() needs a single node.
SELECT SnmpMessageXml.value('(/row/data/@community)[1]', 'VARCHAR(999)') AS community,
       SnmpMessageXml.value('(/row/data/@enterprise)[1]', 'VARCHAR(999)') AS enterprise,
       SnmpMessageXml.value('(/row/data/@uptime)[1]', 'VARCHAR(999)') AS uptime,
       SnmpMessageXml.value('(/row/data/@agent_ip)[1]', 'VARCHAR(999)') AS agent_ip,
       SnmpMessageXml.value('(/row/data/@version)[1]', 'VARCHAR(999)') AS version
FROM @srctable AS t;