Escape characters in BQ external tables - google-bigquery

I have an external table which is populated from a csv file.
In the csv file there is a field which contains an escape character followed by a comma,
e.g. "a\,b", which should read just "a,b". When I load the csv file it separates the field into 2 columns, "a" and "b", but it should read "a,b" in one column. I've tried using the quote option (as shown below) without any luck.
-- Asker's attempt: declare the backslash as the CSV quote character.
-- Per the question text, this still splits "a\,b" into two columns.
CREATE OR REPLACE EXTERNAL TABLE TEST
(A STRING,
B STRING)
OPTIONS (
format = 'CSV',
quote = '\'
)
Could someone help ?

You may try below workaround.
-- Expose every line of the CSV file as a single STRING column named raw.
-- The field delimiter is CHR(1) instead of ','; presumably that character
-- never occurs in the file, so each line arrives unsplit.
CREATE OR REPLACE EXTERNAL TABLE `your-project.your-dataset.so_test`
(
  raw STRING
)
OPTIONS (
  format = 'CSV',
  field_delimiter = CHR(1),
  uris = ['gs://your-bucket/so/test2.csv']
);
-- Split each raw line into three columns while honouring the "\," escape:
--   1. escaped commas ("\,") are swapped for '|' so SPLIT does not cut on them;
--   2. the line is split on the remaining commas;
--   3. the '|' placeholder in the second field is turned back into ','.
-- Caveats: a literal '|' in the second field also becomes ',', and only the
-- second field is restored (escaped commas in col1/col3 would stay as '|').
-- Reads from the external table `your-project.your-dataset.so_test`.
CREATE TEMP TABLE sample_table AS
SELECT
  csv[SAFE_OFFSET(0)] AS col1,
  REPLACE(csv[SAFE_OFFSET(1)], '|', ',') AS col2,
  csv[SAFE_OFFSET(2)] AS col3
FROM `your-project.your-dataset.so_test`,
  UNNEST([STRUCT(SPLIT(REPLACE(raw, '\\,', '|')) AS csv)]);

SELECT * FROM sample_table;
Sample csv file
gs://your-bucket/so/test2.csv
blah,a\,b,blah
Query results
Or, using PIVOT query
-- PIVOT variant of the same workaround.
-- The inner query swaps escaped commas ("\,") for '|', splits the raw line on
-- the remaining commas and emits one (col, offset) row per field. PIVOT then
-- turns offsets 0..2 into the columns col_0, col_1, col_2, and the '|'
-- placeholder in col_1 is restored to ','.
-- The split must read the external table's `raw` column (the table has no
-- `csv` column).
-- NOTE(review): the pivot input exposes only col and offset, so there is no
-- per-line grouping key; with more than one line in the file all lines would
-- be folded into a single output row. Carry a row key through the subquery
-- if the file can contain several lines.
CREATE TEMP TABLE sample_table (
  col1 STRING,
  col2 STRING,
  col3 STRING
) AS
SELECT * REPLACE (REPLACE(col_1, '|', ',') AS col_1)
FROM (
  SELECT col, offset
  FROM `your-project.your-dataset.so_test`,
    UNNEST(SPLIT(REPLACE(raw, '\\,', '|'))) AS col WITH OFFSET
)
PIVOT (ANY_VALUE(col) AS col FOR offset IN (0, 1, 2));

SELECT * FROM sample_table;

Related

TRIM in bigquery

I want to apply TRIM function for my columns. But TRIM after Format function is not working. It's not trimming the spaces.
If I do it before format as below then it gives me error for datatype because the columns have other datatypes than string and byte as well.
Please tell me a solution for this.
Meantime, you can apply some extra processing on top of original query to get desired result - as in below example
-- Build a '/'-separated, space-trimmed rendering of every column in the row.
--   FORMAT('%t', t)  renders the whole row as one string;
--   REGEXP_REPLACE   replaces each ", " separator and the surrounding
--                    parentheses (plus adjacent spaces) with '/';
--   REPLACE          turns NULL fields into '_';
--   TRIM             removes the leading and trailing '/'.
-- REPLACE is applied twice on purpose: matches cannot overlap, so with two
-- adjacent NULL columns ('/NULL/NULL/') a single pass would only convert the
-- first one. The second pass picks up the ones that were skipped.
SELECT
  *,
  TRIM(
    REPLACE(
      REPLACE(
        REGEXP_REPLACE(FORMAT('%t', t), r' *, *| *\)|\( *', '/'),
        '/NULL/', '/_/'
      ),
      '/NULL/', '/_/'
    ),
    '/'
  ) AS HashColumn
FROM your_table AS t
if applied to sample data
-- Sample rows: strings with leading/trailing spaces, NULLs and integers.
WITH your_table AS (
  SELECT ' 1' AS A, '2 ' AS B, NULL AS C, 4 AS D
  UNION ALL
  SELECT ' 12 ', NULL, '4', 5
)
output is
Consider below approach
-- JavaScript UDF: parses a JSON object string and returns its values as an
-- array of strings.
CREATE TEMP FUNCTION json_extract_values(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
return Object.values(JSON.parse(input));""";

-- For every row: serialise it to JSON, replace JSON nulls with "_", extract
-- the values, trim each one and join them with '/'.
-- WITH OFFSET + ORDER BY keep the values in their array order; without them
-- neither UNNEST nor STRING_AGG guarantees any ordering.
SELECT
  *,
  (
    SELECT STRING_AGG(TRIM(value), '/' ORDER BY pos)
    FROM UNNEST(
      json_extract_values(REPLACE(TO_JSON_STRING(t), ':null', ':"_"'))
    ) AS value WITH OFFSET AS pos
  ) AS HashColumn
FROM your_table AS t
if applied to dummy data as below
-- Dummy rows: strings with leading/trailing spaces, NULLs and integers.
WITH your_table AS (
  SELECT ' 1' AS A, '2 ' AS B, NULL AS C, 4 AS D
  UNION ALL
  SELECT ' 12 ', NULL, '4', 5
)
output is
which, I hope, is exactly what you are looking for

Extracting substring between two characters (commas)

I need to extract codes from a string separated by commas.
I have figured out how to extract the first code but i'm struggling trying to figure out how to extract subsequent codes.
Example string:
~R10.4,T85.8,Y83.8,I10X,I25.9,E10.9,Z95.5,Z93.2,Z88.0
Query:
-- Returns the text before the first comma of code_string.
SELECT
    code_string,
    SUBSTRING(code_string, 0, CHARINDEX(',', code_string)) AS first
FROM TABLE
Returns:
~R10.4
I now need to extract subsequent codes
~R10.4
T85.8
Y83.8
I10X
etc.
any help would be greatly appreciated
I would prefer not to have to create a function if possible
I guess you're using SQL server if so you can use
-- STRING_SPLIT yields one row per comma-separated code, in a column named value.
SELECT codes.value
FROM STRING_SPLIT(
    '~R10.4,T85.8,Y83.8,I10X,I25.9,E10.9,Z95.5,Z93.2,Z88.0',
    ','
) AS codes;
thought I'd share my solution
-- Split a comma-separated code string into separate columns by rewriting it
-- as XML (<Names><name>code</name>...</Names>) and reading the name elements
-- back by position.
;WITH source_codes AS (
    -- Replace the literal with your code field and add the FROM clause for
    -- your table.
    SELECT
        CAST('~I20.9,I10X,E78.0,H81.0,K90.0,M81.9,M19.9,Z86.7' AS nvarchar(MAX)) AS CODE_STRING
),
codes_as_xml AS (
    SELECT
        src.CODE_STRING,
        CONVERT(
            XML,
            '<Names><name>' + REPLACE(src.CODE_STRING, ',', '</name><name>') + '</name></Names>'
        ) AS xmlname
    FROM source_codes AS src
)
SELECT
    x.CODE_STRING,
    x.xmlname.value('/Names[1]/name[1]', 'varchar(100)') AS Col1,
    x.xmlname.value('/Names[1]/name[2]', 'varchar(100)') AS Col2,
    x.xmlname.value('/Names[1]/name[3]', 'varchar(100)') AS Col3,
    x.xmlname.value('/Names[1]/name[4]', 'varchar(100)') AS col4,
    x.xmlname.value('/Names[1]/name[5]', 'varchar(100)') AS col5
FROM codes_as_xml AS x

SQL Server import row string to table columns

I have a table containing the following text per row
"[0,0,0,1,2,4,1,0,0,2,0,0,0,0,847,18207,0,0,0,0,0,0,0,0,0,0,0,0]"
Now i want to insert these 28 values in a table containing 28 columns. I tried a few split functions but these would return only rows.
Any ideas?
using dbo.fnParseString()
-- Sketch: fill each target column by calling dbo.fnParseString once per position.
-- NOTE(review): dbo.fnParseString is not defined here; presumably it returns the
-- n-th delimited element of str (negative index = counted from one end) — TODO confirm.
-- The ". . ." and ",...." lines are elisions for the remaining columns, not valid T-SQL.
INSERT INTO a_table (col1, col2, col3, . . . )
SELECT dbo.fnParseString(-1, ',', str)
,dbo.fnParseString(-2, ',', str)
,dbo.fnParseString(-3, ',', str)
,....
FROM yourtable
-- Turn the bracketed, comma-separated list into XML of the form
-- <s><a>0</a><a>0</a>...</s> and read each <a> element back as an int column.
-- T-SQL local variables are prefixed with '@' ('#' denotes a temp table).
DECLARE @x XML;

WITH cte AS (
    SELECT '[0,0,0,1,2,4,1,0,0,2,0,0,0,0,847,18207,0,0,0,0,0,0,0,0,0,0,0,0]' AS col
)
SELECT @x = (
    -- '[' opens the first element, ']' closes the last one, and every ','
    -- closes the current element and opens the next.
    SELECT CAST('<s>' + REPLACE(REPLACE(REPLACE(col, '[', '<a>'), ']', '</a>'), ',', '</a><a>') + '</s>' AS XML)
    FROM cte
    FOR XML PATH('')
);

SELECT
    t.v.value('a[1]', 'int'),
    t.v.value('a[2]', 'int'),
    t.v.value('a[3]', 'int'),
    -- ... repeat the same expression for a[4] through a[27] ...
    t.v.value('a[28]', 'int')
FROM @x.nodes('/s') AS t(v);

Select rows using in with comma-separated string parameter

I'm converting a stored procedure from MySql to SQL Server. The procedure has one input parameter nvarchar/varchar which is a comma-separated string, e.g.
'1,2,5,456,454,343,3464'
I need to write a query that will retrieve the relevant rows, in MySql I'm using FIND_IN_SET and I wonder what the equivalent is in SQL Server.
I also need to order the ids as in the string.
The original query is:
-- MySQL original: keep rows whose id appears in the comma-separated p_ids and
-- return them in the order the ids appear in that string.
SELECT t.*
FROM table_name AS t
WHERE FIND_IN_SET(t.id, p_ids)
ORDER BY FIND_IN_SET(t.id, p_ids);
The equivalent is like for the where clause, and then charindex() for the order by:
-- SQL Server equivalent of FIND_IN_SET.
-- Both the id and the list are wrapped in commas so that, for example, 464
-- does not match inside 3464.
SELECT t.*
FROM table_name AS t
CROSS APPLY (
    SELECT ',' + CAST(id AS varchar(255)) + ',' AS needle
) AS k
WHERE ',' + p_ids + ',' LIKE '%' + k.needle + '%'
ORDER BY CHARINDEX(k.needle, ',' + p_ids + ',');
Well, you could use charindex() for both, but the like will work in most databases.
Note that I've added delimiters to the beginning and end of the string, so 464 will not accidentally match 3464.
You would need to write a FIND_IN_SET function, as it does not exist in SQL Server. The closest mechanism I can think of to convert a delimited string into a joinable object would be to create a table-valued function and use the result in a standard IN statement. It would need to be similar to:
-- Filter MyTable by the ids in a comma-separated parameter, using a
-- table-valued split function (dbo.MySplitDelimitedString must be created
-- separately and return a table with an ID column).
-- T-SQL local variables are prefixed with '@' ('#' denotes a temp table).
DECLARE @MyParam NVARCHAR(3000);
SET @MyParam = '1,2,5,456,454,343,3464';

SELECT
    *
FROM
    MyTable
WHERE
    MyTableID IN (SELECT ID FROM dbo.MySplitDelimitedString(@MyParam, ','));
And you would need to create a MySplitDelimitedString type table-valued function that would split a string and return a TABLE (ID INT) object.
A set based solution that splits the id's into ints and join with the base table which will make use of index on the base table id. I assumed the id would be an int, otherwise just remove the cast.
-- Set-based split of a comma-separated id list, joined to table_name and
-- returned in the order the ids appear in the list.
-- T-SQL local variables are prefixed with '@' ('#' denotes a temp table).
DECLARE @ids nvarchar(100) = N'1,2,5,456,454,343,3464';

WITH nums AS ( -- Generate one number per character position in @ids
    SELECT TOP (LEN(@ids)) ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS n
    FROM sys.messages
)
, pos1 AS ( -- Get comma positions (CHARINDEX yields 0 once no comma remains)
    SELECT c.ci
    FROM nums AS n
    CROSS APPLY (SELECT CHARINDEX(',', @ids, n.n) AS ci) AS c
    GROUP BY c.ci
)
, pos2 AS ( -- Distinct positions plus start (0) and end (length + 1)
    SELECT ci
    FROM pos1
    UNION SELECT 0
    UNION SELECT LEN(@ids) + 1
)
, pos3 AS ( -- Add row number so consecutive positions can be paired
    SELECT ci, ROW_NUMBER() OVER (ORDER BY ci) AS r
    FROM pos2
)
, ids AS ( -- Ids (text between consecutive positions) and row id for ordering
    SELECT
        CAST(SUBSTRING(@ids, p1.ci + 1, p2.ci - p1.ci - 1) AS int) AS id,
        ROW_NUMBER() OVER (ORDER BY p1.ci) AS r
    FROM pos3 AS p1
    INNER JOIN pos3 AS p2 ON p2.r = p1.r + 1
)
SELECT *
FROM ids AS i
INNER JOIN table_name AS t ON t.id = i.id
ORDER BY i.r;
You can also try this by using regex to get the input values from comma separated string :
-- Oracle: CONNECT BY generates LEVEL = 1, 2, ... and REGEXP_SUBSTR picks the
-- LEVEL-th comma-free token of p_ids until no token is left.
SELECT *
FROM table_name
WHERE id IN (
    SELECT REGEXP_SUBSTR(p_ids, '[^,]+', 1, LEVEL)
    FROM dual
    CONNECT BY REGEXP_SUBSTR(p_ids, '[^,]+', 1, LEVEL) IS NOT NULL
);

SQL How do I find values from a list that are not in a table

I have a table with values in a field called 'code'.
ABC
DFG
CDF
How would I select all codes that are not in the table from a list I have?
Eg:
SELECT * from [my list] where table1.code not in [my list]
the list is not in a table.
The list would be something like "ABC","BBB","TTT" (As strings)
Try this:
-- Codes stored in Table1 that do not appear in the literal list.
SELECT t.code
FROM Table1 AS t
WHERE t.code NOT IN ('ABC', 'CCC', 'DEF'); -- values from your list
It will result:
DFG
CDF
If the list is in another table, try this:
-- Codes in Table1 that have no matching code in Table2.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL code in
-- Table2 would make the predicate unknown for every row and return nothing.
SELECT t1.code
FROM Table1 AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM Table2 AS t2
    WHERE t2.code = t1.code
);
As per your requirement, try this:
-- List values (Table2.list) that do not occur as a code in table1.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL code in
-- table1 would make the predicate unknown for every row and return nothing.
SELECT t2.list
FROM Table2 AS t2
WHERE NOT EXISTS (
    SELECT 1
    FROM table1 AS t1
    WHERE t1.code = t2.list
);
It will select the list values that are not in code.
See an example in SQL Fiddle
The key point of the question is that the "ABC","BBB","TTT" source data needs to be turned into a table.
that table will look like
|---+
|val|
|---+
|ABC|
|BBB|
|TTT|
SQLite doesn't have a split function, so it is a little hard to split your list into a table.
You can use a recursive CTE to emulate a split function.
You need to use the replace function to remove the " double quotes from your
source data.
There are two columns in the CTE:
the val column carries your list data,
the rest column remembers the part of the string that has not been split yet.
You will get a table from CTE like this.
|---+
|val|
|---+
|ABC|
|BBB|
|TTT|
Then you can compare the data with table1.
Not IN
-- split: peels one comma-terminated item off the front of rest per step.
--   val  = the item just extracted ('' for the seed row)
--   rest = the part of the list that is still unsplit
-- list_items: the extracted items without the empty seed row.
WITH RECURSIVE split(val, rest) AS (
    SELECT '', replace('"ABC","BBB","TTT"', '"', '') || ','
    UNION ALL
    SELECT
        substr(rest, 0, instr(rest, ',')),
        substr(rest, instr(rest, ',') + 1)
    FROM split
    WHERE rest <> ''
),
list_items AS (
    SELECT val
    FROM split
    WHERE val <> ''
)
SELECT *
FROM list_items AS item
WHERE item.val NOT IN (
    SELECT t1.code
    FROM table1 AS t1
)
sqlfiddle:https://sqliteonline.com/#fiddle-5adeba5dfcc2fks5jgd7ernq
Output Result:
+---+
|val|
+---+
|BBB|
|TTT|
If you want to show it in a line,use GROUP_CONCAT function.
-- Same recursive split as before, but the missing values are joined into a
-- single comma-separated string with GROUP_CONCAT.
--   val  = the item just extracted ('' for the seed row)
--   rest = the part of the list that is still unsplit
WITH RECURSIVE split(val, rest) AS (
    SELECT '', replace('"ABC","BBB","TTT"', '"', '') || ','
    UNION ALL
    SELECT
        substr(rest, 0, instr(rest, ',')),
        substr(rest, instr(rest, ',') + 1)
    FROM split
    WHERE rest <> ''
),
list_items AS (
    SELECT val
    FROM split
    WHERE val <> ''
)
SELECT GROUP_CONCAT(item.val, ',') AS val
FROM list_items AS item
WHERE item.val NOT IN (
    SELECT t1.code
    FROM table1 AS t1
)
Output Result:
BBB,TTT
sqlfiddle:https://sqliteonline.com/#fiddle-5adecb92fcc36ks5jgda15yq
Note: SELECT * from [my list] where table1.code not in [my list] cannot work, because the query never references table1 in its FROM clause, so the table1.code column cannot be resolved.
You can use NOT EXISTS or a JOIN to get the expected result.
sqlfiddle:https://sqliteonline.com/#fiddle-5adeba5dfcc2fks5jgd7ernq
Can you use common table expressions?
-- Build the list as an inline table and return the values that are missing
-- from table1.
-- NOT EXISTS is used instead of NOT IN so that a NULL in table1.code cannot
-- empty the result; no DISTINCT is needed for an existence check.
WITH wanted(code) AS (
    VALUES ('ABC'), ('BBB'), ('TTT')  -- append further list values here
)
SELECT wanted.code
FROM wanted
WHERE NOT EXISTS (
    SELECT 1
    FROM table1
    WHERE table1.code = wanted.code
);
This would allow you to create a temporary table defined with your list of strings within the VALUES statement. Then use standard SQL to select values NOT IN your table1.code column.
Is this solution good, or am I missing something?
-- Demo table holding the existing codes.
CREATE TABLE table10 (
    code VARCHAR(20)
);

INSERT INTO table10 (code) VALUES ('ABC');
INSERT INTO table10 (code) VALUES ('DFG');
INSERT INTO table10 (code) VALUES ('CDF');

-- The list is spelled out as a UNION ALL derived table and filtered against
-- the stored codes.
SELECT *
FROM (
    SELECT 'ABC' AS x
    UNION ALL
    SELECT 'BBB'
    UNION ALL
    SELECT 'TTT'
) AS wanted
WHERE wanted.x NOT IN (SELECT code FROM table10);
-- returns: BBB
--          TTT
See SQL Fiddle.
This can also be achieved using a stored function:
DELIMITER //

DROP FUNCTION IF EXISTS testcsv
//

-- testcsv(csv): returns the items of a comma-separated, optionally
-- double-quoted list that do NOT exist in table1.code, joined with ','.
--
--   csv      e.g. '"ABC","BBB","TTT"'; spaces around items are ignored.
--   returns  e.g. 'BBB,TTT'; '' when every item exists; NULL for NULL input.
--
-- Declared READS SQL DATA (not DETERMINISTIC) because the result depends on
-- the current contents of table1.
CREATE FUNCTION testcsv(csv VARCHAR(255)) RETURNS VARCHAR(255)
READS SQL DATA
BEGIN
    DECLARE pos INT DEFAULT 0;
    DECLARE is_present INT DEFAULT 0;
    DECLARE item VARCHAR(255);
    DECLARE notin VARCHAR(255) DEFAULT '';

    -- A NULL list would never satisfy the loop's exit test (csv = '').
    IF csv IS NULL THEN
        RETURN NULL;
    END IF;

    REPEAT
        SET pos = INSTR(csv, ',');
        IF pos = 0 THEN
            -- Last (or only) item: trim spaces first, then the quotes,
            -- exactly as for the preceding items.
            SET item = TRIM(BOTH '"' FROM TRIM(csv));
            SET csv = '';
        ELSE
            SET item = TRIM(BOTH '"' FROM TRIM(SUBSTRING(csv, 1, pos - 1)));
            SET csv = SUBSTRING(csv, pos + 1);
        END IF;

        -- EXISTS yields 0/1 and, unlike SELECT ... INTO, does not fail when
        -- table1 contains the same code more than once.
        SET is_present = EXISTS (SELECT 1 FROM table1 WHERE code = item);

        IF NOT is_present THEN
            IF notin = '' THEN
                SET notin = item;
            ELSE
                SET notin = CONCAT(notin, ',', item);
            END IF;
        END IF;
    UNTIL csv = ''
    END REPEAT;

    RETURN notin;
END
//

DELIMITER ;

SELECT testcsv('"ABC","BBB","TTT","DFG"');
Output:
BBB,TTT