Filtering with sqldf in R when fields have quotation marks - sql

I have a large sql db (7gbs), where the fields appear to have quotation marks in them.
For example:
res <- dbSendQuery(con, "
SELECT *
FROM master")
dbf2 <- fetch(res, n = 3)
dbClearResult(res)
Yields
NPI EntityTypeCode ReplacementNPI EmployerIdentificationNumber.EIN.
1 "1679576722" "1" "" ""
2 "1588667638" "1" "" ""
3 "1497758544" "2" "" "<UNAVAIL>"
ProviderOrganizationName.LegalBusinessName. ProviderLastName.LegalName. ProviderFirstName
1 "" "WIEBE" "DAVID"
2 "" "PILCHER" "WILLIAM"
3 "CUMBERLAND COUNTY HOSPITAL SYSTEM, INC" "" ""
I've been trying to get a smaller table by filtering on, say EntityTypeCode but I'm not getting any results. Here's an example of a query not getting anything, any advice? I think the issue is use of double quotes in the fields.
# Filter on State
res <- dbSendQuery(npi2, "
SELECT *
FROM master
WHERE (ProviderBusinessPracticeLocationAddressStateName = 'PA')
")
# Filter on State and type
res <- dbSendQuery(npi2, "
SELECT *
FROM master
WHERE (ProviderBusinessPracticeLocationAddressStateName = 'PA') AND
(EntityTypeCode = '1')
")

Escape the inner double quotes (ie, the ones in the cell) with a \.
res <- dbSendQuery(npi2, "
SELECT *
FROM master
WHERE (ProviderBusinessPracticeLocationAddressStateName = '\"PA\"') AND
(EntityTypeCode = '1')
")
This produces the following string:
SELECT *
FROM master
WHERE (ProviderBusinessPracticeLocationAddressStateName = '"PA"')

Related

Dynamic raw query (select clause) Django

I'm trying to execute a raw query in Django where I dynamically want to pick column names.
Eg
def func(all=True){
if all:
query_parameters = {
'col': '*'
}
else:
query_parameters = {
'col': 'a,b,c'
}
with connections["redshift"].cursor() as cursor:
cursor.execute(
"select %(col)s from table limit 2 ;",
query_parameters,
)
val = dictfetchall(cursor)
return val
}
Django is executing it like.
select "*" from table limit 2;
so the output is just like select "*"
*
and in the else case it is executed like
select "a,b,c" from table limit 2;
so the output is a,b,c
How can I run the command so that Django run it like
select a , b , c from table limit 2
so that the output is
a b c
1 2 3
4 5 6
I found a hack myself.
See Here
Explanation
Prepared query step by step
Input data (Columns I need)
self.export_col = “a,b,c”
def calc_col(self):
if self.exp == 'T':
select_col = ""
self.export_col = self.export_col.split(',')
for col in self.export_col:
select_col += col + ","
select_col = select_col[:-1]
self.export_col = select_col
else:
self.export_col += '*'
def prepare_query(self):
query += " SELECT "
query += self.export_col
query += """ from table limit 2;"""

How to vectorize an SQL update query in R

I have the following zoo object (res)
column1 column2 column3
2015-12-30 3.2735 2.3984 1.1250
2015-12-31 2.5778 1.8672 1.1371
2016-01-01 3.3573 2.4999 1.1260
2016-01-04 3.3573 2.4999 1.1463
and I would like to produce a vectorized update query.
UPDATE table SET column1=3.2735, column2=2.3984, column3=1.1250 WHERE dt = '2015-12-30';
UPDATE table SET column1=2.5778, column2=1.8672, column3=1.1371 WHERE dt = '2015-12-31';
etc.
I was able to do something similar previously for an INSERT query
sColumns <- paste0("dt, index, ", paste0(colnames(res), collapse=", "))
sValues = apply(data.frame(paste0("'", index(res), "'"), paste0("'", index, "'"), coredata(res)),
1 , paste, collapse = ",")
sql <- paste0("INSERT INTO table (", sColumns, ") VALUES (", sValues, ")")
which was considerably easier because all column names were grouped, and all values were grouped. For an UPDATE query, I have to combine alternately columns and fields.
So far, I have the following:
sColumns <- paste0(colnames(res), "=")
tmp <- paste(c(matrix(c(sColumns, res[1, ]), 2, byrow = T)), collapse = ", ")
tmp <- gsub("=, ", "=", tmp)
Which produces (for one row), output like:
[1] "column1=3.2735, column2=2.3984, column3=1.125"
Can anyone provide guidance as to how I can use something like apply() to do this for all rows of 'res'?
1) Try this:
library(zoo)
sapply(1:nrow(res), function(i) paste0(
"UPDATE table SET ",
toString(paste0(names(res), "=", coredata(res)[i, ])),
" WHERE dt='", time(res)[i], "'"))
giving the following character vector:
[1] "UPDATE table SET column1=3.2735, column2=2.3984, column3=1.125 WHERE dt='2015-12-30'"
[2] "UPDATE table SET column1=2.5778, column2=1.8672, column3=1.1371 WHERE dt='2015-12-31'"
[3] "UPDATE table SET column1=3.3573, column2=2.4999, column3=1.126 WHERE dt='2016-01-01'"
[4] "UPDATE table SET column1=3.3573, column2=2.4999, column3=1.1463 WHERE dt='2016-01-04'"
2) And a variation giving the same result:
sapply(unname(split(res, time(res))), function(z) paste0(
"UPDATE table SET ",
toString(paste0(names(z), "=", z)),
" WHERE dt='", time(z), "'"))
Note 1: If your table is not too large then you could alternately consider reading it into R, performing the update in R and then writing it back.
Note 2: Here is the input shown reproducibly:
Lines <- "Date column1 column2 column3
2015-12-30 3.2735 2.3984 1.1250
2015-12-31 2.5778 1.8672 1.1371
2016-01-01 3.3573 2.4999 1.1260
2016-01-04 3.3573 2.4999 1.1463"
library(zoo)
res <- read.zoo(text = Lines, header = TRUE)
Is this what you want?
foo <- apply(res,1, function(x){
sprintf("%s = %f", names(x),x)
})
lapply(colnames(foo), function(nn) {
sprintf("UPDATE table SET %s WHERE dt = \'%s\'",
paste(foo[,nn], collapse=","),
nn)
})
which gives:
[[1]]
[1] "UPDATE table SET column1 = 3.273500,column2 = 2.398400,column3 = 1.125000 WHERE dt = '2015-12-30'"
[[2]]
[1] "UPDATE table SET column1 = 2.577800,column2 = 1.867200,column3 = 1.137100 WHERE dt = '2015-12-31'"
[[3]]
[1] "UPDATE table SET column1 = 3.357300,column2 = 2.499900,column3 = 1.126000 WHERE dt = '2016-01-01'"
[[4]]
[1] "UPDATE table SET column1 = 3.357300,column2 = 2.499900,column3 = 1.146300 WHERE dt = '2016-01-04'"

Remove first 4 characters from a string

Through a bunch of if statements, I'm concatenating a string to be used as a SQL statement. The first 3 characters of this string will invariably be "OR ". How can I efficiently remove these first 4 characters.
example:
sql = " OR tennis = TRUE OR basetball = TRUE"
if condition sql = sql + " OR racquetball = TRUE"
So I need to remove the "OR" and 2 spaces at the beginning of whatever string is created.
Thank you for your time
It would be better to just join things together in the end:
sql = [ ]
sql << 'tennis = TRUE'
sql << 'baseball = TRUE'
if (condition)
sql << 'racquetball = TRUE'
end
sql = sql.join(' OR ')
You can also reduce redundancy by remapping things:
sports = [ :tennis, :baseball ]
sql = sports.collect { |s| "#{s} = TRUE" }.join(' OR ')
If you're intent on trimming off the first four letters:
sql = " OR x OR y"
sql.slice!(" OR ")
sql
# => "x OR y"

VB.net Auto Newline After Copying

First of all, I'm still new in VB.net and I had encountered one weird issue
I had created a tools that will split content from multilines textbox A into lines of string and add some characters and join them back and display in another multilines textbox B (A -> split content -> add character -> join -> display in B). The sample would be like this
Original Data from A:
This
is
a
test
data
Result Displayed in B:
Row 0 = This
Row 1 = is
Row 2 = a
Row 3 = test
Row 4 = data
Result COPIED from B:
Row 0 = This
Row 1 =
is
Row 2 =
a
Row 3 =
test
Row 4 =
data
The source code is
tempA = ""
tempB = ""
tempA = A.Text()
stringAry = tempA.Split(Environment.NewLine)
For iCounter As Integer = 0 To stringAry.Length - 1
tempB = tempB + "Row " + iCounter.ToString + " = " + stringAry(iCounter).ToString + Environment.NewLine
Next
B.Text() = tempB
So may I know why the copied result will be different from result displayed and how could I solve this?
You should remove any unwanted new line characters from the stringAry(iCounter) value.
tempA = ""
tempB = ""
tempA = A.Text()
stringAry = tempA.Split(Environment.NewLine)
For iCounter As Integer = 0 To stringAry.Length - 1
tempB = tempB + "Row " + iCounter.ToString + " = " + stringAry(iCounter).ToString.Replace(Environment.NewLine, string.Empty) + Environment.NewLine
Next
B.Text() = tempB

How to use dash (-) as value for criteria

I don't know if this is possible in MS Access, but what I want to do is detect dash (-) and use Between in SQL statement and or Comma.
Ex. I have a table called "Books" with fields: BookID, Title, Subject.
Now how can I query Books table that allows a user to input a value in a textbox like:
1 or 1-5 or 1,3,4.
If the value is 1 the sql statement should be:
SELECT * FROM Books WHERE BookID = 1
If the value of 1-5 then the sql statement should be:
SELECT * FROM Books WHERE BookID BETWEEN 1 And 5
If the value of 1,3,4 then the sql statement should be:
SELECT * FROM Books WHERE BookID IN (1,3,4)
Cut from something I already have;
s = "SELECT * FROM Books WHERE BookID" & parseSarg("11,22,33")
using;
Public Function parseSarg(str As String) As String
Dim re As Object: Set re = CreateObject("vbscript.regexp")
re.Global = True
Select Case True
'//is number
Case reTest(re, "^\d+$", str): parseSarg = " = " & str
'//is number-number
Case reTest(re, "^\d+-\d+$", str): parseSarg = " BETWEEN " & Replace$(str, "-", " AND ")
'//is number,number[,number]
Case reTest(re, "^\d+(?:,\d+)*$", str): parseSarg = " IN (" & str & ")"
'//is >number
Case reTest(re, "^>\s*\d+$", str): parseSarg = " >" & Mid$(str, 2)
Case Else
parseSarg = " = 0 AND 1=0"
End Select
End Function
Function reTest(re As Object, pattern As String, value As String) As Boolean
re.pattern = pattern
reTest = re.Test(value)
End Function
SELECT Books.Title FROM Books WHERE Books.BookID > 1 AND Books.BookID < 5;