Stored Procedure with R Script in SSMS 2016 Error

I have a stored procedure in SQL Server 2016 that executes an external R script. I'm having trouble figuring out why the procedure works for me, but whenever another user runs it, they get the error below. Once they have run it, I can no longer run the procedure either; it fails with the same error:
Location: tmpilb.cpp:2532
Expression: fFalse
SPID: 76
Process ID: 2036
Description: Attempt to access expired blob handle (1)
Msg 21, Level 20, State 1, Line 0
Warning: Fatal error 3624 occurred at Jun 19 2017 3:07PM. Note the error
and time, and contact your system administrator.
Msg 596, Level 21, State 1, Line 0
Cannot continue the execution because the session is in the kill state.
Msg 0, Level 20, State 0, Line 0
A severe error occurred on the current command. The results, if any, should
be discarded.
The stored procedure executes an R script that connects to a separate database via RODBC, pulls data, performs manipulations, and sends the results back into SQL Server to populate a table.
EDIT: Including Stored Procedure code:
USE [Park_Analytics]
GO
/****** Object: StoredProcedure [dbo].[storedproc] Script Date: 6/19/2017 2:40:38 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER procedure [dbo].[storedproc]
@startdate date = NULL
AS
set @startdate = ISNULL(@startdate,DATEADD(wk,DATEDIFF(wk,7,GETDATE()),-15))
declare @enddate datetime = dateadd(D,1,@startdate)
create table #temp_results
(Park NVARCHAR(max) NOT NULL,
Turbine int NOT NULL,
avg_wtgp_kw float NULL,
avg_metwndspd_ms float NULL,
avg_metextmp_degc float NULL,
st_wtgp_kw float NULL,
st_metwndspd_ms float NULL,
st_metextmp_degc float NULL,
site_avg_wtgp_kw float NULL,
site_avg_metwndspd_ms float NULL,
site_avg_metextmp_degc float NULL,
site_st_wtgp_kw float NULL,
site_st_metwndspd_ms float NULL,
site_st_metextmp_degc float NULL,
Z_Score_avg_wtgp_kw float NULL,
Z_Score_avg_metwndspd_ms float NULL,
Z_Score_avg_metextmp_degc float NULL,
Alarm_Level_Z_Score_avg_wtgp_kw float NULL,
Alarm_Level_Z_Score_avg_metwndspd_ms float NULL,
Alarm_Level_Z_Score_avg_metextmp_degc float NULL)
insert into #temp_results
EXEC sp_execute_external_script
@language = N'R'
,@script = N'
library(RODBC)
# String to connect to global database
piglobal_str <- ''driver={SQL Server};server=Server.corp.org\\Servername;database=Assets;uid=generic_login;pwd=generic_pwd''
# Connect to the database
Assets <- odbcDriverConnect(piglobal_str)
# Write SQL Query to execute within R
QueryString <- ''SELECT
w.wpp_name as wpp,
pd.wt,
pd.datetime,
pd.datetime_local,
avg_wtgp_kw,
avg_metwndspd_ms,
avg_metextmp_degc,
avg_wtgst_int
FROM [Assets].[dbo].[PI_Main10mindata] pd
left join [Assets].[dbo].PI_Main10mindata_temp pt on pt.wpp=pd.wpp and pt.datetime = pd.datetime and pt.wt=pd.wt
left join [Assets].[dbo].[pi_wpps] w on w.wpp_pi = pd.wpp
where pt.datetime between DATEADD(wk,DATEDIFF(wk,7,GETDATE()),-15)
and DATEADD(wk,DATEDIFF(wk,7,GETDATE()),-14)
order by pt.datetime''
# Set query results to a dataframe. as.is parameter is to keep all datatypes exactly the same as they are read.
pidata <- sqlQuery(Assets, QueryString,as.is=TRUE)
# Closing the RODBC Channel
odbcClose(Assets)
# Basic data processing. Setting the datatype to numeric.
pidata[,-which(names(pidata) %in% c("datetime","datetime_local","wpp"))] <-
sapply(pidata[,-which(names(pidata) %in% c("datetime","datetime_local","wpp"))], function(x) as.numeric(x))
# Filter on availability and on kW output
filtered <- pidata[pidata$avg_wtgst_int %in% c(100, 200) & pidata$avg_wtgp_kw > 500 & !(is.na(pidata$avg_wtgp_kw)),]
# Remove date columns and state int column. They wont be important from here on out until theyre put back into the SQL table.
df_turbinefilter <- filtered[,-which(names(filtered) %in% c("datetime","datetime_local","avg_wtgst_int"))]
# Taking out the "avg_" in front of all of the columns. Will make sense later when I rename columns after aggregations.
colnames(df_turbinefilter) <- gsub("avg_","",colnames(df_turbinefilter),perl = TRUE)
## PERFORM AGGREGATIONS ##
library(reshape2)
df_melt <- melt(df_turbinefilter, id= c("wpp","wt"))
agged <- dcast(df_melt, wpp + wt ~ variable, mean)
df_meltst <- melt(df_turbinefilter, id=c("wpp","wt"))
aggedst <- dcast(df_meltst, wpp + wt ~ variable, sd)
# Aggregated data joining
# list of data.frames
list_of_df <- list(agged, aggedst)
# names of data.frames
names(list_of_df) <- c("avg", "st")
# my sequence and names of data.frames in a list
my_seq <- seq_along(list_of_df)
my_list_names <- names(list_of_df)
# Renaming new columns. Including avg_ and st_ for average and standard deviation.
for (i in my_seq) {
names(list_of_df[[my_seq[i]]]) <-
paste(my_list_names[i], names(list_of_df[[my_seq[i]]]), sep = "_")
}
# Binding the columns from the agged table (to keep the nomenclature) with the other aggregate calcs
scored_data <- cbind(agged[,which(names(agged) %in% c("wpp","wt"))],
list_of_df$avg[,-which(names(list_of_df$avg) %in% c("avg_wpp","avg_wt"))],
list_of_df$st[,-which(names(list_of_df$st) %in% c("st_wpp","st_wt"))] )
## SITE AGGREGATIONS ##
df_sitefilter <- df_turbinefilter[,-which(names(df_turbinefilter) %in% c("wt"))]
df_meltsite <- melt(df_sitefilter, id="wpp")
aggedsite <- dcast(df_meltsite, wpp ~ variable, mean)
df_meltstsite <- melt(df_sitefilter, id="wpp")
aggedstsite <- dcast(df_meltstsite, wpp ~ variable, sd)
# Bug - have to create a separate vector
wpp <- aggedsite$wpp
# Aggegrated data joining
# list of data.frames
list_of_dfs <- list(aggedsite, aggedstsite)
# names of data.frames
names(list_of_dfs) <- c("site_avg", "site_st")
# my sequence and names of data.frames in a list
my_seqs <- seq_along(list_of_dfs)
my_list_namess <- names(list_of_dfs)
# Renaming new columns.
for (i in my_seqs) {
names(list_of_dfs[[my_seqs[i]]]) <-
paste(my_list_namess[i], names(list_of_dfs[[my_seqs[i]]]), sep = "_")
}
# Binding the columns from the aggedsite table (after setting wpp as an object) to the other aggregate tables
scored_data_site <- cbind(wpp,
list_of_dfs$site_avg[,-which(names(list_of_dfs$site_avg) %in% c("site_avg_wpp"))],
list_of_dfs$site_st[,-which(names(list_of_dfs$site_st) %in% c("site_st_wpp"))] )
## Main SIT Table: All turbine avgs, standard deviations, and site averages and standard deviations.
library(dplyr)
parent_df <- left_join(scored_data, scored_data_site, by = "wpp")
# Creating variables to keep the for-loop dynamic in the case that changes need to be made.
# Finding the number of columns associated with the tags. This will aid in calculating statistics.
tag_length <- length(df_turbinefilter[,-which(names(pidata) %in% c("wpp", "wt"))])
# Finding the first index column number where the condition is met. This is how the for loop will start.
tag_start <- which.max(!(names(parent_df) %in% c("wpp","wt")))
length_before_calcs <- length(parent_df)
## PERFORMING STATISTICAL CALCULATIONS AND ADDING NEW COLUMNS: WTG Avg-Site Avg/(Site St.Dev ^2/WTG St. Dev)
for (i in tag_start:tag_length) {
parent_df[,ncol(parent_df) + 1] <-
(parent_df[,i]-parent_df[,i+2*tag_length])/((parent_df[,i+3*(tag_length)])^2/parent_df[,i+(tag_length)])
names(parent_df)[ncol(parent_df)] <- paste0("Z_Score_",names(parent_df[i]))
}
## ADDING AN ALARM LEVEL COLUMN (0, 1, 2) DEPENDING ON THE CURRENT Z_SCORE
for (i in (length_before_calcs+1):ncol(parent_df)) {
parent_df[,ncol(parent_df) + 1] <-
ifelse(abs(parent_df[,i])<1, 0,ifelse(1<=abs(parent_df[,i]), 1, ifelse(abs(parent_df[,i]) >=2,2,0)))
names(parent_df)[ncol(parent_df)] <- paste0("Alarm_Level_", names(parent_df[i]))
}
OutputDataSet <- parent_df'
,@input_data_1 = N''
insert into SIT_WTG(
Date,
Park,
Turbine,
avg_wtgp_kw,
avg_metwndspd_ms,
avg_metextmp_degc,
st_wtgp_kw,
st_metwndspd_ms,
st_metextmp_degc,
site_avg_wtgp_kw,
site_avg_metwndspd_ms,
site_avg_metextmp_degc,
site_st_wtgp_kw,
site_st_metwndspd_ms,
site_st_metextmp_degc,
Z_Score_avg_wtgp_kw,
Z_Score_avg_metwndspd_ms,
Z_Score_avg_metextmp_degc,
Alarm_Level_Z_Score_avg_wtgp_kw,
Alarm_Level_Z_Score_avg_metwndspd_ms,
Alarm_Level_Z_Score_avg_metextmp_degc)
select @startdate as Date, *
from #temp_results t
drop table #temp_results

After doing quite a bit of research, I realized that this stored procedure was not being automatically recompiled. I added the WITH RECOMPILE option to the procedure definition, i.e. ALTER procedure [dbo].[storedproc] @startdate date = NULL WITH RECOMPILE AS ..., and this solved my problem.

Related

Copy_to in R results in dates being converted to numeric columns

I am attempting to create a small training database for a package that I am writing. I am using the following code to create the database:
library(tidyverse)
library(DBI)
dat <- data.frame(name = rep("Clyde", 100),
DOB = sample(x = seq(as.POSIXct('1970/01/01'), as.POSIXct('1995/01/01'), by="day"),
size = 100, replace = T))
# Example using schemas with SQLite
train_con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
## create tables in primary db
copy_to(dest = train_con, df = dat, name = "client_list", temporary = FALSE)
The above portion works fine. However, when I attempt to pull data from the database, I see that all dates have been converted to numeric.
train_con %>% tbl("client_list")
Can anybody tell me how to fix this? Thanks!
SQLite does not have a datetime type. In the absence of such a type, POSIXct objects are sent to the database as seconds since the UNIX epoch, and SQLite does not know that they are intended to represent date-times.
Either convert such columns yourself after you read them back into R, or use a different database; nearly every database other than SQLite supports date-time types.
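For example, here is a minimal sketch of that conversion for the table created above, assuming the DOB column comes back as numeric seconds since the epoch:
library(dplyr)
# Pull the table back into R, then convert the numeric column to POSIXct
client_list <- train_con %>% tbl("client_list") %>% collect()
client_list$DOB <- as.POSIXct(client_list$DOB, origin = "1970-01-01", tz = "UTC")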

Add new column to MonetDBLite table

I am trying to use R + MonetDB as a large-data analysis stack and am having trouble creating a new column and populating it with data from my analysis. Here is a toy example:
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## insert data into the new column
v1 <- mtcars["mpg"] * pi
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)
And the error message:
Error in .local(conn, statement, ...) :
Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297,
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in% :
the condition has length > 1 and only the first element will be used
From this error I surmise that maybe bind.data can't be used with MonetDBLite?
Question:
How can I add a column(s) to a MonetDBLite table and populate it with data from my R session?
First of all, the INSERT command in your last statement is not correct; you need an UPDATE statement.
That being said, I propose a solution where you can populate your MonetDBLite table directly from R:
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# I added a rownbr to the dataset so it will be easier later
mtcars$rownbr <- 1:nrow(mtcars)
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## insert data into the new column
v1 <- mtcars["mpg"] * pi
for (i in 1:nrow(mtcars)){
myquery <- paste0("UPDATE mtcars SET v1 = ", v1$mpg[i], " where rownbr = ", i, " ;")
dbSendQuery(db, myquery)
}
There seems to be an issue with parameterized SQL queries via dbBind with MonetDBLite (see https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16). The code below works with SQLite:
library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## do computation with R
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)
update_query <- dbSendQuery(db, 'update mtcars set "v1"=$v1 WHERE row_names=$row_names')
dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
dbClearResult(update_query) # release the prepared statement
dbReadTable(db, "mtcars")
dbDisconnect(db)
However, with MonetDBLite, it is producing an error (and a warning) for the dbBind step:
> dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
the condition has length > 1 and only the first element will be used
A workaround I came up with is to use glue_data_sql from the glue package to "manually" compose the queries (without needing to loop through rows):
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
update_query <- glue_data_sql(mtcars, "update mtcars set v1 = {v1} where row_names = {row_names};", .con=db)
lapply(update_query, dbSendQuery, conn=db)
# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)
Yet another workaround that does the insert in one query (closer to @Zelazny7's original attempt):
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbSendQuery(db, "CREATE TABLE mtcars (
row_names VARCHAR(32),
v1 DOUBLE);")
library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
insert_values <- glue_data(mtcars, "('{row_names}', {v1})")
insert_values <- glue_collapse(insert_values, sep=", ", last="")
insert_query <- glue("INSERT INTO mtcars (row_names, v1) VALUES {insert_values}")
dbSendQuery(db, insert_query)
dbReadTable(db, "mtcars")
dbDisconnect(db)

R forecast output to SQL Server

I am doing database analysis using SQL Server and forecasting using R. I need to get the results from R back into the SQL Server database. One approach is to output the forecast data to a text file using write.table and import using BULK INSERT. Is there a better way?
You can use dbBulkCopy from the rsqlserver package. It is a DBI extension that interfaces with bcp, Microsoft SQL Server's popular command-line bulk-copy utility, to quickly copy large files into a table.
# nrow, ncol, cnames and conn are not defined in the original snippet;
# conn is assumed to be an existing rsqlserver connection, and example dimensions are used here.
nrow <- 1000; ncol <- 5
cnames <- paste0("col", seq_len(ncol))
dat <- matrix(round(rnorm(nrow * ncol), 2), nrow = nrow, ncol = ncol)
colnames(dat) <- cnames
id.file <- "temp_file.csv"
write.csv(dat, file = id.file, row.names = FALSE)
dbBulkCopy(conn, "NEW_BP_TABLE", value = id.file)
Thanks for your comments and answers! I went with a solution based on the comment by nrussell. Below is my code. The specific command is the last line; the preceding lines are included to give a little context for anyone trying to use this answer.
library(RODBC)
library(forecast)
data <- sqlQuery(myconn, query) # returns a time series with year, month (both numeric), and value
data_ts <- ts(data$value,
              start=c(data$year[1],data$month[1]), # start is first year and month
              end=c(data$year[nrow(data)],data$month[nrow(data)]), # end is last year and month
              frequency=12)
data_fit <- auto.arima(data_ts)
fct <- forecast(data_fit, 12) # forecast 12 periods from the fitted model
sqlQuery(myconn, 'truncate table dgtForecast') # Pre-existing table
sqlSave(myconn, data.frame(fct), tablename='dgtForecast', rownames='MonthYear', append=TRUE)

Creating a SQL structure table in R using contents of a dataframe

# Create dummy dataframe
dataframe1 <- rbind("a","b","c")
# Create variable list
variablelist <- unique(dataframe1[,1])
# Loop through adding VARCHAR and commas
sql_var_list <- NULL
for(char in seq_along(variablelist)){
sql_var_list <- paste(sql_var_list, variablelist[char], " VARCHAR,", sep="")
}
# Remove the final comma
sql_var_list <- substr(sql_var_list, 1, nchar(sql_var_list)-1)
# Create structure table SQL string
create_structure_table <- paste("CREATE TABLE (", sql_var_list, ")", sep="")
In the above code I'm using the contents of a column in a dataframe in R to build a structure table that will later be created in Redshift. The code works, but my method seems a bit untidy; as I'm new to R, I wonder if someone can suggest a better approach?
You can just use a combination of paste and paste0, as suggested by David Arenburg in the comments:
paste0("CREATE TABLE (",
paste(dataframe1, collapse = " VARCHAR, "),
" VARCHAR)")
# [1] "CREATE TABLE (a VARCHAR, b VARCHAR, c VARCHAR)"

Providing lookup list from R vector as SQL table for RODBC lookup

I have a list of IDs in an R vector.
IDlist <- c(23, 232, 434, 35445)
I would like to write an RODBC sqlQuery with a clause stating something like
WHERE idname IN IDlist
Do I have to read the whole table and then merge it to the IDlist vector within R? Or how can I provide these values to the RODBC statement, so that I recover only the records I'm interested in?
Note: As the list is quite long, pasting individual values into the SQL statement, as in the answer below, won't do it.
You could always construct the statement using paste
IDlist <- c(23, 232, 434, 35445)
paste("WHERE idname IN (", paste(IDlist, collapse = ", "), ")")
#[1] "WHERE idname IN ( 23, 232, 434, 35445 )"
Clearly you would need to add more to this to construct your exact statement.
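For instance, assuming an existing RODBC connection con and a table mytable with an idname column (both placeholders, not from the question), the clause drops into a full query like this:
library(RODBC)
IDlist <- c(23, 232, 434, 35445)
query <- paste0("SELECT * FROM mytable WHERE idname IN (",
                paste(IDlist, collapse = ", "), ")")
result <- sqlQuery(con, query)  # con: an existing RODBC connection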
I put together a solution to a similar problem by combining the tips here and here and running in batches. Approximate code follows (retyped from an isolated machine):
#assuming you have a list of IDs you want to match in vIDs and an RODBC connection in mycon
#queries that don't change
q_create_tmp <- "create table #tmptbl (ID int)"
q_get_records <- "select * from mastertbl as X join #tmptbl as Y on (X.ID = Y.ID)"
q_del_tmp <- "drop table #tmptbl"
#initialize counters and storage
start_row <- 1
batch_size <- 1000
allresults <- data.frame()
while(start_row <= length(vIDs)) {
end_row <- min(length(vIDs), start_row+batch_size-1)
q_fill_tmp <- sprintf("insert into #tmptbl (ID) values %s", paste(sprintf("(%d)", vIDs[start_row:end_row]), collapse=","))
q_all <- list(q_create_tmp, q_fill_tmp, q_get_records, q_del_tmp)
sqlOutput <- lapply(q_all, function(x) sqlQuery(mycon, x))
allresults <- rbind(allresults, sqlOutput[[3]])
start_row <- end_row + 1
}