R: SQL query for mean extraction - sql

I'd like to aggregate CD mean by CD_TALHAO, ID_UNIQUE and DATA_S2 using a SQL query with glue package. But when I try:
library(dplyr)
library(rgdal)
library(rgeos)
library(DBI)
library(glue)
# get AOI
download.file(
"https://github.com/Leprechault/trash/raw/main/stands_example.zip",
zip_path <- tempfile(fileext = ".zip")
)
unzip(zip_path, exdir = tempdir())
# Open the files
setwd(tempdir())
stands_ds <- read.csv("pred_target_stands.csv", sep=";") # Data set
stands_ds <- stands_ds %>%
mutate(DATA_S2 = ymd(DATA_S2))
stands_ds$CLASS<-c(rep("A",129),rep("B",130))
stands_ds$CD<-abs(rnorm(length(stands_ds[,1]),mean=50))
# Crete like a SQL server condition
bq_conn<- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
DBI::dbWriteTable(bq_conn, "stands_ds", stands_ds)
# Aggregate CD mean by CD_TALHAO, ID_UNIQUE and DATA_S2
sqlInput_pred_FARM <- glue::glue_sql("SELECT * FROM stands_ds AVG(CD) GROUP BY CD_TALHAO = {x} AND ID_UNIQUE = {y} AND DATA_S2 = {z}",
x = "001G", y = "CERROCOROADO_001G",
z = "2021-04-02",.con=bq_conn)
pred_attack_BQ_FARM <- dbGetQuery(bq_conn, as.character(sqlInput_pred_FARM, stringsAsFactors = T))
I always have Error: near "(": syntax error as output. Please, any help with it?

Sorry #KU99 but your solution doesn't return the mean values as I expected. Now, I try something new like an object creation for my mean operation and despite some ugly results with REPLICATE(DATE()), now works. The solution is:
# Aggregate CD mean by CD_TALHAO, ID_UNIQUE and DATA_S2
sqlInput_pred_FARM <- glue::glue_sql("SELECT REPLICATE(CD_TALHAO,1) AS TALHAO, REPLICATE(ID_UNIQUE,1) AS ID, REPLICATE(DATE(DATA_S2),1) AS DATE, AVG(CD) AS CD FROM stands_ds GROUP BY CD_TALHAO = {x},ID_UNIQUE = {y}, DATA_S2 = {z} ORDER BY CD_TALHAO = {x},ID_UNIQUE = {y}, DATA_S2 = {z}",
x = "001G", y = "CERROCOROADO_001G",
z = "2021-04-02",.con=bq_conn)
pred_attack_BQ_FARM <- dbGetQuery(bq_conn, as.character(sqlInput_pred_FARM, stringsAsFactors = T))
pred_attack_BQ_FARM
# TALHAO ID DATE CD
#1 001C CERROCOROADO_001C -4661-02-24 49.93823
#2 001G CERROCOROADO_001G -4661-02-24 50.12102

Try the following:
statement <- "SELECT * FROM stands_ds AVG(CD)
GROUP BY CD_TALHAO = ? AND ID_UNIQUE = ? AND DATA_S2 = ?"
pars <- list("001G", "CERROCOROADO_001G","2021-04-02")
pred_attack_BQ_FARM <- dbGetQuery(bq_conn, statement, params = pars)

Related

Portfolio Optimization Using Quadprog Gives the Same Result for Every time even after changing variables

I have a task to construct the efficient frontier using 25 portfolios (monthly data). I tired writing a quadprog code for calculating minimum variance portfolio weights for a given expected rate of return. However, regardless of the expected return, the solver values give me the same set weights and variance, which the global minimum variance portfolio. I found the answer using an analytical solution. Attached are the codes:
basedf <- read.csv("test.csv", header = TRUE, sep = ",")
data <- basedf[,2:26]
ret <- as.data.frame(colMeans(data))
variance <- diag(var(data))
covmat <-as.matrix(var(data))
###minimum variance portfolio calculation
Q <- 2*cov(data)
A <- rbind(rep(1,25))
a <- 1
result <- solve.QP(Dmat = Q,
dvec = rep(0,25),
Amat = t(A),
bvec = a,
meq = 1)
w <-result$solution
w
var <- result$value
var
sum(w)
this is another set of codes giving the me same value::
mvp <- function(e,ep){
Dmat <- 2*cov(e)
dvec <- rep(0, ncol(e))
Amat <- cbind(rep(1, ncol(e)), colMeans(e))
bvec <- c(1, ep)
result <- solve.QP(Dmat = Dmat, dvec = dvec, Amat = Amat, bvec = bvec, meq=1)
wp <- result$solution
varP <- result$value
ret_values <- list(wp, varP)
names(ret_values) <- c("wp", "VarP")
return(ret_values)
}
z <- mvp(data, -.005)
z$wp
sum(z$wp)
z$VarP
ef <- function(e, min_e, max_e){
list_e <- seq(min_e,max_e, length=50)
loop <- sapply(list_e, function(x) mvp(e, x)$VarP)
effF <- as.data.frame(cbind(list_e,loop))
minvar <- min(effF$loop)
L <- effF$loop==minvar
minret <- effF[L,]$list_e
minpoint <- as.data.frame(cbind(minret,minvar))
minvarwp <- mvp(e, min_e)$wp
rlist <- list(effF, minpoint, minvarwp)
names(rlist) <- c( "eFF", "minPoint", "wp")
return(rlist)
}
in the efficient frontier, all the 50 portfolios have same level of variance. can anyone tell me whats wrong with solver equation??? thanks.
I tried quadprog but couldnt solve it.

can't execute mysql request via R

I am having difficulty. I need to get the desired result through mysql query in R .
my attempt.
load libraries which i use.
library(RMySQL)
library(data.table)
library(dplyr)
library(Rcpp)
library(zoo)
library(gsubfn)
library(proto)
library(RSQLite)
library(DBI)
library(RMariaDB)
library(dbx)
then i try perform connect
getDf <- function (connect, sql)
{
str(paste("EXECUTE: ", sql))
query <- dbSendQuery(connect, sql)
df <- dbFetch(query, n = -1)
dbClearResult(query)
str(paste("ROW FETCHED:", nrow(df)))
df
}
then
db_user <- 'k'
db_password <- 'F'
db_name <- 'yyy'
db_table <- 'mytable'
db_host <- 'xxx' # for local access
db_port <- 3306
readDB <- dbConnect(RMariaDB::MariaDB(), user = db_user, pass = db_password, dbname = db_name, host = db_host, port = db_port)
and here i try get prepared data via query mysql
df333 <- getDf(readDB, paste("UPDATE incoming_aggregation_google ia
JOIN data_aggregation_google_median dm ON
(ia.agency_id = dm.agency_id)
AND (ia.search_category_id = dm.search_category_id)
AND (ia.offer_category_id = dm.offer_category_id)
AND (ia.flight_codes = dm.flight_codes)
AND (ia.search_type_category_id = dm.search_type_category_id)
SET ia.prediction_diff_percent_base_price = dm.median_diff_percent_base_price + (0.4 * RAND() - 0.2);
UPDATE incoming_aggregation_google_general ia
JOIN data_aggregation_google_general_median dm ON
(ia.agency_id = dm.agency_id)
AND (ia.offer_category_id = dm.offer_category_id)
AND (ia.search_type_category_id = dm.search_type_category_id)
AND (ia. service_discount_category_id = dm. service_discount_category_id)
SET ia.prediction_diff_percent_base_price = dm.median_diff_percent_base_price + (0.4 * RAND() - 0.2);
UPDATE incoming_aggregation_google_general da
JOIN data_aggregation_google_general_median i ON
(
i.agency_id=da.agency_id
AND i.offer_category_id=da.offer_category_id
AND i.service_discount_category_id=da.service_discount_category_id
AND i.search_type_category_id=da.search_type_category_id
)
set da.prediction_diff_percent_base_price = i.median_diff_percent_base_price;", db_table))
but the error
Error: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'UPDATE incoming_aggregation_google_general ia
JOIN data_aggregation_google_gener' at line 9 [1064]
Please help me understand what I did wrong and how to fix it? If what I want to do is not can be done, is there a way to translate the essence of this query into logic R.
Thanks for your any valuable help

How to send data frame from r to SQL?

#setwd('Desktop/IE332')
install.packages("wakefield")
install.packages("RMySQL")
install.packages("randomNames")
install.packages('password')
install.packages('OpenRepGrid')
library(RMySQL)
library(password)
library(wakefield)
library(randomNames)
industriesData <- read.csv('Industries.csv')
skills <- read.csv('Skills.csv')
sp500 <- read.csv("http://www.princeton.edu/~otorres/sandp500.csv")
companies <- sample(sp500$Name, 100)
locations <- c('Northwest', 'Midwest', 'Northeast', 'South', 'Southwest', 'Southeast',
'International') # Locations
gpas <- c(4,3.5,3,2.5,2)
n <- 100
locPrefs <- numeric(n)
studentSkills <- matrix(nrow=100,ncol=10)
studentInd <- matrix(nrow=100,ncol=5)
jobSkills <- matrix(nrow=100,ncol=5)
for(j in 1:n){ # Samples random skills assigned to students
studentSkills[j,] <- sample(skills[,1],10,replace=FALSE)
studentInd[j,] <- sample(industriesData[,1],5,replace=FALSE)
jobSkills[j,] <- sample(skills[,1],5,replace=FALSE)
}
studentData <- data.frame('first names'=randomNames(n, which.names = 'first'),'last
names'=randomNames(n, which.names = 'last'),'username'=seq(1,
n),'password'=password(8,numbers=TRUE),'gpa'=gpa(n, mean = 85.356, sd = 3.2, name =
"GPA"),'visa'=sample(c("N","Y"), size = n, replace = TRUE, prob = c(.78, .22)), 'loc
pref'=sample(locations,n,replace = TRUE), 'skill'=studentSkills, 'Industry'=studentInd) # Student data
employerData <- data.frame('company names'=companies, 'pref
gpa'=sample(gpas,n,replace=TRUE), 'sponser?'=sample(c('N','Y'), size=n, replace = TRUE, prob
= c(.78, .22)), 'job id'=sample(seq(100,999),n,replace=FALSE),'pref skill'=jobSkills,
'industry'=sample(industriesData[,1],n,replace=TRUE),'location'=sample(locations,n,replace =
TRUE)) # Employer data
I am trying to send certain columns of the studentData and employerData to tables in SQL, how would i go about doing that? I have a table named students where I would like to upload the first and last names of the studentsData data frame into this SQL table.

How to pass function parameters in sql query in R? [duplicate]

This question already has answers here:
Pass R variable to RODBC's sqlQuery? [duplicate]
(4 answers)
Closed 5 years ago.
I have the following function in R:
dbhandle <- odbcDriverConnect('driver={SQL Server};
server=myServer;database=myDB;
trusted_connection=true')
func <- function(x){
sqlQuery(dbhandle, 'select attr from myTable where cond = x')
}
I want to know how to pass the x parameter to the where condition for the cond attribute. For example, I want to calculate func(10), then the query must be:
select attr from myTable where cond = 10
when using RODBC, I prefer to use parameterized queries. This becomes more valuable when you are passing character strings, as this avoids SQL Injection.
library(RODBCext)
sqlExecute(dbhandle,
"select attr from myTable where cond = ?",
data = list(cond = 10),
fetch = TRUE,
stringsAsFactors = FALSE)
You could try with paste:
func <- function(x){
sqlQuery(dbhandle, paste('select attr from myTable where cond =', x))
}
I like the glue package for these things, though it's really the same as paste, just prettier :
library(glue)
func <- function(x){
sqlQuery(dbhandle, glue('select attr from myTable where cond = {x}'))
}
In this simple case you could even use dplyr itself:
library(dplyr)
func <- function(x) {
tbl(dbhandle, "myTable") %>%
select(attr) %>%
filter(cond == x) %>%
collect()
}

dateRangeInput in reactive SQl query- date output is in wrong format

Has anyone faced the problem with dateRangeInput? I want to use it to filter my data, however the output of the dateRangeInput is wrong as 2016-02-21, and I need it to be 21.02.2016. I thought that the format=dd.mm.yyyy will solve it, however I do not get any change..
My code:
library(ROracle)
library(shiny)
library(DT)
server <- shinyServer(
function(input, output, session) {
con <- dbConnect(dbDriver("Oracle"),"xx/K",username="user",password="pwd")
tableList <- dbListTables(con,schema="K")
updateSelectizeInput(session, "tabnames", server = TRUE, choices = tableList)
output$date_ui=renderUI({
dateRangeInput('date',
label = 'Datum: dd.mm.yyyy',
start = Sys.Date()-1, end = Sys.Date()+1,
separator = " bis ",
format = 'dd.mm.yyyy',language = "de")
})
sqlOutput <- reactive({
sqlInput <- paste("select rownum * from K.",input$tabnames, "where dati_create between to_date('",format(input$date[1]),"','dd.mm.yyyy') and to_date('",input$date[2],"','dd.mm.yyyy')")
print(sqlInput) # I have printed it to see the format of the date
dbGetQuery(con$cc, sqlInput, stringsAsFactors = T)
})
output$table <- DT::renderDataTable(sqlOutput(), server=TRUE, rownames=TRUE, filter="top", options=list(pageLength=10))
session$onSessionEnded(function() { dbDisconnect(con) })
})
ui_panel <-
tabPanel("Test",
sidebarLayout(
sidebarPanel(
),
mainPanel(
selectizeInput("tabnames",label = "server side", choices = NULL),
uiOutput("date_ui"),
tableOutput("out"),
tableOutput("table")
)
)
)
ui <- shinyUI(navbarPage("Test",ui_panel))
runApp(list(ui=ui,server=server))
After printing sqlInput:
[1] "select rownum * from K.xy where dati_create between to_date(' 2016-02-21 ','dd.mm.yyyy') and to_date(' 2016-02-23 ','dd.mm.yyyy')"
Error in .oci.GetQuery(conn, statement, data = data, prefetch = prefetch, :
ORA-01861: literal does not match format string
Does anyone have an idea how I can change the format of the date in dateRangeInput?
Thanks for any help!