Subscripts when using bquote in geom_text followed by text - ggplot2

I've been looking through stackoverflow for the past few hours and couldn't find a solution which works for my problem.
The issue: I'm trying to use geom_text to manually write in a manually calculated value, which is fine when I just write the value and its unit using bquote (for example just writing 4.4µm is fine). However, I would like to write out D[g]= 4.4µm (with the g subsetted!) in the top left corner using geom_text. But once I start trying to add a subscript to it, it doesn't work and I get one of the following errors:
Error: Don't know how to add RHS to theme object
Error: Aesthetics must be either length 1 or the same as the data (14): label
Error: unexpected string constant in:
" #geom_text(label = bquote("D" [g] "= 4.4µm"), x = -.3, y = 900, size = 10) +
geom_text(label = bquote(D[g]"=4.4µm"
Here is the code I've been using thus far:
A = structure(list(`Size (µm)` = c(0.85, 0.85, 1.6, 1.6, 2.7, 2.7,
4, 4, 5.85, 5.85, 9, 9, 20, 20), `C/dlogd` = c(0, 70.7482842209313,
70.7482842209313, 147.721992341133, 147.721992341133, 752.133343128365,
752.133343128365, 296.076678012312, 296.076678012312, 226.648066580862,
226.648066580862, 302.286593848111, 302.286593848111, 0), `+s` = c(0,
175.47011068069, 175.47011068069, 243.15534458114, 243.15534458114,
1007.91042406178, 1007.91042406178, 439.475898343651, 439.475898343651,
366.578774657598, 366.578774657598, 385.065980566499, 385.065980566499,
0), `-s` = c(0, -33.9735422388272, -33.9735422388272, 52.2886401011273,
52.2886401011273, 496.356262194949, 496.356262194949, 152.677457680973,
152.677457680973, 86.7173585041254, 86.7173585041254, 219.507207129723,
219.507207129723, 0), Sampling = c("A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A"), Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("21st of February 2019",
"19th of June 2019", "3rd of July 2019", "17th of July 2019"), class = "factor"),
Datenoyear = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("21st of February", "19th of June",
"3rd of July", "17th of July"), class = "factor"), DateNumber = c(21.2,
21.2, 21.2, 21.2, 21.2, 21.2, 21.2, 21.2, 21.2, 21.2, 21.2,
21.2, 21.2, 21.2)), row.names = c(NA, -14L), class = c("tbl_df",
"tbl", "data.frame"))
ggplot(A, aes(x= `Size (µm)`)) +
geom_line(aes(y = `C/dlogd`), size = 1.5) +
geom_line(aes(y = `+s`), linetype = "twodash", color = "red") +
geom_line(aes(y = `-s`), linetype = "dashed" , color = "blue") +
geom_point(aes(y = `+s`), color = "red") +
geom_point(aes(y = `C/dlogd`)) +
geom_point(aes(y = `-s`), color = "blue") +
scale_x_log10(limits = c(0.4,21)) +
#geom_text(label = bquote("D" [g] "= 4.4µm"), x = -.3, y = 900, size = 10) + #gives the second error
#geom_text(label = bquote(D[g]"=4.4µm"), x = -.1, y = 900, size = 10) + #gives the 3rd error
# geom_text(label = bquote("4.4µm"), x = -0.3, y = 900, size = 10) + #No error, but doesn't give me the "D[g] = " I need
theme_bw() +
annotation_logticks(side = "b") +
facet_grid(~Datenoyear) +
xlab(element_blank()) +
ylab(element_blank())
What is it that I'm doing wrong? I've tried looking at these solutions:
http://rchaeology.blogspot.com/2012/11/combining-superscripts-subscripts-and.html
https://community.rstudio.com/t/use-bquote-in-ggplot2-for-subscript-text/40882
writing a label in R for a plot using text, and subscripts using either bquote, paste or expression
But unfortunately none of them have worked. Any help would be appreciated.

Unfortunately I can't tell you what the problem is with your code. But instead of using bquote the desired result can be achieved using as.character(expression(paste(D[g], "=4.4µm"))). Source: see here. Try this:
ggplot(A, aes(x= `Size (µm)`)) +
geom_line(aes(y = `C/dlogd`), size = 1.5) +
geom_line(aes(y = `+s`), linetype = "twodash", color = "red") +
geom_line(aes(y = `-s`), linetype = "dashed" , color = "blue") +
geom_point(aes(y = `+s`), color = "red") +
geom_point(aes(y = `C/dlogd`)) +
geom_point(aes(y = `-s`), color = "blue") +
geom_text(label = as.character(expression(paste(D[g], "=4.4µm"))), parse = TRUE, x = -.1, y = 900, size = 10) + #gives the 3rd error
scale_x_log10(limits = c(0.4, 21)) +
theme_bw() +
annotation_logticks(side = "b") +
facet_grid(~Datenoyear) +
xlab(element_blank()) +
ylab(element_blank())

Related

Colors don't stick when lollipop plot is run

I have created a lollipop chart that I love. However, when the code runs to create the plot, the colors of the lines, segments, and points all change from what they were set to. Everything else runs great, so this isn't the end of the world, but I am trying to stick with a color palette throughout a report.
The colors should be this ("#9a0138", and "#000775" specifically):
But come out like this:
Any ideas?
Here is the data:
TabPercentCompliant <- structure(list(Provider_ShortName = c("ProviderA", "ProviderA", "ProviderA", "ProviderB",
"ProviderB", "ProviderB", "ProviderC", "ProviderC", "ProviderC", "ProviderD"), SubMeasureID = c("AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2"), AdaptedCompliant = c(139, 2, 117, 85, 1, 33, 36, 2, 22, 43), TotalEligible = c(238, 27, 155, 148, 10, 34, 61, 3, 24, 76), PercentCompliant = c(0.584033613445378, 0.0740740740740741, 0.754838709677419, 0.574324324324324, 0.1, 0.970588235294118, 0.590163934426229, 0.666666666666667, 0.916666666666667, 0.565789473684211 ), PercentTotalEligible = c(0.00516358587173479, 0.00058578495183546, 0.00336283953831467, 0.00321096936561659, 0.000216957389568689, 0.000737655124533542, 0.001323440076369, 6.50872168706066e-05, 0.000520697734964853, 0.00164887616072203), ClaimsAdjudicatedThrough = structure(c(19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024 ), class = "Date"), AdaptedNCQAMean = c(0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57), PerformanceLevel = c(0.0140336134453782, -0.315925925925926, -0.175161290322581, 0.00432432432432439, -0.29, 0.0405882352941176, 0.0201639344262295, 0.276666666666667, -0.0133333333333334, -0.00421052631578944)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
VBP_Report_Date = "2022-09-01"
And the code for the plot:
Tab_PercentCompliant %>%
filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
ggplot(aes(x = Provider_ShortName,
y = PercentCompliant)
) +
geom_line(aes(x = Provider_ShortName,
y = AdaptedNCQAMean,
group = SubMeasureID,
color = "#9a0138",
size = .001)
) +
geom_point(aes(color = "#000775",
size = (PercentTotalEligible)
)
) +
geom_segment(aes(x = Provider_ShortName,
xend = Provider_ShortName,
y = 0,
yend = PercentCompliant,
color = "#000775")
)+
facet_grid(cols = vars(SubMeasureID),
scales = "fixed",
space = "fixed")+
theme_classic()+
theme(legend.position = "none") +
theme(panel.spacing = unit(.5, "lines"),
panel.border = element_rect(
color = "black",
fill = NA,
linewidth = .5),
panel.grid.major.y = element_line(
color = "gray",
linewidth = .5),
axis.text.x = element_text(
angle = 65,
hjust=1),
axis.title.x = element_blank(),
axis.line = element_blank(),
strip.background = element_rect(
color = NULL,
fill = "#e1e7fa"))+
scale_y_continuous(labels = scales::percent)+
labs(title = "Test",
subtitle = "Test",
caption = "Test")
If you have an aesthetic constant, it is often easier / better to have it "outside" your aes call. If you want to have a legend for your color, then you need to keep it "inside", but you will need to manually set the colors with + scale_color/fill_manual.
I've had to cut down quite a lot in your code to make it work. I've also removed bits that are extraneous to the problem. I've removed line size = 0.001 or the line wasn't visible. I've removed the weird filter step or the plot wasn't possible.
Tips: when defining a global aesthetic with ggplot(aes(x = ... etc), you don't need to specify this aesthetic in each geom layer (those aesthetics will be inherited)- makes a more concise / readable code.
library(ggplot2)
ggplot(TabPercentCompliant, aes(x = Provider_ShortName, y = PercentCompliant)) +
geom_line(aes(y = AdaptedNCQAMean, group = SubMeasureID),
color = "#9a0138") +
geom_point(aes(size = PercentTotalEligible), color = "#000775") +
geom_segment(aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
color = "#000775") +
facet_grid(~SubMeasureID) +
theme(strip.background = element_rect(color = NULL, fill = "#e1e7fa"))
Here is the final code. Thanks again tjebo!
# Lollipop Chart ----------------------------------------------------------
Tab_PercentCompliant %>%
filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
ggplot(aes(x = Provider_ShortName,
y = PercentCompliant)
) +
geom_line(aes(y = AdaptedNCQAMean,
group = SubMeasureID),
color = "#9a0138"
) +
geom_point(aes(size = PercentTotalEligible),
color = "#000775",
) +
geom_segment(aes(xend = Provider_ShortName,
y = 0,
yend = PercentCompliant),
color = "#000775"
)+
facet_grid(cols = vars(SubMeasureID)
)+
theme_bw()+
theme(legend.position = "none",
axis.text.x = element_text(
angle = 65,
hjust=1),
axis.title.x = element_blank(),
axis.line = element_blank(),
strip.background = element_rect(
fill = "#e1e7fa"))+
scale_y_continuous(labels = scales::percent)+
labs(title = "Test",
subtitle = "Test",
caption = "Test")

R: How to add vertical line (geom_vline) on plotly (ts_plot)?

I have weekly time series for 2 variables. Now I need to add 2 vertical lines (geom_vline) at "2019-11-10" and "2020-04-26 date points.
structure(list(week = structure(c(18119, 18126, 18133, 18140,
18147, 18154, 18161, 18168, 18175, 18182, 18189, 18196, 18203,
18210, 18217, 18224, 18231, 18238, 18245, 18252, 18259, 18266,
18273, 18280, 18287, 18294, 18301, 18308, 18315, 18322, 18329,
18336, 18343, 18350, 18357, 18364, 18371, 18378, 18385, 18392,
18399, 18406, 18413, 18420, 18427, 18434, 18441, 18448, 18455,
18462, 18469, 18476, 18483, 18490, 18497, 18504, 18511, 18518,
18525, 18532, 18539, 18546, 18553, 18560, 18567, 18574, 18581,
18588, 18595, 18602, 18609, 18616, 18623, 18630, 18637, 18644,
18651, 18658, 18665, 18672, 18679, 18686, 18693, 18700, 18707,
18714, 18721, 18728, 18735, 18742, 18749, 18756, 18763, 18770,
18777, 18784, 18791, 18798, 18805, 18812, 18819, 18826, 18833,
18840, 18847, 18854, 18861, 18868, 18875, 18882, 18889, 18896,
18903, 18910, 18917, 18924, 18931, 18938, 18945, 18952, 18959,
18966, 18973, 18980, 18987, 18994, 19001, 19008, 19015, 19022,
19029, 19036, 19043, 19050, 19057, 19064, 19071, 19078, 19085,
19092, 19099, 19106, 19113, 19120, 19127, 19134, 19141, 19148,
19155, 19162, 19169, 19176, 19183, 19190, 19197, 19204, 19211,
19218, 19225, 19232, 19239, 19246, 19253, 19260, 19267), class = "Date"),
X = c(6L, 104L, 123L, 82L, 67L, 108L, 89L, 1153L, 311L, 346L,
220L, 219L, 184L, 257L, 585L, 342L, 197L, 184L, 351L, 278L,
120L, 204L, 206L, 146L, 216L, 186L, 171L, 200L, 198L, 170L,
192L, 139L, 136L, 414L, 256L, 306L, 281L, 221L, 271L, 311L,
315L, 277L, 342L, 394L, 493L, 855L, 1617L, 954L, 1143L, 1031L,
692L, 380L, 378L, 493L, 381L, 706L, 546L, 653L, 447L, 1004L,
499L, 442L, 926L, 564L, 568L, 755L, 581L, 572L, 780L, 601L,
739L, 563L, 300L, 454L, 727L, 733L, 673L, 648L, 614L, 754L,
827L, 719L, 874L, 692L, 752L, 536L, 658L, 817L, 913L, 813L,
844L, 811L, 977L, 877L, 818L, 673L, 1419L, 809L, 818L, 709L,
577L, 802L, 508L, 536L, 663L, 782L, 634L, 665L, 583L, 685L,
908L, 1013L, 903L, 965L, 981L, 1030L, 1205L, 1197L, 956L,
936L, 901L, 707L, 565L, 384L, 341L, 529L, 510L, 597L, 610L,
587L, 715L, 777L, 638L, 619L, 617L, 677L, 1118L, 1173L, 1025L,
655L, 1006L, 1129L, 811L, 773L, 796L, 993L, 891L, 900L, 1072L,
1182L, 947L, 743L, 759L, 616L, 584L, 613L, 597L, 734L, 671L,
1119L, 906L, 825L, 1109L, 1085L, 913L), Y = c(0L, 23L, 50L,
35L, 38L, 38L, 43L, 173L, 128L, 134L, 115L, 103L, 104L, 122L,
168L, 186L, 158L, 136L, 122L, 91L, 87L, 73L, 105L, 97L, 96L,
84L, 121L, 108L, 83L, 106L, 108L, 107L, 96L, 151L, 128L,
135L, 148L, 133L, 114L, 150L, 175L, 172L, 182L, 185L, 181L,
224L, 451L, 369L, 399L, 377L, 325L, 259L, 245L, 268L, 284L,
338L, 375L, 383L, 307L, 318L, 341L, 290L, 401L, 333L, 336L,
404L, 406L, 333L, 372L, 372L, 388L, 236L, 162L, 264L, 378L,
390L, 350L, 327L, 341L, 359L, 462L, 413L, 421L, 400L, 424L,
283L, 377L, 459L, 478L, 482L, 411L, 416L, 522L, 557L, 502L,
479L, 631L, 533L, 514L, 442L, 477L, 432L, 378L, 365L, 443L,
407L, 384L, 451L, 381L, 435L, 603L, 612L, 685L, 567L, 603L,
576L, 564L, 643L, 671L, 722L, 695L, 463L, 328L, 228L, 160L,
235L, 282L, 346L, 332L, 340L, 396L, 433L, 354L, 356L, 355L,
367L, 462L, 445L, 454L, 355L, 566L, 628L, 491L, 517L, 429L,
569L, 595L, 520L, 601L, 646L, 630L, 590L, 513L, 465L, 390L,
356L, 356L, 417L, 420L, 443L, 450L, 482L, 540L, 611L, 572L
)), row.names = c(NA, -165L), class = c("tbl_df", "tbl",
"data.frame"))
Firstly I convert it into xts object and next use ts_plot() for visualization
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df.w$week)
#plot
library(TSstudio)
ts_plot(df.xts,
slider = TRUE)
I tried the following codes, but it give me the "NULL" in the output
+ geom_vline(xintercept = as.Date("2020-03-01"))
or
+ geom_vline(xintercept = as.POSIXct("2020-03-01"))
Edit
You could use the add_segments function from plotly. You could also add multiple lines by specifying vectors like this:
library(xts)
library(TSstudio)
library(dplyr)
library(plotly)
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df$week)
# plot
ts_plot(df.xts) %>%
add_segments(y = c(0, 0, 0),
x = as.Date(c("2020-03-01", "2020-06-01", "2020-08-01")),
yend = c(1500, 1500, 1500),
xend = as.Date(c("2020-03-01", "2020-06-01", "2020-08-01")),
color = c('red', 'green', 'purple'),
showlegend = FALSE)
Created on 2022-10-12 with reprex v2.0.2
Because you are using xts data format, you can use the function addEventLines to add some vertical lines. Here is a reproducible example:
library(xts)
library(rtweet)
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df$week)
# plot
plot(df.xts)
addEventLines(xts('', as.Date("2020-03-01")), pos = 2, srt = 90, col = 'blue')
Created on 2022-10-11 with reprex v2.0.2

Multi-row x-axis labels with breaks in R

I would like to add 2-row x-axis labels on my line plot, but not sure how to also incorporate the continuous labels and breaks I have for my 1st x-axis (Column "CYR" - short for calendar year). I'd like to have the 2nd axis (WYR) start half-way between the first label and the second (WYR = 2010 starts between CYR = 2009 -> 2010). I also wasn't sure how to add 2 x-axis titles either, maybe at the beginning of each x-axis row?
My data:
> dput(wet_pivot)
structure(list(WYR = c("WR_2010", "WR_2011", "WR_2012", "WR_2013",
"WR_2014", "WR_2015", "WR_2016", "WR_2017", "WR_2018", "WR_2019",
"WR_2020", "WR_2021", "WR_2022"), CYR = c(2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("DRY",
"WET"), class = "factor"), N = c(59L, 63L, 69L, 70L, 72L, 71L,
71L, 72L, 71L, 68L, 70L, 48L, 72L), n_mean = c(0.00696806934430411,
0.000649730847004026, 0.00288256551918419, 0.01141088388474,
0.000536174103147671, 0.00349584646220785, 0.000482925207291882,
0.00245359625194744, 0.00292096956686587, 0.00252817293686805,
0.00196286772014134, 0.00501799463867351, 0.00132244297252478
), n_median = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), sd = c(0.030946706350869,
0.00248965525641742, 0.0100973832581282, 0.051577934580242, 0.00331468784320076,
0.0266064084754242, 0.00212505905295283, 0.00675243933898364,
0.0119729983336735, 0.00639785127193391, 0.00930625647382774,
0.0136275258272549, 0.00543420856675111), se = c(0.00402891799826298,
0.000313667078988821, 0.00121558209746373, 0.0061647423020683,
0.000390639708573979, 0.00315759975690469, 0.000252198110662322,
0.000795782607691024, 0.00142093348159893, 0.000775853428563995,
0.00111231039833223, 0.00196696392618855, 0.000640427621321956
)), row.names = c(NA, -13L), class = "data.frame")
My attempt:
years <- seq(2009,2021,1)
labs <- seq(2009,2021,by=1)
myplot <- ggplot(wet_pivot, aes(x = CYR, y = n_mean)) +
geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), width=.2, color = "black") +
geom_point(color = "black", shape = 1, size = 2) +
geom_line(color = "black") +
scale_y_continuous(limits = c(0, 0.04), expand = expansion(mult = c(0, 0.05))) +
scale_x_continuous(breaks= years, labels = labs)
myplot +
annotate(geom = "text",
x = 1:nrow(wet_pivot),
y = min(wet_pivot$n_mean),
label = labs,
vjust = 3.5) +
annotate(geom = "text",
x = 1:nrow(wet_pivot),
y = min(wet_pivot$n_mean),
label = wet_pivot$WYR,
vjust = 5)
You indeed can use text annotations to substitute for x-axis labels. A few recommendations:
Set y = -Inf to automatically place text as bottom, independent of whatever data is on the plot. vjust can indeed be used to place it further down.
You'd need coord_cartesian(clip = "off") to actually show the text.
You can place 'titles' with an extra annotation layer, with x = -Inf to place it on the left.
I used the above for the example below. Maybe the text is still to big, so you could set the 8.8 / .pt to something smaller. (The / .pt translates between mm, which geom_text() uses, to points, which is used in theme)
library(ggplot2)
# wet_pivot <- structure(...) # omitted for previty
ggplot(wet_pivot, aes(x = CYR, y = n_mean)) +
geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), width=.2, color = "black") +
geom_point(color = "black", shape = 1, size = 2) +
geom_line(color = "black") +
scale_y_continuous(limits = c(0, 0.04), expand = expansion(mult = c(0, 0.05))) +
scale_x_continuous(breaks= years, labels = ~ rep("", length(.x))) +
annotate(geom = "text",
x = wet_pivot$CYR,
y = -Inf,
label = labs,
size = 8.8 / .pt,
vjust = 2.5) +
annotate(geom = "text",
x = wet_pivot$CYR,
y = -Inf,
label = wet_pivot$WYR,
size = 8.8 / .pt,
vjust = 4) +
# Titles
annotate(geom = "text",
x = -Inf,
y = -Inf,
label = c("CYR", "WYR"),
vjust = c(2.5, 4), hjust = 1,
size = 8.8 / .pt
) +
coord_cartesian(clip = "off") +
theme(
# Make extra space between axis ticks and axis title
axis.text.x.bottom = element_text(margin = margin(t = 8.8, b = 8.8))
)
Created on 2022-05-19 by the reprex package (v2.0.1)

Remove certain observations based on two criteria

all the rows that have observations for the same date and Key are marked.
#Data
Key Date Value ...other columns
C 2000-04 0.55 name1 x1 <-
C 2000-04 0.60 name2 x2 <-
C 2000-05 1.2
A 2001-06 4
A 2001-07 5
A 2002-08 2
Ist there an easy way to remove those observations instead of summing them up?
Thanks a lot! :)
Just use duplicated twice, one with the default fromLast = FALSE and the other setting fromLast = TRUE.
Then, negate that result index.
inx <- duplicated(Data[c('Key', 'Date')]) | duplicated(Data[c('Key', 'Date')], fromLast = TRUE)
Data[!inx, ]
# Key Date Value
#3 C 2000-05 1.2
#4 A 2001-06 4.0
#5 A 2001-07 5.0
#6 A 2002-08 2.0
Data.
Data <-
structure(list(Key = structure(c(2L, 2L, 2L, 1L, 1L, 1L), .Label = c("A",
"C"), class = "factor"), Date = structure(c(1L, 1L, 2L, 3L, 4L,
5L), .Label = c("2000-04", "2000-05", "2001-06", "2001-07", "2002-08"
), class = "factor"), Value = c(0.55, 0.6, 1.2, 4, 5, 2)), class = "data.frame", row.names = c(NA,
-6L))
I am not sure whether I understand correctly. If yes, a very simple answer is unique() function.
First, you need to have very strong reason to remove these duplications.
Key <- c("C", "C", "C", "A", "A", "A", "C", "C", "C")
Value <- c(1, 2, 1, 2, 1, 2, 1, 2, 1)
df <- data.frame(Key, Value)
unique(df)

How to create multiple scatterplots & color them differently based on Day-type(as.factors)

I used sqldf function to join two tables & create a final TABLE to make the scatterplot. My final table has 6 variables as follows:-
'data.frame': 11520 obs. of 6 variables:
`$ DATE : chr "01/01/2013" "01/01/2013" "01/01/2013" "01/01/2013" ...
`$ HOUR_NUM : int 1 2 3 4 5 6 7 8 9 10 ...'
`$ CONGESTION_ZONE_CD: Factor w/ 4 levels "H","N","S","W": 1 1 1 1 1 1 1 1 1 1 ...'
`$ DAY_TYPE_CD : Factor w/ 2 levels "WD","WE": 1 1 1 1 1 1 1 1 1 1 ...'
`$ LOAD : num 182 171 158 147 141 ...'
`$ AVG_TEMP : num 66.3 65.9 66.3 65 62.9 61.4 58.3 56.7 55.5 54.3 ...'
i used the simple plot function to get the scatterplot.
`TOTAL_LOAD = WE_TABLE$LOAD'
`TEMP = WE_TABLE$AVG_TEMP
`plot(TEMP, TOTAL_LOAD, col="blue")
Works perfect! Now, i am using sqldf again to make a subset where CONGESTION_ZONE_CD='H' & rewriting that coad again for diff table(H). same thing for North, south & west too. So, basically creating 4 subset table & rewriting the same code. My first question is:'
1) Can i just output the scatterplots for all four different types of zone by some functions at once? ( i read a lot about 'by' functions but couldn't wrap my head around it to perform this).
Now, for the second part, as i make scatterplot for each zone, i know i have both Weekdays(WD) and weekend(WE) in there. but,
2.) Is there a way i could color code my scatterplot based on WD & WE (DAY_TYPE_CD as factors) (let's say blue for WD & red for WE)?
Really like to thank you guys in advance! Since, i am still a beginner in R & i have checked questions on scatterplots before i post it here. i have a feeling this might have a simple solution that i am not aware of.
Thanks again.
dput(Table)
structure(list(DATE = c("01/01/2013", "01/01/2013", "01/01/2013",
"01/01/2013", "01/01/2013", "01/01/2013", "01/01/2013", "01/01/2013",
"01/01/2013", "01/01/2013"), HOUR_NUM = 1:10, CONGESTION_ZONE_CD = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("H", "N", "S",
"W"), class = "factor"), DAY_TYPE_CD = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("WD", "WE"), class = "factor"),
LOAD = c(181.801152, 170.512058, 157.95493, 147.299889, 140.645532,
139.216646, 141.670543, 149.122035, 160.893591, 181.996018
), AVG_TEMP = c(66.3, 65.9, 66.3, 65, 62.9, 61.4, 58.3, 56.7,
55.5, 54.3)), .Names = c("DATE", "HOUR_NUM", "CONGESTION_ZONE_CD",
"DAY_TYPE_CD", "LOAD", "AVG_TEMP"), row.names = c(NA, 10L), class = "data.frame")
Much better to use ggplot for this:
# not tested...
library(ggplot2)
# all on one plot
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD,color=CONGESTION_ZONE))
#four plots
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD)) +
facet_wrap(~CONGESTION_ZONE)
# coloring beased on day_type
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD, color=DAY_TYPE_CD))+
facet_wrap(~CONGESTION_ZONE)