I have created a lollipop chart that I love. However, when the code runs to create the plot, the colors of the lines, segments, and points all change from what they were set to. Everything else runs great, so this isn't the end of the world, but I am trying to stick with a color palette throughout a report.
The colors should be this ("#9a0138", and "#000775" specifically):
But come out like this:
Any ideas?
Here is the data:
TabPercentCompliant <- structure(list(Provider_ShortName = c("ProviderA", "ProviderA", "ProviderA", "ProviderB",
"ProviderB", "ProviderB", "ProviderC", "ProviderC", "ProviderC", "ProviderD"), SubMeasureID = c("AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2"), AdaptedCompliant = c(139, 2, 117, 85, 1, 33, 36, 2, 22, 43), TotalEligible = c(238, 27, 155, 148, 10, 34, 61, 3, 24, 76), PercentCompliant = c(0.584033613445378, 0.0740740740740741, 0.754838709677419, 0.574324324324324, 0.1, 0.970588235294118, 0.590163934426229, 0.666666666666667, 0.916666666666667, 0.565789473684211 ), PercentTotalEligible = c(0.00516358587173479, 0.00058578495183546, 0.00336283953831467, 0.00321096936561659, 0.000216957389568689, 0.000737655124533542, 0.001323440076369, 6.50872168706066e-05, 0.000520697734964853, 0.00164887616072203), ClaimsAdjudicatedThrough = structure(c(19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024 ), class = "Date"), AdaptedNCQAMean = c(0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57), PerformanceLevel = c(0.0140336134453782, -0.315925925925926, -0.175161290322581, 0.00432432432432439, -0.29, 0.0405882352941176, 0.0201639344262295, 0.276666666666667, -0.0133333333333334, -0.00421052631578944)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
VBP_Report_Date = "2022-09-01"
And the code for the plot:
Tab_PercentCompliant %>%
filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
ggplot(aes(x = Provider_ShortName,
y = PercentCompliant)
) +
geom_line(aes(x = Provider_ShortName,
y = AdaptedNCQAMean,
group = SubMeasureID,
color = "#9a0138",
size = .001)
) +
geom_point(aes(color = "#000775",
size = (PercentTotalEligible)
)
) +
geom_segment(aes(x = Provider_ShortName,
xend = Provider_ShortName,
y = 0,
yend = PercentCompliant,
color = "#000775")
)+
facet_grid(cols = vars(SubMeasureID),
scales = "fixed",
space = "fixed")+
theme_classic()+
theme(legend.position = "none") +
theme(panel.spacing = unit(.5, "lines"),
panel.border = element_rect(
color = "black",
fill = NA,
linewidth = .5),
panel.grid.major.y = element_line(
color = "gray",
linewidth = .5),
axis.text.x = element_text(
angle = 65,
hjust=1),
axis.title.x = element_blank(),
axis.line = element_blank(),
strip.background = element_rect(
color = NULL,
fill = "#e1e7fa"))+
scale_y_continuous(labels = scales::percent)+
labs(title = "Test",
subtitle = "Test",
caption = "Test")
If you have an aesthetic constant, it is often easier / better to have it "outside" your aes call. If you want to have a legend for your color, then you need to keep it "inside", but you will need to manually set the colors with + scale_color/fill_manual.
I've had to cut down quite a lot in your code to make it work. I've also removed bits that are extraneous to the problem. I've removed line size = 0.001 or the line wasn't visible. I've removed the weird filter step or the plot wasn't possible.
Tips: when defining a global aesthetic with ggplot(aes(x = ... etc), you don't need to specify this aesthetic in each geom layer (those aesthetics will be inherited)- makes a more concise / readable code.
library(ggplot2)
ggplot(TabPercentCompliant, aes(x = Provider_ShortName, y = PercentCompliant)) +
geom_line(aes(y = AdaptedNCQAMean, group = SubMeasureID),
color = "#9a0138") +
geom_point(aes(size = PercentTotalEligible), color = "#000775") +
geom_segment(aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
color = "#000775") +
facet_grid(~SubMeasureID) +
theme(strip.background = element_rect(color = NULL, fill = "#e1e7fa"))
Here is the final code. Thanks again tjebo!
# Lollipop Chart ----------------------------------------------------------
Tab_PercentCompliant %>%
filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
ggplot(aes(x = Provider_ShortName,
y = PercentCompliant)
) +
geom_line(aes(y = AdaptedNCQAMean,
group = SubMeasureID),
color = "#9a0138"
) +
geom_point(aes(size = PercentTotalEligible),
color = "#000775",
) +
geom_segment(aes(xend = Provider_ShortName,
y = 0,
yend = PercentCompliant),
color = "#000775"
)+
facet_grid(cols = vars(SubMeasureID)
)+
theme_bw()+
theme(legend.position = "none",
axis.text.x = element_text(
angle = 65,
hjust=1),
axis.title.x = element_blank(),
axis.line = element_blank(),
strip.background = element_rect(
fill = "#e1e7fa"))+
scale_y_continuous(labels = scales::percent)+
labs(title = "Test",
subtitle = "Test",
caption = "Test")
I have weekly time series for 2 variables. Now I need to add 2 vertical lines (geom_vline) at "2019-11-10" and "2020-04-26 date points.
structure(list(week = structure(c(18119, 18126, 18133, 18140,
18147, 18154, 18161, 18168, 18175, 18182, 18189, 18196, 18203,
18210, 18217, 18224, 18231, 18238, 18245, 18252, 18259, 18266,
18273, 18280, 18287, 18294, 18301, 18308, 18315, 18322, 18329,
18336, 18343, 18350, 18357, 18364, 18371, 18378, 18385, 18392,
18399, 18406, 18413, 18420, 18427, 18434, 18441, 18448, 18455,
18462, 18469, 18476, 18483, 18490, 18497, 18504, 18511, 18518,
18525, 18532, 18539, 18546, 18553, 18560, 18567, 18574, 18581,
18588, 18595, 18602, 18609, 18616, 18623, 18630, 18637, 18644,
18651, 18658, 18665, 18672, 18679, 18686, 18693, 18700, 18707,
18714, 18721, 18728, 18735, 18742, 18749, 18756, 18763, 18770,
18777, 18784, 18791, 18798, 18805, 18812, 18819, 18826, 18833,
18840, 18847, 18854, 18861, 18868, 18875, 18882, 18889, 18896,
18903, 18910, 18917, 18924, 18931, 18938, 18945, 18952, 18959,
18966, 18973, 18980, 18987, 18994, 19001, 19008, 19015, 19022,
19029, 19036, 19043, 19050, 19057, 19064, 19071, 19078, 19085,
19092, 19099, 19106, 19113, 19120, 19127, 19134, 19141, 19148,
19155, 19162, 19169, 19176, 19183, 19190, 19197, 19204, 19211,
19218, 19225, 19232, 19239, 19246, 19253, 19260, 19267), class = "Date"),
X = c(6L, 104L, 123L, 82L, 67L, 108L, 89L, 1153L, 311L, 346L,
220L, 219L, 184L, 257L, 585L, 342L, 197L, 184L, 351L, 278L,
120L, 204L, 206L, 146L, 216L, 186L, 171L, 200L, 198L, 170L,
192L, 139L, 136L, 414L, 256L, 306L, 281L, 221L, 271L, 311L,
315L, 277L, 342L, 394L, 493L, 855L, 1617L, 954L, 1143L, 1031L,
692L, 380L, 378L, 493L, 381L, 706L, 546L, 653L, 447L, 1004L,
499L, 442L, 926L, 564L, 568L, 755L, 581L, 572L, 780L, 601L,
739L, 563L, 300L, 454L, 727L, 733L, 673L, 648L, 614L, 754L,
827L, 719L, 874L, 692L, 752L, 536L, 658L, 817L, 913L, 813L,
844L, 811L, 977L, 877L, 818L, 673L, 1419L, 809L, 818L, 709L,
577L, 802L, 508L, 536L, 663L, 782L, 634L, 665L, 583L, 685L,
908L, 1013L, 903L, 965L, 981L, 1030L, 1205L, 1197L, 956L,
936L, 901L, 707L, 565L, 384L, 341L, 529L, 510L, 597L, 610L,
587L, 715L, 777L, 638L, 619L, 617L, 677L, 1118L, 1173L, 1025L,
655L, 1006L, 1129L, 811L, 773L, 796L, 993L, 891L, 900L, 1072L,
1182L, 947L, 743L, 759L, 616L, 584L, 613L, 597L, 734L, 671L,
1119L, 906L, 825L, 1109L, 1085L, 913L), Y = c(0L, 23L, 50L,
35L, 38L, 38L, 43L, 173L, 128L, 134L, 115L, 103L, 104L, 122L,
168L, 186L, 158L, 136L, 122L, 91L, 87L, 73L, 105L, 97L, 96L,
84L, 121L, 108L, 83L, 106L, 108L, 107L, 96L, 151L, 128L,
135L, 148L, 133L, 114L, 150L, 175L, 172L, 182L, 185L, 181L,
224L, 451L, 369L, 399L, 377L, 325L, 259L, 245L, 268L, 284L,
338L, 375L, 383L, 307L, 318L, 341L, 290L, 401L, 333L, 336L,
404L, 406L, 333L, 372L, 372L, 388L, 236L, 162L, 264L, 378L,
390L, 350L, 327L, 341L, 359L, 462L, 413L, 421L, 400L, 424L,
283L, 377L, 459L, 478L, 482L, 411L, 416L, 522L, 557L, 502L,
479L, 631L, 533L, 514L, 442L, 477L, 432L, 378L, 365L, 443L,
407L, 384L, 451L, 381L, 435L, 603L, 612L, 685L, 567L, 603L,
576L, 564L, 643L, 671L, 722L, 695L, 463L, 328L, 228L, 160L,
235L, 282L, 346L, 332L, 340L, 396L, 433L, 354L, 356L, 355L,
367L, 462L, 445L, 454L, 355L, 566L, 628L, 491L, 517L, 429L,
569L, 595L, 520L, 601L, 646L, 630L, 590L, 513L, 465L, 390L,
356L, 356L, 417L, 420L, 443L, 450L, 482L, 540L, 611L, 572L
)), row.names = c(NA, -165L), class = c("tbl_df", "tbl",
"data.frame"))
Firstly I convert it into xts object and next use ts_plot() for visualization
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df.w$week)
#plot
library(TSstudio)
ts_plot(df.xts,
slider = TRUE)
I tried the following codes, but it give me the "NULL" in the output
+ geom_vline(xintercept = as.Date("2020-03-01"))
or
+ geom_vline(xintercept = as.POSIXct("2020-03-01"))
Edit
You could use the add_segments function from plotly. You could also add multiple lines by specifying vectors like this:
library(xts)
library(TSstudio)
library(dplyr)
library(plotly)
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df$week)
# plot
ts_plot(df.xts) %>%
add_segments(y = c(0, 0, 0),
x = as.Date(c("2020-03-01", "2020-06-01", "2020-08-01")),
yend = c(1500, 1500, 1500),
xend = as.Date(c("2020-03-01", "2020-06-01", "2020-08-01")),
color = c('red', 'green', 'purple'),
showlegend = FALSE)
Created on 2022-10-12 with reprex v2.0.2
Because you are using xts data format, you can use the function addEventLines to add some vertical lines. Here is a reproducible example:
library(xts)
library(rtweet)
#Create xts object
df.xts <- xts(df[, 2:3], order.by = df$week)
# plot
plot(df.xts)
addEventLines(xts('', as.Date("2020-03-01")), pos = 2, srt = 90, col = 'blue')
Created on 2022-10-11 with reprex v2.0.2
I would like to add 2-row x-axis labels on my line plot, but not sure how to also incorporate the continuous labels and breaks I have for my 1st x-axis (Column "CYR" - short for calendar year). I'd like to have the 2nd axis (WYR) start half-way between the first label and the second (WYR = 2010 starts between CYR = 2009 -> 2010). I also wasn't sure how to add 2 x-axis titles either, maybe at the beginning of each x-axis row?
My data:
> dput(wet_pivot)
structure(list(WYR = c("WR_2010", "WR_2011", "WR_2012", "WR_2013",
"WR_2014", "WR_2015", "WR_2016", "WR_2017", "WR_2018", "WR_2019",
"WR_2020", "WR_2021", "WR_2022"), CYR = c(2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("DRY",
"WET"), class = "factor"), N = c(59L, 63L, 69L, 70L, 72L, 71L,
71L, 72L, 71L, 68L, 70L, 48L, 72L), n_mean = c(0.00696806934430411,
0.000649730847004026, 0.00288256551918419, 0.01141088388474,
0.000536174103147671, 0.00349584646220785, 0.000482925207291882,
0.00245359625194744, 0.00292096956686587, 0.00252817293686805,
0.00196286772014134, 0.00501799463867351, 0.00132244297252478
), n_median = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), sd = c(0.030946706350869,
0.00248965525641742, 0.0100973832581282, 0.051577934580242, 0.00331468784320076,
0.0266064084754242, 0.00212505905295283, 0.00675243933898364,
0.0119729983336735, 0.00639785127193391, 0.00930625647382774,
0.0136275258272549, 0.00543420856675111), se = c(0.00402891799826298,
0.000313667078988821, 0.00121558209746373, 0.0061647423020683,
0.000390639708573979, 0.00315759975690469, 0.000252198110662322,
0.000795782607691024, 0.00142093348159893, 0.000775853428563995,
0.00111231039833223, 0.00196696392618855, 0.000640427621321956
)), row.names = c(NA, -13L), class = "data.frame")
My attempt:
years <- seq(2009,2021,1)
labs <- seq(2009,2021,by=1)
myplot <- ggplot(wet_pivot, aes(x = CYR, y = n_mean)) +
geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), width=.2, color = "black") +
geom_point(color = "black", shape = 1, size = 2) +
geom_line(color = "black") +
scale_y_continuous(limits = c(0, 0.04), expand = expansion(mult = c(0, 0.05))) +
scale_x_continuous(breaks= years, labels = labs)
myplot +
annotate(geom = "text",
x = 1:nrow(wet_pivot),
y = min(wet_pivot$n_mean),
label = labs,
vjust = 3.5) +
annotate(geom = "text",
x = 1:nrow(wet_pivot),
y = min(wet_pivot$n_mean),
label = wet_pivot$WYR,
vjust = 5)
You indeed can use text annotations to substitute for x-axis labels. A few recommendations:
Set y = -Inf to automatically place text as bottom, independent of whatever data is on the plot. vjust can indeed be used to place it further down.
You'd need coord_cartesian(clip = "off") to actually show the text.
You can place 'titles' with an extra annotation layer, with x = -Inf to place it on the left.
I used the above for the example below. Maybe the text is still to big, so you could set the 8.8 / .pt to something smaller. (The / .pt translates between mm, which geom_text() uses, to points, which is used in theme)
library(ggplot2)
# wet_pivot <- structure(...) # omitted for previty
ggplot(wet_pivot, aes(x = CYR, y = n_mean)) +
geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), width=.2, color = "black") +
geom_point(color = "black", shape = 1, size = 2) +
geom_line(color = "black") +
scale_y_continuous(limits = c(0, 0.04), expand = expansion(mult = c(0, 0.05))) +
scale_x_continuous(breaks= years, labels = ~ rep("", length(.x))) +
annotate(geom = "text",
x = wet_pivot$CYR,
y = -Inf,
label = labs,
size = 8.8 / .pt,
vjust = 2.5) +
annotate(geom = "text",
x = wet_pivot$CYR,
y = -Inf,
label = wet_pivot$WYR,
size = 8.8 / .pt,
vjust = 4) +
# Titles
annotate(geom = "text",
x = -Inf,
y = -Inf,
label = c("CYR", "WYR"),
vjust = c(2.5, 4), hjust = 1,
size = 8.8 / .pt
) +
coord_cartesian(clip = "off") +
theme(
# Make extra space between axis ticks and axis title
axis.text.x.bottom = element_text(margin = margin(t = 8.8, b = 8.8))
)
Created on 2022-05-19 by the reprex package (v2.0.1)
I used sqldf function to join two tables & create a final TABLE to make the scatterplot. My final table has 6 variables as follows:-
'data.frame': 11520 obs. of 6 variables:
`$ DATE : chr "01/01/2013" "01/01/2013" "01/01/2013" "01/01/2013" ...
`$ HOUR_NUM : int 1 2 3 4 5 6 7 8 9 10 ...'
`$ CONGESTION_ZONE_CD: Factor w/ 4 levels "H","N","S","W": 1 1 1 1 1 1 1 1 1 1 ...'
`$ DAY_TYPE_CD : Factor w/ 2 levels "WD","WE": 1 1 1 1 1 1 1 1 1 1 ...'
`$ LOAD : num 182 171 158 147 141 ...'
`$ AVG_TEMP : num 66.3 65.9 66.3 65 62.9 61.4 58.3 56.7 55.5 54.3 ...'
i used the simple plot function to get the scatterplot.
`TOTAL_LOAD = WE_TABLE$LOAD'
`TEMP = WE_TABLE$AVG_TEMP
`plot(TEMP, TOTAL_LOAD, col="blue")
Works perfect! Now, i am using sqldf again to make a subset where CONGESTION_ZONE_CD='H' & rewriting that coad again for diff table(H). same thing for North, south & west too. So, basically creating 4 subset table & rewriting the same code. My first question is:'
1) Can i just output the scatterplots for all four different types of zone by some functions at once? ( i read a lot about 'by' functions but couldn't wrap my head around it to perform this).
Now, for the second part, as i make scatterplot for each zone, i know i have both Weekdays(WD) and weekend(WE) in there. but,
2.) Is there a way i could color code my scatterplot based on WD & WE (DAY_TYPE_CD as factors) (let's say blue for WD & red for WE)?
Really like to thank you guys in advance! Since, i am still a beginner in R & i have checked questions on scatterplots before i post it here. i have a feeling this might have a simple solution that i am not aware of.
Thanks again.
dput(Table)
structure(list(DATE = c("01/01/2013", "01/01/2013", "01/01/2013",
"01/01/2013", "01/01/2013", "01/01/2013", "01/01/2013", "01/01/2013",
"01/01/2013", "01/01/2013"), HOUR_NUM = 1:10, CONGESTION_ZONE_CD = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("H", "N", "S",
"W"), class = "factor"), DAY_TYPE_CD = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("WD", "WE"), class = "factor"),
LOAD = c(181.801152, 170.512058, 157.95493, 147.299889, 140.645532,
139.216646, 141.670543, 149.122035, 160.893591, 181.996018
), AVG_TEMP = c(66.3, 65.9, 66.3, 65, 62.9, 61.4, 58.3, 56.7,
55.5, 54.3)), .Names = c("DATE", "HOUR_NUM", "CONGESTION_ZONE_CD",
"DAY_TYPE_CD", "LOAD", "AVG_TEMP"), row.names = c(NA, 10L), class = "data.frame")
Much better to use ggplot for this:
# not tested...
library(ggplot2)
# all on one plot
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD,color=CONGESTION_ZONE))
#four plots
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD)) +
facet_wrap(~CONGESTION_ZONE)
# coloring beased on day_type
ggplot(df) + geom_point(aes(x=AVG_TEMP,y=TOTAL_LOAD, color=DAY_TYPE_CD))+
facet_wrap(~CONGESTION_ZONE)