How do I set legend element titles? - ggplot2

I am using code from this biostars post to get myself more acquainted with creating plots in ggplot. I am a bit stuck on setting the legend variables, though.
Is there a way to set the colour and control the number of breaks/dots in the legend (under numDEInCat)?
library(ggplot2)

# Example enrichment results, one row per GO term.
term <- c(
  "snoRNA binding", "preprophase band", "kinesin complex",
  "microtubule motor activity", "DNA replication"
)
fc <- runif(5, 1.00, 5.00)        # plotted as "Fold Enrichment"
padj_BH <- runif(5, 0.001, 0.05)  # plotted as "Adjusted P-value" (colour)
numDEInCat <- runif(5, 30, 300)   # plotted as "count" (point size)
ggdata <- data.frame(term, fc, padj_BH, numDEInCat)

# Dot plot: one point per term, sized by numDEInCat and coloured by padj_BH.
# coord_flip() at the end puts the terms on the vertical axis.
gg1 <- ggplot(
  ggdata,
  aes(x = term, y = fc, size = numDEInCat, color = padj_BH)
) +
  expand_limits(y = 1) +
  geom_point(shape = 16, inherit.aes = TRUE) +
  scale_size(range = c(2.5, 12.5)) +
  scale_color_gradient(low = "#ff0303", high = "#1e00b3") + # colour for p-value
  xlab("") +
  ylab("Fold Enrichment") + # label the fold-enrichment axis
  labs(
    title = "Gene Ontology all",
    subtitle = "BH-adjusted",
    caption = "",
    color = "Adjusted P-value", # label the colour legend
    size = "count"              # label the dot-size legend
  ) +
  theme_bw(base_size = 24) +
  theme(
    legend.position = "right",
    legend.background = element_rect(),
    plot.title = element_text(angle = 0, size = 16, face = "bold", vjust = 1),
    plot.subtitle = element_text(angle = 0, size = 14, face = "bold", vjust = 1),
    plot.caption = element_text(angle = 0, size = 12, face = "bold", vjust = 1),
    axis.text.x = element_text(angle = 0, size = 12, face = "bold", hjust = 1.10),
    axis.text.y = element_text(angle = 0, size = 12, face = "bold", vjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.line = element_line(colour = "black"),
    # Legend
    legend.key = element_blank(),                          # removes the border
    legend.key.size = unit(1, "cm"),                       # size of the legend keys
    legend.text = element_text(size = 14, face = "bold"),  # text size
    title = element_text(size = 14, face = "bold")
  ) +
  coord_flip()
gg1

I think what you're looking for are guides(size = guide_legend(override.aes = list(...))) to restyle the legend keys and scale_size(breaks = c(...)) to choose which breaks appear in the legend:
# Recolour the size-legend keys and choose the size breaks explicitly.
# gg1 already carries a size scale; adding another one replaces it (ggplot2
# prints a message), so the original `range` is repeated here to keep the
# point sizes unchanged.
gg1 +
  guides(size = guide_legend(override.aes = list(colour = "red"))) +
  scale_size(
    range = c(2.5, 12.5),
    limits = c(1, 1000),
    breaks = c(10, 500, 1000)
  )
Created on 2021-11-18 by the reprex package (v2.0.1)

Related

Colors don't stick when lollipop plot is run

I have created a lollipop chart that I love. However, when the code runs to create the plot, the colors of the lines, segments, and points all change from what they were set to. Everything else runs great, so this isn't the end of the world, but I am trying to stick with a color palette throughout a report.
The colors should be this ("#9a0138", and "#000775" specifically):
But come out like this:
Any ideas?
Here is the data:
# Sample data (looks like dput() output): 10 rows, one per provider /
# sub-measure combination, stored as a tibble.
# ClaimsAdjudicatedThrough is a Date held as days since 1970-01-01; every row
# is 19024, i.e. 2022-02-01.
# NOTE(review): the object is created as `TabPercentCompliant`, while the
# question's plotting code refers to `Tab_PercentCompliant`.
TabPercentCompliant <- structure(list(Provider_ShortName = c("ProviderA", "ProviderA", "ProviderA", "ProviderB",
"ProviderB", "ProviderB", "ProviderC", "ProviderC", "ProviderC", "ProviderD"), SubMeasureID = c("AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2"), AdaptedCompliant = c(139, 2, 117, 85, 1, 33, 36, 2, 22, 43), TotalEligible = c(238, 27, 155, 148, 10, 34, 61, 3, 24, 76), PercentCompliant = c(0.584033613445378, 0.0740740740740741, 0.754838709677419, 0.574324324324324, 0.1, 0.970588235294118, 0.590163934426229, 0.666666666666667, 0.916666666666667, 0.565789473684211 ), PercentTotalEligible = c(0.00516358587173479, 0.00058578495183546, 0.00336283953831467, 0.00321096936561659, 0.000216957389568689, 0.000737655124533542, 0.001323440076369, 6.50872168706066e-05, 0.000520697734964853, 0.00164887616072203), ClaimsAdjudicatedThrough = structure(c(19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024 ), class = "Date"), AdaptedNCQAMean = c(0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57), PerformanceLevel = c(0.0140336134453782, -0.315925925925926, -0.175161290322581, 0.00432432432432439, -0.29, 0.0405882352941176, 0.0201639344262295, 0.276666666666667, -0.0133333333333334, -0.00421052631578944)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
# Report date (a character string) that the plotting code compares against
# ClaimsAdjudicatedThrough when filtering.
VBP_Report_Date = "2022-09-01"
And the code for the plot:
# Lollipop chart of percent compliant per provider, one facet per sub-measure.
# The colour strings below sit inside aes(), so ggplot2 treats them as data
# values and maps them through the colour scale instead of using them as
# literal colours.
Tab_PercentCompliant %>%
  filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
  ggplot(aes(x = Provider_ShortName, y = PercentCompliant)) +
  # Line at AdaptedNCQAMean, one group per sub-measure
  geom_line(
    aes(
      x = Provider_ShortName,
      y = AdaptedNCQAMean,
      group = SubMeasureID,
      color = "#9a0138",
      size = .001
    )
  ) +
  # Lollipop heads, sized by share of total eligible
  geom_point(
    aes(
      color = "#000775",
      size = (PercentTotalEligible)
    )
  ) +
  # Lollipop stems from 0 up to the percent compliant
  geom_segment(
    aes(
      x = Provider_ShortName,
      xend = Provider_ShortName,
      y = 0,
      yend = PercentCompliant,
      color = "#000775"
    )
  ) +
  facet_grid(
    cols = vars(SubMeasureID),
    scales = "fixed",
    space = "fixed"
  ) +
  theme_classic() +
  theme(legend.position = "none") +
  theme(
    panel.spacing = unit(.5, "lines"),
    panel.border = element_rect(color = "black", fill = NA, linewidth = .5),
    panel.grid.major.y = element_line(color = "gray", linewidth = .5),
    axis.text.x = element_text(angle = 65, hjust = 1),
    axis.title.x = element_blank(),
    axis.line = element_blank(),
    strip.background = element_rect(color = NULL, fill = "#e1e7fa")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Test", subtitle = "Test", caption = "Test")
If you have an aesthetic constant, it is often easier / better to have it "outside" your aes call. If you want to have a legend for your color, then you need to keep it "inside", but you will need to manually set the colors with + scale_color/fill_manual.
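I've had to cut down quite a lot in your code to make it work. I've also removed bits that are extraneous to the problem. I've removed size = 0.001 from the line layer, otherwise the line wasn't visible. I've also removed the filter step, otherwise the plot wasn't possible: every date in the sample data is 19024 days after 1970-01-01 (2022-02-01), which never equals "2022-09-01", so the filter leaves no rows.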
Tips: when defining a global aesthetic with ggplot(aes(x = ... etc), you don't need to specify this aesthetic in each geom layer (those aesthetics will be inherited)- makes a more concise / readable code.
library(ggplot2)

# Constant colours are passed outside aes(), so they are used literally.
# x and y come from the global mapping and are not repeated in the layers.
ggplot(
  TabPercentCompliant,
  aes(x = Provider_ShortName, y = PercentCompliant)
) +
  geom_line(
    aes(y = AdaptedNCQAMean, group = SubMeasureID),
    color = "#9a0138"
  ) +
  geom_point(
    aes(size = PercentTotalEligible),
    color = "#000775"
  ) +
  geom_segment(
    aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
    color = "#000775"
  ) +
  # One facet column per sub-measure
  facet_grid(cols = vars(SubMeasureID)) +
  theme(
    strip.background = element_rect(color = NULL, fill = "#e1e7fa")
  )
Here is the final code. Thanks again tjebo!
# Lollipop Chart ----------------------------------------------------------
# Percent compliant per provider, one facet per sub-measure. Constant colours
# are set outside aes() so they are used as literal colours.
Tab_PercentCompliant %>%
  filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
  ggplot(aes(x = Provider_ShortName, y = PercentCompliant)) +
  # Line at AdaptedNCQAMean, one group per sub-measure
  geom_line(
    aes(y = AdaptedNCQAMean, group = SubMeasureID),
    color = "#9a0138"
  ) +
  # Lollipop heads (the stray trailing comma after `color` has been removed;
  # it passed an empty argument to geom_point())
  geom_point(
    aes(size = PercentTotalEligible),
    color = "#000775"
  ) +
  # Lollipop stems from 0 up to the percent compliant
  geom_segment(
    aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
    color = "#000775"
  ) +
  facet_grid(cols = vars(SubMeasureID)) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 65, hjust = 1),
    axis.title.x = element_blank(),
    axis.line = element_blank(),
    strip.background = element_rect(fill = "#e1e7fa")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Test", subtitle = "Test", caption = "Test")

How do I separate the geom_label_repel in the geom_bar?

I want to pull away these overlapping labels in ggplot.
I am seeking help to pull the labels apart so that they no longer overlap. I've tried modifying box.padding, max.overlaps and min.segment.length, but still failed.
# Read the cell-line counts and derive, per row, its share of all cell lines
# and a "type(count)" label.
data <- read.csv("995_matched_cancer_types.csv", header = TRUE)
names(data) <- c("cancer_types", "primary_disease", "cell_lines")
data <- subset(data, primary_disease != "Unknown")
data$counts <- data$cell_lines / sum(data$cell_lines)
data$info <- paste0(data$cancer_types, "(", data$cell_lines, ")")

# Dodged bars per cancer type, one facet per primary disease, with repelled
# text labels.
ggplot(data, aes(x = 1, y = counts, label = info, fill = cancer_types)) +
  geom_bar(stat = "identity", position = position_dodge2(), color = "black") +
  geom_text_repel(
    position = position_dodge2(.9),
    box.padding = .5,
    max.overlaps = 30,
    min.segment.length = 0,
    ylim = c(.03, Inf)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.line.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 12),
    legend.position = "none"
  ) +
  facet_wrap(~primary_disease)
If someone would help me with this problem, I would really appreciate it.
I have changed ylim = c(.03,Inf) to ylim = c(NA,Inf). This removes most of the overlaps. You can play around with the xlim and ylim values (in my opinion). You can try different height and width parameters of the ggsave function as well.
# Dodged bars with repelled labels; ylim = c(NA, Inf) sets no explicit lower
# bound for the label positions.
ggplot(
  data,
  aes(x = 1, y = counts, label = info, fill = cancer_types)
) +
  geom_bar(
    stat = "identity",
    position = position_dodge2(),
    color = "black"
  ) +
  geom_text_repel(
    position = position_dodge2(.9),
    box.padding = 0.5,
    max.overlaps = 30,
    min.segment.length = 0,
    ylim = c(NA, Inf)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.line.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 12),
    legend.position = "none"
  ) +
  facet_wrap(~primary_disease)

# save plot as png (no `plot` argument, so ggsave() writes the last plot)
ggsave("fig.png", width = 20, height = 10, units = "in")

How to change a map tilt

I'm struggling with the map tilt. I would like help to change the tilt of the following map. Thanks!
The first map is my result, the second map is how I would like the slope to be.
library(usmap)
library(ggplot2)

# County-level values for New York; the YEAR_2010 column is mapped to fill.
NY_data <- read.table("NY_data.txt", header = TRUE)

NY1 <- plot_usmap(
  regions = "county",
  include = c("NY"),
  data = NY_data,
  values = "YEAR_2010"
) +
  labs(title = "New York by county", subtitle = "2010") +
  theme(
    plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
    plot.subtitle = element_text(face = "bold", size = 16)
  ) +
  scale_fill_continuous(
    low = "white", high = "#CB454A", limits = c(0, 35),
    name = "Cumulative cases",
    guide = guide_colourbar(
      barwidth = 27, barheight = 0.5,
      title.position = "top"
    ),
    # `labels` spelled out in full instead of relying on partial matching
    labels = scales::comma
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10)
  )
NY1
map1
map2

What is Julia's equivalent ggplot code of R's?

I would like to plot a sophisticated graph in Julia. The code below is in Julia's version using ggplot.
using CairoMakie, DataFrames, Effects, GLM, StatsModels, StableRNGs, RCall
@rlibrary ggplot2

# Simulated growth data: ages 13-20 for each sex, weights with seeded noise.
rng = StableRNG(42)
growthdata = DataFrame(; age=[13:20; 13:20],
                       sex=repeat(["male", "female"], inner=8),
                       weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(rng, 16))
mod_uncentered = lm(@formula(weight ~ 1 + sex * age), growthdata)

# Reference grid: keep odd ages for males and even ages for females.
refgrid = copy(growthdata)
filter!(refgrid) do row
    return mod(row.age, 2) == (row.sex == "male")
end
# NOTE(review): effects! presumably fills in the model predictions and the
# `err` column used below -- confirm against the Effects.jl documentation.
effects!(refgrid, mod_uncentered)
# Interval bounds at +/- 1.96 * err around the weight column.
refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
df = refgrid

# ggplot2 called from Julia through RCall; columns are passed as symbols.
ggplot(df, aes(x=:age, y=:weight, group=:sex, shape=:sex, linetype=:sex)) +
    geom_point(position=position_dodge(width=0.15)) +
    geom_ribbon(aes(ymin=:lower, ymax=:upper), fill="gray", alpha=0.5) +
    geom_line(position=position_dodge(width=0.15)) +
    ylab("Weight") + xlab("Age") +
    theme_classic()
However, I would like to modify this graph a bit more. For example, I would like to change the scale of the y axis and the colors of the ribbon, add some error bars, change the text size of the legend, and so on. Since I am new to Julia, I have not succeeded in finding the equivalent code for these modifications. Could someone help me translate the ggplot R code below into Julia?
# Row with the largest weight among males; used for the text label below.
# slice_max() orders by the `weight` column of the filtered data (the original
# passed the full-length df$weight, which does not match the filtered rows).
t1 <- filter(df, sex == "male") %>% slice_max(weight)

ggplot(
  df,
  aes(age, weight,
    group = sex, shape = sex, linetype = sex, fill = sex, colour = sex
  )
) +
  geom_line(position = position_dodge(width = 0.15)) +
  geom_point(position = position_dodge(width = 0.15)) +
  geom_errorbar(
    aes(ymin = lower, ymax = upper),
    width = 0.1,
    linetype = "solid",
    position = position_dodge(width = 0.15)
  ) +
  geom_ribbon(
    aes(ymin = lower, ymax = upper, fill = sex, colour = sex),
    alpha = 0.2
  ) +
  # show.legend replaces the deprecated show_guide argument
  geom_text(
    data = t1,
    aes(age, weight, label = round(weight, 1)),
    hjust = -0.25, size = 7, show.legend = FALSE
  ) +
  scale_y_continuous(limits = c(70, 150), breaks = seq(80, 140, by = 20)) +
  theme_classic() +
  scale_colour_manual(values = c("orange", "blue")) +
  guides(
    color = guide_legend(
      override.aes = list(linetype = c("dotted", "dashed"))
    ),
    linetype = "none"
  ) +
  xlab("Age") +
  ylab("Average marginal effects") +
  ggtitle("Title") +
  theme(
    axis.title.y = element_text(color = "Black", size = 28, face = "bold", hjust = 0.9),
    axis.text.y = element_text(face = "bold", color = "black", size = 16),
    plot.title = element_text(hjust = 0.5, color = "Black", size = 28, face = "bold"),
    legend.title = element_text(color = "Black", size = 13),
    legend.text = element_text(color = "Black", size = 16),
    legend.position = "bottom",
    axis.text.x = element_text(face = "bold", color = "black", size = 11),
    strip.text = element_text(face = "bold", size = 15)
  )
As I commented before, you can use R-strings to run R code. To be clear, this isn't like your post's approach where you piece together many Julia objects that wrap many R objects, this is RCall converting a Julia Dataframe to an R dataframe then running your R code.
Running an R script may not seem very Julian, but code reuse is very Julian. Besides, you're still using an R library and active R session either way, and there might even be a slight performance benefit from reducing how often you make wrapper objects and switch between Julia and R.
## import libraries for Julia and R; still good to do at top
using CairoMakie, DataFrames, Effects, GLM, StatsModels, StableRNGs, RCall
R"""
library(ggplot2)
library(dplyr)
"""
## your Julia code without the @rlibrary or ggplot lines
rng = StableRNG(42)
growthdata = DataFrame(; age=[13:20; 13:20],
                       sex=repeat(["male", "female"], inner=8),
                       weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(rng, 16))
mod_uncentered = lm(@formula(weight ~ 1 + sex * age), growthdata)
refgrid = copy(growthdata)
filter!(refgrid) do row
    return mod(row.age, 2) == (row.sex == "male")
end
effects!(refgrid, mod_uncentered)
refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
df = refgrid
## convert Julia's df and run your R code in R-string
## - note that $df is interpolation of Julia's df into R-string,
##   not R's $ operator (as in rdf$weight)
## - call the R dataframe rdf because df is already an R function
## - slice_max() orders by the `weight` column of the filtered data, and
##   show.legend replaces the deprecated show_guide argument
R"""
rdf <- $df
t1 <- filter(rdf, sex == "male") %>% slice_max(weight)
ggplot(rdf, aes(age, weight, group = sex, shape = sex, linetype = sex, fill = sex, colour = sex)) +
  geom_line(position = position_dodge(width = 0.15)) +
  geom_point(position = position_dodge(width = 0.15)) +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1,
                linetype = "solid", position = position_dodge(width = 0.15)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = sex, colour = sex), alpha = 0.2) +
  geom_text(data = t1, aes(age, weight, label = round(weight, 1)), hjust = -0.25, size = 7, show.legend = FALSE) +
  scale_y_continuous(limits = c(70, 150), breaks = seq(80, 140, by = 20)) +
  theme_classic() +
  scale_colour_manual(values = c("orange", "blue")) +
  guides(color = guide_legend(override.aes = list(linetype = c('dotted', 'dashed'))),
         linetype = "none") +
  xlab("Age") + ylab("Average marginal effects") + ggtitle("Title") +
  theme(
    axis.title.y = element_text(color = "Black", size = 28, face = "bold", hjust = 0.9),
    axis.text.y = element_text(face = "bold", color = "black", size = 16),
    plot.title = element_text(hjust = 0.5, color = "Black", size = 28, face = "bold"),
    legend.title = element_text(color = "Black", size = 13),
    legend.text = element_text(color = "Black", size = 16),
    legend.position = "bottom",
    axis.text.x = element_text(face = "bold", color = "black", size = 11),
    strip.text = element_text(face = "bold", size = 15)
  )
"""
The result is the same as your post's R code:
I used Vega-Lite (https://github.com/queryverse/VegaLite.jl) which is also grounded in the "Grammar of Graphics", and LinearRegression (https://github.com/ericqu/LinearRegression.jl) which provides similar features as GLM, although I think it is possible to get comparable results with the other plotting and linear regression packages. Nevertheless, I hope that this gives you a starting point.
using LinearRegression: Distributions, DataFrames, CategoricalArrays
using DataFrames, StatsModels, LinearRegression
using VegaLite

# Simulated growth data: ages 13-20 for each sex, weights with random noise.
growthdata = DataFrame(; age=[13:20; 13:20],
    sex=categorical(repeat(["male", "female"], inner=8), compress=true),
    weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(16))
lm = regress(@formula(weight ~ 1 + sex * age), growthdata)
results = predict_in_sample(lm, growthdata, req_stats="all")

# Layered plot: error band between lclp and uclp, line through the
# predictions, and points for the weight column.
fp = select(results, [:age, :weight, :sex, :uclp, :lclp, :predicted]) |> @vlplot() +
    @vlplot(
        mark = :errorband, color = :sex,
        y = { field = :uclp, type = :quantitative, title="Average marginal effects"},
        y2 = { field = :lclp, type = :quantitative },
        x = {:age, type = :quantitative} ) +
    @vlplot(
        mark = :line, color = :sex,
        x = {:age, type = :quantitative},
        y = {:predicted, type = :quantitative}) +
    @vlplot(
        :point, color=:sex ,
        x = {:age, type = :quantitative, axis = {grid = false}, scale = {zero = false}},
        y = {:weight, type = :quantitative, axis = {grid = false}, scale = {zero = false}},
        title = "Title", width = 400 , height = 400
    )
which gives:
You can change the style of the elements by changing the "config" as indicated here (https://www.queryverse.org/VegaLite.jl/stable/gettingstarted/tutorial/#Config-1).
As the Julia Vega-Lite is a wrapper to Vega-Lite additional documentation can be found on the Vega-lite website (https://vega.github.io/vega-lite/)

How to combine line and points to a same legend?

Here is my data frame
Days,Observed,Simulated
0,0,653.8209779
1,982,1300.359539
2,2002,2245.28519
3,3086,3465.029007
4,4290,4891.001133
5,6030,6431.473538
6,7658,7994.170186
7,9063,9501.969562
8,10405,10899.95599
9,11625,12155.67626
10,12759,13255.52404
11,13949,14199.72821
12,14961,14997.49918
13,16151,15663.0628
14,16931,16212.76706
15,17554,16663.16302
16,17874,17029.85323
17,18114,17326.89033
18,18231,17566.54139
19,18231,17759.27762
20,18231,17913.89111
The code I used is as follows:
# Base layers: simulated values as a line and observed values as points,
# both mapped to the colour aesthetic.
R <- ggplot(Data, aes(x = Days)) +
  geom_line(aes(y = Simulated, color = "Simulated")) +
  geom_point(aes(y = Observed, color = "Observed"))

# Plot components, built separately and added together below.
a <- ggtitle("C14=2kg of Placenta & 0.8kg of seed")   # title
n <- scale_color_calc(name = "Legend")                # colour scale / legend name
c <- labs(x = "Time(Days)", y = "Cumulative Biogas Yield(ml)")
h <- theme(plot.title = element_text(hjust = 0.1))    # title position
o <- theme(                                           # text colours and sizes
  plot.title   = element_text(colour = "black"),
  axis.title.x = element_text(colour = "black", size = 10),
  axis.title.y = element_text(colour = "black", size = 10),
  legend.title = element_text(colour = "black", size = 12.5),
  legend.text  = element_text(colour = "black", size = 10),
  axis.text.x  = element_text(colour = "black", size = 10),
  axis.text.y  = element_text(colour = "black", size = 10)
)

MyPlot <- R + a + n + c + h + o
MyPlot
This is what I got.
The legend shows combination of line and points
The legend shows combination of line and points
This is what I want.
Only Points in the legend of point
Only Points in the legend of point
Is this what you are looking for?
The issue relates to using the same aesthetic - colour - for different geoms.
You are not actually plotting red points so I am not sure why you want red dots in the legend?
library(ggplot2)

# The line uses the colour aesthetic and the points use fill, so each layer
# gets its own legend instead of sharing one.
ggplot(df, aes(x = Days)) +
  geom_line(aes(y = Simulated, color = "Simulated")) +
  geom_point(aes(y = Observed, fill = "Observed")) +
  ggtitle("C14=2kg of Placenta & 0.8kg of seed") +
  labs(
    x = "Time(Days)",
    y = "Cumulative Biogas Yield(ml)",
    colour = NULL,          # no title on the line legend
    fill = "Legend title"
  ) +
  theme(plot.title = element_text(hjust = 0.1)) +
  theme(
    plot.title   = element_text(colour = "black"),
    axis.title.x = element_text(colour = "black", size = 10),
    axis.title.y = element_text(colour = "black", size = 10),
    legend.title = element_text(colour = "black", size = 12.5),
    legend.text  = element_text(colour = "black", size = 10),
    axis.text.x  = element_text(colour = "black", size = 10),
    axis.text.y  = element_text(colour = "black", size = 10)
  )
data
# Example data: observed and simulated values for days 0-20.
# Assigned to `df`, the name the plotting code reads; run this before the
# plot code.
df <- structure(
  list(
    Days = c(
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
      12, 13, 14, 15, 16, 17, 18, 19, 20
    ),
    Observed = c(
      0, 982, 2002, 3086, 4290, 6030, 7658, 9063, 10405, 11625, 12759,
      13949, 14961, 16151, 16931, 17554, 17874, 18114, 18231, 18231, 18231
    ),
    Simulated = c(
      653.8209779, 1300.359539, 2245.28519, 3465.029007, 4891.001133,
      6431.473538, 7994.170186, 9501.969562, 10899.95599, 12155.67626,
      13255.52404, 14199.72821, 14997.49918, 15663.0628, 16212.76706,
      16663.16302, 17029.85323, 17326.89033, 17566.54139, 17759.27762,
      17913.89111
    )
  ),
  class = "data.frame",
  row.names = c(NA, -21L)
)
Created on 2020-05-22 by the reprex package (v0.3.0)