ggplot: How can I put my legend information below the xaxis? - ggplot2

This is my data:
data_g <- data.frame(
study_ID = c("ben-amitay2006","ben-ari2018", "ben-ari2019a","ben-ari2019b","connolly2004","lopez2008","phelan2009","toren2007","ben-amitay2006","ben-ari2019b","connolly2004","ben-amitay2006",
"demaso2014","toren2007","ben-amitay2006","demaso2014","toren2007","demaso2014","ben-ari2018","ben-ari2019a","ben-ari2019b","kubota2011","sarrechia2015","ben-ari2018","ben-ari2019a",
"sarrechia2015"),
symptom = c("PTSD","PTSD","PTSD","PTSD","PTSD","PTSD","PTSD","PTSD",
"Subthreshold PTSD","Subthreshold PTSD","Subthreshold PTSD","Depressive Symptoms",
"Depressive Symptoms","Depressive Symptoms","Anxiety Sympomts","Anxiety Sympomts","Anxiety Sympomts",
"Disruptive Behavior Sympomts","CBCL Clinical Range","CBCL Clinical Range","CBCL Clinical Range",
"CBCL Clinical Range","CBCL Clinical Range","CBCL Borderline Range","CBCL Borderline Range","CBCL Borderline Range"),
prevalence = c(0,10.39, 33.3, 10.2, 12, 0,0,29.03,7.7,26.4, 12,5, 4,18.52,12.5, 5,16.13,
16, 11, 30, 27.3, 45, 8.1,4,26.7, 3.9)
)
data_g$symptom <- factor(data_g$symptom, levels = c('PTSD', 'Subthreshold PTSD', 'Depressive Symptoms', 'Anxiety Sympomts', 'Disruptive Behavior Sympomts', 'CBCL Clinical Range', 'CBCL Borderline Range'))
library(RColorBrewer)
And here is what I have so far:
ggplot(data_g, aes(symptom, prevalence, fill = study_ID, label = study_ID)) +
geom_bar(stat="identity", position = position_dodge(preserve = 'single')) +
scale_fill_brewer(palette = "Paired") +
labs(x = "", fill = "Study ID") +
ylim(0, 100) +
theme(text = element_text(size=15)) +
scale_x_discrete(guide = guide_axis(n.dodge = 2))+
labs(title="Prevalence Rates of Psychological Symptoms after Surgery across Studies", x = "Psychological Symptoms", y = "Prevalence Rates in Percentage") ```
First Version with Legend
How can I either insert my legend information below the xaxis or make more clear which bars belong to which symptom by using a vertical line or anything?
I would be happy with ablines between the groups of symptoms. But if someone also has an idea of how to put the Study IDs below the xaxis vertical written that would be just great!!
Here is what I have tried:
ggplot(data_g, aes(symptom, prevalence, fill = study_ID, label = study_ID)) +
geom_bar(stat="identity", position = position_dodge(preserve = 'single')) +
scale_fill_brewer(palette = "Paired") +
labs(x = "", fill = "Study ID") +
ylim(0, 100) +
theme(text = element_text(size=15)) +
scale_x_discrete(guide = guide_axis(n.dodge = 2))+
geom_text(position = position_dodge(width = 1), aes(x=symptom, y=0), angle = 90, vjust=0, hjust = -0.06, size=2.5) +
theme(legend.position = "none") +
labs(title="Prevalence Rates of Psychological Symptoms after Surgery across Studies", x = "Psychological Symptoms", y = "Prevalence Rates in Percentage")
With that, no one can really tell which bar belongs to which study.
Second Version with Text but on the plot instead of below xaxis

One approach to achieve this is by using facets. Instead of mapping symptom on the x-axis facet by symptom and map the study_ID on the x-axis. Try this:
data_g <- data.frame(
study_ID = c("ben-amitay2006","ben-ari2018", "ben-ari2019a","ben-ari2019b","connolly2004","lopez2008","phelan2009","toren2007","ben-amitay2006","ben-ari2019b","connolly2004","ben-amitay2006",
"demaso2014","toren2007","ben-amitay2006","demaso2014","toren2007","demaso2014","ben-ari2018","ben-ari2019a","ben-ari2019b","kubota2011","sarrechia2015","ben-ari2018","ben-ari2019a",
"sarrechia2015"),
symptom = c("PTSD","PTSD","PTSD","PTSD","PTSD","PTSD","PTSD","PTSD",
"Subthreshold PTSD","Subthreshold PTSD","Subthreshold PTSD","Depressive Symptoms",
"Depressive Symptoms","Depressive Symptoms","Anxiety Sympomts","Anxiety Sympomts","Anxiety Sympomts",
"Disruptive Behavior Sympomts","CBCL Clinical Range","CBCL Clinical Range","CBCL Clinical Range",
"CBCL Clinical Range","CBCL Clinical Range","CBCL Borderline Range","CBCL Borderline Range","CBCL Borderline Range"),
prevalence = c(0,10.39, 33.3, 10.2, 12, 0,0,29.03,7.7,26.4, 12,5, 4,18.52,12.5, 5,16.13,
16, 11, 30, 27.3, 45, 8.1,4,26.7, 3.9)
)
data_g$symptom <- factor(data_g$symptom, levels = c('PTSD', 'Subthreshold PTSD', 'Depressive Symptoms', 'Anxiety Sympomts', 'Disruptive Behavior Sympomts', 'CBCL Clinical Range', 'CBCL Borderline Range'))
library(RColorBrewer)
library(ggplot2)
# One facet per symptom, with the study IDs on the x-axis of each facet.
# The facet strips are moved below the axis ("switch" + "outside") so that
# they act as group labels underneath the rotated study IDs; the legend is
# dropped because the axis text already names every bar.
ggplot(data_g, aes(x = study_ID, y = prevalence, fill = study_ID)) +
  geom_col(position = position_dodge(preserve = "single")) +
  facet_grid(
    cols = vars(symptom),
    scales = "free_x",
    space = "free_x",
    switch = "x"
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  scale_fill_brewer(palette = "Paired") +
  ylim(0, 100) +
  labs(
    title = "Prevalence Rates of Psychological Symptoms after Surgery across Studies",
    x = "Psychological Symptoms",
    y = "Prevalence Rates in Percentage",
    fill = "Study ID"
  ) +
  theme(
    text = element_text(size = 15),
    panel.spacing.x = unit(2, "pt"),
    strip.placement = "outside",
    strip.text.x = element_text(size = 8),
    axis.text.x = element_text(angle = 90, size = 6),
    legend.position = "none"
  )

Related

Automatically assigning p-value position in ggplot loop

I am running an mapply loop on a huge set of data to graph 13 parameters for 19 groups. This is working great except for the p-value position. Because the data vary from plot to plot, I cannot assign the position with a fixed value such as label.y = 125: in some plots the label then lands in the middle of a bar or error bar. However, I can't set it higher without it being way too high on other graphs. Is there a way to adjust the position to the data and error bars?
This is my graphing function and like I said the graph is great, except p-value position. Specifically, the stat compare means anova line.
#' Bar plot of per-ID means with error bars and ANOVA / t-test labels
#'
#' Filters `df` to one `Media` / `Group` combination and plots the mean of
#' `Variable` per `ID`, with mean +/- 1 SD error bars, a global ANOVA p-value
#' and per-ID t-test significance marks against the "Control" group.
#'
#' The ANOVA label is positioned just above the highest drawn element (tallest
#' error bar or data point) instead of at a fixed y value, so it no longer
#' lands inside the bars on some plots and far above them on others.
#'
#' Needs dplyr (`%>%`, `filter`), ggplot2 and `stat_compare_means()` (ggpubr)
#' to be attached by the calling script.
#'
#' @param Variable Name (character string) of the column of `df` to plot.
#' @param treatment Value of the `Media` column to keep.
#' @param Grouping Value of the `Group` column to keep.
#' @param df Data frame with the columns `Media`, `Group`, `ID`,
#'   `ANOVA_Status` and the column named by `Variable`.
#' @return The ggplot object, invisibly (as before, nothing is printed when
#'   the function is called at top level).
ANOVA_plotter <- function(Variable, treatment, Grouping, df) {
  plot_df <- df %>%
    filter(Media == treatment, Group == Grouping)

  # Top of each error bar: mean + 1 SD per ID, mirroring mean_sdl(mult = 1).
  values <- plot_df[[Variable]]
  bar_tops <- tapply(values, plot_df$ID, function(v) {
    v <- v[!is.na(v)]
    if (length(v) == 0) {
      return(NA_real_)
    }
    spread <- if (length(v) > 1) sd(v) else 0
    mean(v) + spread
  })

  # Highest thing drawn in this panel; bars start at 0, so 0 is part of the
  # vertical extent used to size the gap between the data and the label.
  tops <- c(bar_tops, values)
  tops <- tops[is.finite(tops)]
  anova_label_y <- if (length(tops) > 0) {
    max(tops) + 0.1 * diff(range(c(0, tops)))
  } else {
    NULL # nothing to plot: fall back to the default label placement
  }

  plot <- ggplot(plot_df, aes(x = ID, y = .data[[Variable]])) +
    geom_bar(aes(fill = ANOVA_Status), stat = "summary", fun = "mean", width = 0.9) +
    # NOTE(review): two error-bar layers are drawn (thick/wide and thin/narrow);
    # kept as in the original -- confirm whether both are intended.
    stat_summary(geom = "errorbar", fun.data = "mean_sdl", fun.args = list(mult = 1), size = 1) +
    # y label is set explicitly so it stays the bare column name
    labs(
      title = paste(Variable, "in", treatment, "in Group", Grouping, sep = " "),
      y = Variable
    ) +
    theme(
      legend.position = "none",
      axis.title.x = element_blank(),
      axis.text = element_text(face = "bold", size = 18),
      axis.text.x = element_text(angle = 45, hjust = 1)
    ) +
    stat_summary(geom = "errorbar", fun.data = "mean_sdl", fun.args = list(mult = 1), width = 0.2) +
    stat_compare_means(method = "anova", label.y = anova_label_y) +
    stat_compare_means(label = "p.signif", method = "t.test", paired = FALSE, ref.group = "Control")

  invisible(plot)
}
I get graphs that look like this
(https://i.stack.imgur.com/hV9Ad.jpg)
But I can't assign it to label.y = 200 because of plots like this
(https://i.stack.imgur.com/uStez.jpg)

Strange Issues with ggplot?

PLEASE HELP, something seems not to be working here. I previously plotted this figure and it was working fine; however, now something seems to be off.
The categories are no longer in order and some of them appear twice...
...
ggplot(data = data_c, aes(x = reorder(Sektor, -Preis), y = Preis)) +
geom_bar(stat = "identity", width = data_c$Menge/26) +
geom_line(data = data_c, aes(group = 1, x = Sektor,
y = Preis, colour = "40 € pro MWh")) +
geom_line(data = data_d, aes(group = 1, x = sektoren2,
y = preise2, colour = "60 € pro MWh")) +
geom_label(aes(label = preise2a), nudge_y = 30, size = 4) +
geom_label(aes(label = twh2), nudge_y = 6, size = 4) +
geom_text(aes(label = euros), vjust = 1.5, colour = "white", size = 2) +
scale_y_continuous(labels = dollar_format(suffix = " €", prefix = "")) +
labs(title = "Merrit Order Curve Wasserstoff",
subtitle = "Mengen in TWh und Preise in Euro pro MWh",
x = "Sektoren",
y = "Preise in Euro pro MWh") +
theme_bw()
´´´

Start ggplot continuous axis with a squiggly line break? [duplicate]

I have a dataframe (dat) with two columns 1) Month and 2) Value. I would like to highlight that the x-axis is not continuous in my boxplot by interrupting the x-axis with two angled lines on the x-axis that are empty between the angled lines.
Example Data and Boxplot
library(ggplot2)
set.seed(321)
dat <- data.frame(matrix(ncol = 2, nrow = 18))
x <- c("Month", "Value")
colnames(dat) <- x
dat$Month <- rep(c(1,2,3,10,11,12),3)
dat$Value <- rnorm(18,20,2)
ggplot(data = dat, aes(x = factor(Month), y = Value)) +
geom_boxplot() +
labs(x = "Month") +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "black"),
axis.text.y = element_text(size = 14, color = "black"))
The ideal figure would look something like below. How can I make this discontinuous axis in ggplot?
You could make use of the extended axis guides in the ggh4x package. Alas, you won't easily be able to create the "separators" without a hack similar to the one suggested by user Zhiqiang Wang
guide_axis_truncated accepts vectors to define the lower and upper truncation points. This also works for units, by the way; in that case you have to pass the vector inside the unit function (e.g., trunc_lower = unit(c(0, .45), "npc")).
library(ggplot2)
library(ggh4x)
set.seed(321)
dat <- data.frame(matrix(ncol = 2, nrow = 18))
x <- c("Month", "Value")
colnames(dat) <- x
dat$Month <- rep(c(1,2,3,10,11,12),3)
dat$Value <- rnorm(18,20,2)
# this is to make it slightly more programmatic
x1end <- 3.45
x2start <- 3.55
p <-
ggplot(data = dat, aes(x = factor(Month), y = Value)) +
geom_boxplot() +
labs(x = "Month") +
theme_classic() +
theme(axis.line = element_line(colour = "black"))
p +
guides(x = guide_axis_truncated(
trunc_lower = c(-Inf, x2start),
trunc_upper = c(x1end, Inf)
))
Created on 2021-11-01 by the reprex package (v2.0.1)
The below takes user Zhiqiang Wang's hack a step further. You will see I am using simple trigonometry to calculate the segment coordinates. In order to make the angle actually look as it is defined in the function, you would need to set coord_equal.
# a simple function to help make the segments
#' Build an annotation layer of short slanted "axis break" segments
#'
#' Each segment is centred on one (x, y) position and extends by `length`
#' in both directions along a line at `angle` degrees.
#'
#' @param x Numeric vector of x positions, one per separator.
#' @param y y position shared by the separators.
#' @param angle Slant of the segments in degrees.
#' @param length Distance from the centre to each end of a segment,
#'   in data units.
#' @return The layer returned by `annotate("segment", ...)`.
add_separators <- function(x, y = 0, angle = 45, length = .1) {
  # Offsets from the centre to a segment end, from basic trigonometry.
  theta <- angle * pi / 180
  half_dx <- length * cos(theta)
  half_dy <- length * sin(theta)

  # The `length` argument masks base::length() by name, so call it explicitly.
  n_marks <- base::length(x)

  annotate(
    "segment",
    x = x - half_dx,
    xend = x + half_dx,
    y = rep(y - half_dy, n_marks),
    yend = rep(y + half_dy, n_marks)
  )
}
# you will need to set limits for correct positioning of your separators
# I chose 0.05 because this is the expand factor by default
y_sep <- min(dat$Value) -0.05*(min(dat$Value))
p +
guides(x = guide_axis_truncated(
trunc_lower = c(-Inf, x2start),
trunc_upper = c(x1end, Inf)
)) +
add_separators(x = c(x1end, x2start), y = y_sep, angle = 70) +
# you need to set expand to 0
scale_y_continuous(expand = c(0,0)) +
## to make the angle look like specified, you would need to use coord_equal()
coord_cartesian(clip = "off", ylim = c(y_sep, NA))
I think it is possible to get what you want. It may take some work.
Here is your graph:
library(ggplot2)
set.seed(321)
dat <- data.frame(matrix(ncol = 2, nrow = 18))
x <- c("Month", "Value")
colnames(dat) <- x
dat$Month <- rep(c(1,2,3,10,11,12),3)
dat$Value <- rnorm(18,20,2)
p <- ggplot(data = dat, aes(x = factor(Month), y = Value)) +
geom_boxplot() +
labs(x = "Month") +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "black"),
axis.text.y = element_text(size = 14, color = "black"))
Here is my effort:
p + annotate("segment", x = c(3.3, 3.5), xend = c(3.6, 3.8), y = c(14, 14), yend = c(15, 15))+
coord_cartesian(clip = "off", ylim = c(15, 25))
Get something like this:
If you want to go further, it may take several tries to get it right:
p + annotate("segment", x = c(3.3, 3.5), xend = c(3.6, 3.8), y = c(14, 14), yend = c(15, 15))+
annotate("segment", x = c(0, 3.65), xend = c(3.45, 7), y = c(14.55, 14.55), yend = c(14.55, 14.55)) +
coord_cartesian(clip = "off", ylim = c(15, 25)) +
theme_classic()+
theme(axis.line.x = element_blank())
Just replace axis with two new lines. This is a rough idea, it may take some time to make it perfect.
You could use facet_wrap. If you assign the first 3 months to one group, and the other months to another, then you can produce two plots that are side by side and use a single y axis.
It's not exactly what you want, but it will show the data effectively, and highlights the fact that the x axis is not continuous.
dat$group[dat$Month %in% c("1", "2", "3")] <- 1
dat$group[dat$Month %in% c("10", "11", "12")] <- 2
ggplot(data = dat, aes(x = factor(Month), y = Value)) +
geom_boxplot() +
labs(x = "Month") +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "black"),
axis.text.y = element_text(size = 14, color = "black")) +
facet_wrap(~group, scales = "free_x")
* Differences in the plot are likely due to different versions of R, where set.seed gives different results.

The second description of the x-axis in ggplot2?

I am wondering if there is a way to add a second description of the x-axis in ggplot2 as follows: here "the second description" refers to the "Sample A / Sample B / two arrows" annotation colored in red (shown in the figure).
Please click for the figure!
Of course, I can just put the "second description" using PowerPoint as I did, but I just wonder if it is possible to add it using ggplot2.
Here is the code for the background plot.
library(ggplot2)
library(ggridges)
x <- data.frame(v1=rnorm(100, mean = -2, sd = 0.022),
v2=rnorm(100, mean = -1, sd = 0.022),
v3=rnorm(100, mean = 0, sd = 0.022),
v4=rnorm(100, mean = 1, sd = 0.022),
v5=rnorm(100, mean = 2, sd = 0.022),
v6=rnorm(100, mean = 3, sd = 0.022),
v7=rnorm(100, mean = 4, sd = 0.022))
colnames(x) <- c("A",
"B",
"C",
"D",
"E",
"F",
"G")
head(x)
# Manipulate the data
library(reshape2)
data <- melt(x)
head(data)
# Generating plot
colors <- rainbow(7)
ggplot(data, aes(x = value, y = variable)) +
geom_density_ridges(aes(fill = variable), alpha=0.6, bandwidth=0.1) +
scale_fill_manual(values = colors)+
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size = 12),
plot.title = element_text(size = 17, face = "bold",
margin = margin(b=10), hjust = 0.5),
panel.spacing = unit(0.1, "lines"),
legend.position="none") +
geom_vline(xintercept = 0, linetype="dotted") +
geom_vline(xintercept = 2, linetype="dotted",
color = "red", size=1.2) +
xlab("") +
ylab("Groups") +
labs(title = 'Density plot of each group')
Thank you in advance!
I'm not 100% sure this is what you mean, but you can add text on the x-axis using the following in labs:
labs(x="← Sample A Sample B →")
I got the arrows from unicode here: http://xahlee.info/comp/unicode_arrows.html
There are bigger arrows in the link if needed.
EDIT:
Here's your code adapted with the new labels in red font:
ggplot(data, aes(x = value, y = variable)) +
geom_density_ridges(aes(fill = variable), alpha=0.6, bandwidth=0.1) +
scale_fill_manual(values = colors)+
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.text = element_text(size = 12),
plot.title = element_text(size = 17, face = "bold",
margin = margin(b=10), hjust = 0.5),
panel.spacing = unit(0.1, "lines"),
legend.position="none") +
geom_vline(xintercept = 0, linetype="dotted") +
geom_vline(xintercept = 2, linetype="dotted",
color = "red", size=1.2) +
xlab("🢀 Sample A Sample B 🢂") +
theme(axis.title.x = element_text(size=40,colour = "red")) +
ylab("Groups") +
labs(title = 'Density plot of each group')
You can also push the labels further apart by adding extra spaces. Bring them closer together with fewer spaces.

R ggplot2: adding custom text to legend and value counts on sides of the heat map

My input data looks like:
COMPANY DOMAIN REVIEW PROGRESS
Company A Service Good +
Company A Response Good +
Company A Delay Very Good
Company A Cost Poor -
Company B Service Poor -
Company B Delay Average
Company B Cost Good +
Company C Service Very Poor +
Company C Cost Average
I produced a heat map in which I add some text (value of the "PROGRESS" variable - i.e. plus or minus sign).
Here is my code:
require("ggplot2")
graph <- read.table("input.tab", header=T, sep="\t")
ggplot(data=graph, aes(x=COMPANY, y=DOMAIN, group=REVIEW, fill=REVIEW)) +
geom_tile() +
geom_text(aes(x=COMPANY, y=DOMAIN, label=PROGRESS)) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
geom_vline(xintercept=seq(1.5, length(graph$COMPANY)+0.5)) +
geom_hline(yintercept=seq(1.5, length(graph$DOMAIN)+0.5)) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
axis.title=element_blank(),
axis.text.x = element_text(angle=45, size=12, hjust=1)
)
However I am struggling adding (see figure modified manually below):
(1) the following "PROGRESS" legend as part of the color code already listed:
+ Better
- Worse
(2) the count of data available on each row between the right side of the plot and the legend
(3) the count of data available on each column on top of the plot
Any advice?
Here's my proposed solution, I added comments in the code for you to understand what I did. There is probably a better way of generating the grid, though. Hope it helps.
graph <- read_csv(
"COMPANY ,DOMAIN ,REVIEW ,PROGRESS
Company A ,Service ,Good ,+
Company A ,Response ,Good ,+
Company A ,Delay ,Very Good ,
Company A ,Cost ,Poor ,-
Company B ,Service ,Poor ,-
Company B ,Delay ,Average ,
Company B ,Cost ,Good ,+
Company C ,Service ,Very Poor ,+
Company C ,Cost ,Average ,")
# Heat map of REVIEW per COMPANY x DOMAIN with "+"/"-" progress marks,
# marginal counts (top: per company, right: per domain) and a grid that is
# confined to the tile area.

# Size of the heat map: number of columns (companies) and rows (domains).
n_company <- length(unique(graph$COMPANY))
n_domain <- length(unique(graph$DOMAIN))
# Tile boundaries sit on the half-integers around the discrete positions.
x_breaks <- seq(0.5, n_company + 0.5)
y_breaks <- seq(0.5, n_domain + 0.5)

ggplot() +
  # aesthetics and data live in each geom; if they were kept in the ggplot()
  # call, the remaining geoms would need `inherit.aes = FALSE`
  geom_tile(
    data = graph,
    aes(x = COMPANY, y = DOMAIN, fill = REVIEW)
  ) +
  # `geom_point` with custom shapes instead of `geom_text`, so that the
  # progress marks get their own legend.
  # The levels are spelled out because the default (sorted) level order of
  # "+" and "-" depends on the locale and could swap the two labels.
  geom_point(
    data = graph,
    aes(
      x = COMPANY,
      y = DOMAIN,
      shape = factor(PROGRESS, levels = c("-", "+"), labels = c("Worse", "Better"))
    ),
    size = 3
  ) +
  # custom shape scale, named so each label is tied to its own symbol
  scale_shape_manual(name = "", values = c(Worse = "-", Better = "+")) +
  # marginal totals, calculated "on the fly": number of rows of `graph`
  # (i.e. tiles) per company / per domain
  # top total
  geom_text(
    data = summarize(group_by(graph, COMPANY), av_data = n()),
    aes(x = COMPANY, y = n_domain + 0.7, label = av_data)
  ) +
  # right total
  geom_text(
    data = summarize(group_by(graph, DOMAIN), av_data = n()),
    aes(x = n_company + 0.7, y = DOMAIN, label = av_data)
  ) +
  # expand the plotting area to accommodate the marginal totals
  scale_x_discrete(expand = c(0, 0.8)) +
  scale_y_discrete(expand = c(0, 0.8)) +
  # `geom_segment` generates the grid; `geom_vline`/`geom_hline` would extend
  # beyond the heat map
  # vertical lines
  geom_segment(aes(
    x = x_breaks,
    xend = x_breaks,
    y = rep(0.5, 1 + n_company),
    yend = rep(n_domain + 0.5, 1 + n_company)
  )) +
  # horizontal lines
  geom_segment(aes(
    x = rep(0.5, 1 + n_domain),
    xend = rep(n_company + 0.5, 1 + n_domain),
    y = y_breaks,
    yend = y_breaks
  )) +
  # custom legend order
  guides(
    fill = guide_legend(order = 1),
    shape = guide_legend(order = 2)
  ) +
  # theme tweaks
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_text(
      angle = 45,
      size = 12,
      hjust = 1,
      # move text up 20 pt
      margin = margin(-20, 0, 0, 0, "pt")
    ),
    # move text right 20 pt
    axis.text.y = element_text(margin = margin(0, -20, 0, 0, "pt"))
  )