How to color jitter by category in ggboxplot - ggplot2

I have the following data frame:
library(tidyverse)
dat <- structure(list(feat = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", "KyteDoo", "FoldIndex"
), class = "factor"), dose = c("0.3mM", "1mM", "3mM", "10mM",
"20mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0.3mM", "1mM",
"3mM", "10mM", "20mM"), vimp = c(-0.0025, 0, 0.0328571428571429,
0.025, 0.0425, 0.005, 0.015, 0.0228571428571429, 0.0175, 0.02,
0.0125, 0.01, 0.02, 0.0325, 0.015)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -15L))
It looks like this:
> dat
# A tibble: 15 × 3
feat dose vimp
<fct> <chr> <dbl>
1 A 0.3mM -0.0025
2 A 1mM 0
3 A 3mM 0.0329
4 A 10mM 0.025
5 A 20mM 0.0425
6 KyteDoo 0.3mM 0.005
7 KyteDoo 1mM 0.015
8 KyteDoo 3mM 0.0229
9 KyteDoo 10mM 0.0175
10 KyteDoo 20mM 0.02
11 FoldIndex 0.3mM 0.0125
12 FoldIndex 1mM 0.01
13 FoldIndex 3mM 0.02
14 FoldIndex 10mM 0.0325
15 FoldIndex 20mM 0.015
And with this code:
library(ggpubr)
# Box plot of `vimp` per `feat`, with the observations overlaid as jittered
# points; x tick labels are rotated by 90 degrees and the x title is blank.
p <- ggboxplot(
  data = dat,
  x = "feat",
  y = "vimp",
  add = "jitter"
) +
  labs(x = "", y = "Variable Importance") +
  theme(
    axis.text.x = element_text(size = 12, angle = 90, hjust = 1, vjust = 0.5)
  )
I can get this plot:
What I want to do is to color each dot in the jitter based on dose category.
I tried this but failed
ggboxplot(dat, x = "feat", y = "vimp", add = "jitter", color = "dose")
What's the right way to do it?

One option to achieve your desired result would be to add the jittered points via a geom_jitter layer:
library(ggpubr)
#> Loading required package: ggplot2
library(ggplot2)
# Draw the box plots without the `add = "jitter"` shortcut, then add the
# points as a separate geom_jitter() layer so their colour can be mapped to
# `dose`.
ggboxplot(data = dat, x = "feat", y = "vimp") +
  geom_jitter(mapping = aes(colour = dose)) +
  labs(x = "", y = "Variable Importance") +
  theme(
    axis.text.x = element_text(size = 12, angle = 90, hjust = 1, vjust = 0.5)
  )

Related

How to plot: Connected BEFORE and AFTER hormone levels with lines? [duplicate]

This question already has answers here:
Lines connecting jittered points - dodging by multiple groups
(2 answers)
Closed 25 days ago.
I have never posted on stack overflow (or any coding website) so I hope I can ask this well...
I am trying to make a plot showing how corticosterone (a hormone) increases in 30 minutes from baseline (base) to stress-induced (SI) levels in birds.
I captured starlings and took a baseline blood sample (Basecort), then waited 30 minutes and took a second blood sample (SIcort).
I would like to make a plot with each individual bird's Basecort and SIcort connected by lines.
(I have been on Google for 2 hours (not an exaggeration) and can't make anything work).
I used the following code to make this plot:
# Create a named list with one element per CORT sample type
x <- list("Base CORT" = df_adults$Base.cort, "SI CORT" = df_adults$SI.cort)
x
# Create a plot that contains one strip chart per variable
stripchart(x,
  main = "Individual Changes in CORT",
  xlab = "CORT Sample",
  col = c("#9A8822", "#F5CDB4"),
  pch = 16,
  method = "jitter", # spread overlapping points so each one stays visible
  vertical = TRUE
)
SEE PLOT HERE
I can't get any kind of "group" variable to work.
Does anyone have a clue how to connect the dots by BirdID?
This is what my dataframe looks like:
Dataframe
Thank you SO MUCH to anyone who's able to help.
As per the comments, this has been asked in other threads. To help you, here is a suggestion for how to do this with your data (posted as a community wiki; I will close this question afterwards).
Please do your readers (and reviewers!) a favour and plot your data as a scatter plot!
library(tidyr)
library(dplyr)
library(ggplot2)
## I've slightly modified the data from above suggested threads.
df <- structure(list(BirdID = c("id_1", "id_2", "id_3", "id_4", "id_5", "id_6", "id_7", "id_8", "id_9", "id_10", "id_11", "id_12", "id_13", "id_14", "id_15", "id_16", "id_17", "id_18", "id_19", "id_20"), Basecort = c(9L, 7L, 2L, 2L, 1L, 5L, 6L, 7L, 5L, 9L, 5L, 2L, 9L, 4L, 6L, 10L, 4L, 10L, 7L, 9L), SIcort = c(4L, 1L, 7L, 3L, 5L, 10L, 10L, 9L, 5L, 9L, 5L, 10L, 1L, 3L, 10L, 6L, 4L, 9L, 6L, 8L)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))
# Reshape to long format (one row per bird and sample type), jitter both
# coordinates so overlapping birds stay distinguishable, and connect the two
# samples of each bird with a line via `group = BirdID`.
df %>%
  # Name the arguments explicitly: passed positionally, "value" never reaches
  # `values_to`, and recent tidyr releases place `...` directly after `cols`.
  pivot_longer(-BirdID, names_to = "type", values_to = "value") %>%
  mutate(
    value = jitter(value),
    # factor() sorts its levels, so Basecort becomes 1 and SIcort becomes 2
    x = jitter(as.integer(factor(type)))
  ) %>%
  ggplot(aes(x, value, group = BirdID)) +
  geom_point() +
  geom_line() +
  ## you will need to change the x axis labels
  scale_x_continuous(breaks = 1:2, labels = c("Basecort", "SIcort"))
## MUCH better
# Scatter plot of SIcort against Basecort, one point per row of `df`.
ggplot(data = df) +
  geom_point(mapping = aes(x = Basecort, y = SIcort)) +
  # Dashed identity line (slope 1, intercept 0) as a visual reference for
  # "no change" between the two samples.
  geom_abline(slope = 1, intercept = 0, linetype = 2, linewidth = 0.2) +
  coord_equal()

GGplot geom_line works but ggplotly() only connect points of identical continuous values

I can create a coloured graph in ggplot where geom_line() works as expected. However, when I pipe it to ggplotly(), the line starts and ends at seemingly random data points. How can I make it look like this, but with a tooltip?
Instead of:
library(ggplot2)
library(plotly)
# df
data <- structure(list( Percent = c(0.32, 0.23, 0.75, 0.25, 0.482, 0.421, 0.5114, 0.3423, 0.27, 0.4324, 0.347, 0.377, 0.26,
0.375, 0.18604, 0.241378, 0.3095, 0.348837209, 0.33333, 0.1875, 0.2820, 0.65, 0.72, 0.75, 0.81, 0.87, 0.8244), finalpoint = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.8244), date = structure(c(18262, 18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 18536,
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 18809, 18840, 18871, 18901, 18932, 18962, 18993, 19024, 19052), class = "Date"), Status_perc = structure(c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L), levels = c("<70%", "70-80%", "≥80%"), class = "factor")), row.names = c(NA,
-27L), class = c("tbl_df", "tbl", "data.frame"))
# Build the static plot and keep it in `test` so it can be converted below.
test <- ggplot(
  data,
  aes(
    x = date,
    y = Percent,
    label = finalpoint,
    colour = Percent,
    group = 1 # Not sure why, but the author had to add this
  )
) +
  geom_line() +
  geom_point() +
  # Add a label for the final point only, formatted as a percentage.
  geom_text(
    aes(label = ifelse(is.na(finalpoint), "", sprintf("%1.1f%%", finalpoint * 100))),
    nudge_x = -50,
    nudge_y = 0.2
  ) +
  scale_colour_gradient(low = "red", high = "green", limits = c(0, 1)) +
  scale_y_continuous(limits = c(0, 1))

# Convert to an interactive plotly object and pass `displayModeBar = FALSE`.
# (A tooltip selection was left commented out by the author:
#  "Percent", "edtriage", "Num_Denom".)
config(plotly::ggplotly(test), displayModeBar = FALSE)

R: How to add fading geom_line() colours centred on the datapoint itself rather than the subsequent linking line?

I have created a ggplot that looks like this, except I want the colours to be 'centred' on the datapoint itself and then fade gradually into the colour assigned to the next datapoint. Currently, it takes the factor level assigned to one month and carries that colour along the connecting line rather than centring it on the point:
This is the fading colours I am aiming for:
library(ggplot2)
# Create dataframe
data <- structure(list( Percent = c(0.32, 0.23, 0.75, 0.25, 0.482, 0.421, 0.5114, 0.3423, 0.27, 0.4324, 0.347, 0.377, 0.26,
0.375, 0.18604, 0.241378, 0.3095, 0.348837209, 0.33333, 0.1875, 0.2820, 0.65, 0.72, 0.75, 0.81, 0.87, 0.8244), finalpoint = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.8244), date = structure(c(18262, 18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 18536,
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 18809, 18840, 18871, 18901, 18932, 18962, 18993, 19024, 19052), class = "Date"), Status_perc = structure(c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L), levels = c("<70%", "70-80%", "≥80%"), class = "factor")), row.names = c(NA,
-27L), class = c("tbl_df", "tbl", "data.frame"))
# Create custom colours (red, yellow, green) based on indicator status (Status_perc)
cols_status <- c("#A20000", "#F6BE00", "#C4C224")

# The data is passed directly to ggplot(): this snippet only attaches ggplot2,
# which does not provide the `%>%` pipe.
ggplot(
  data,
  aes(
    x = date,
    y = Percent,
    label = finalpoint,
    colour = Status_perc,
    group = 1 # single group, so the line is not split by the discrete colour
  )
) +
  geom_line() +
  geom_point() +
  # Add a label for the final point only, formatted as a percentage.
  geom_text(
    aes(label = ifelse(is.na(finalpoint), "", sprintf("%1.1f%%", finalpoint * 100))),
    nudge_y = 0.2,
    nudge_x = -50
  ) +
  # 80% goal line. Line width is set with `linewidth`; `size` is deprecated
  # for line geoms since ggplot2 3.4.0.
  geom_hline(
    yintercept = 0.8, colour = "darkgrey", linetype = 2,
    linewidth = 0.4, alpha = 0.8
  ) +
  scale_colour_manual(values = cols_status)

Rearrange stacked barplot legend labels without changing plot (and fix tick marks) in R

Is there a way to change the order of factor levels in a stacked barplot legend without changing the order of the plot too (and without mislabeling the data)? I'd like to change the order to "Presence" first, then "Absence".
I'm also having trouble with the tick marks being slightly shifted to one side.
dput(prop)
structure(list(WYR = c(2005L, 2005L, 2006L, 2006L, 2007L, 2007L,
2008L, 2008L, 2009L, 2009L, 2010L, 2010L, 2011L, 2011L, 2012L,
2012L, 2013L, 2013L, 2014L, 2014L, 2015L, 2015L, 2016L, 2016L,
2017L, 2017L, 2018L, 2018L, 2019L, 2019L, 2020L, 2020L), CYR = c(2005L,
2005L, 2006L, 2006L, 2007L, 2007L, 2008L, 2008L, 2009L, 2009L,
2010L, 2010L, 2011L, 2011L, 2012L, 2012L, 2013L, 2013L, 2014L,
2014L, 2015L, 2015L, 2016L, 2016L, 2017L, 2017L, 2018L, 2018L,
2019L, 2019L, 2020L, 2020L), class = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("prop_zero",
"prop_nonzero"), class = "factor"), proportions = c(0.170212765957447,
0.829787234042553, 0.170212765957447, 0.829787234042553, 0.361702127659574,
0.638297872340426, 0.234042553191489, 0.765957446808511, 0.234042553191489,
0.765957446808511, 0.434782608695652, 0.565217391304348, 0.58695652173913,
0.41304347826087, 0.574468085106383, 0.425531914893617, 0.51063829787234,
0.48936170212766, 0.595744680851064, 0.404255319148936, 0.608695652173913,
0.391304347826087, 0.51063829787234, 0.48936170212766, 0.404255319148936,
0.595744680851064, 0.319148936170213, 0.680851063829787, 0.468085106382979,
0.531914893617021, 0.608695652173913, 0.391304347826087)), row.names = c(NA,
-32L), class = c("tbl_df", "tbl", "data.frame"))
# Stacked bar chart of `proportions` per calendar year (CYR), filled by
# `class`, with two manually drawn rows of x-axis labels (CYR and WYR) placed
# below the panel via annotate().
ggplot(prop, aes(x = CYR, y = proportions, fill = class)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_fill_manual(values = c("grey70", "grey20"), labels = c("Absence", "Presence")) +
  scale_y_continuous(limits = c(0, 1.0), expand = expansion(mult = c(0, 0.05))) +
  # One tick per year present in the data. The breaks are derived from `prop`
  # itself because the snippet never defines `years`. The built-in tick labels
  # are blanked; the annotate() layers below supply the visible labels.
  scale_x_continuous(breaks = unique(prop$CYR), labels = ~ rep("", length(.x))) +
  # CYR labels
  annotate(
    geom = "text",
    x = prop$CYR,
    y = -Inf,
    label = prop$CYR,
    size = 6.5 / .pt,
    vjust = 2.5
  ) +
  # WYR labels
  annotate(
    geom = "text",
    x = prop$CYR,
    y = -Inf,
    label = prop$WYR,
    size = 6.5 / .pt,
    vjust = 4,
    color = "grey"
  ) +
  # CYR title
  annotate(
    geom = "text",
    x = -Inf,
    y = -Inf,
    label = c("CYR"),
    vjust = 2.5, hjust = 1,
    size = 6.5 / .pt
  ) +
  # WYR title
  annotate(
    geom = "text",
    x = -Inf,
    y = -Inf,
    label = c("WYR"),
    vjust = 4, hjust = 1,
    size = 6.5 / .pt,
    color = "grey"
  ) +
  # Disable clipping so the text anchored at y = -Inf can be drawn outside the
  # panel area.
  coord_cartesian(clip = "off") +
  theme(
    # Extra margin around the (blank) tick labels leaves room for the two
    # annotated label rows.
    axis.text.x.bottom = element_text(margin = margin(t = 8.8, b = 8.8)),
    axis.title.x = element_blank(),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size = 14),
    # Line widths use `linewidth`; `size` is deprecated for lines and
    # rectangle borders since ggplot2 3.4.0.
    axis.ticks = element_line(colour = "black", linewidth = 1),
    legend.title = element_blank(),
    panel.border = element_rect(fill = NA, color = "black", linewidth = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(y = "% presence/absence") +
  ggtitle("DRY SEASONS")
Found the answer here!: Flip ordering of legend without altering ordering in plot
Just add this code to the end of the ggplot: + guides(fill = guide_legend(reverse = TRUE))

Why "group=1" is not linking discrete variables together using geom_line?

To link discrete variables together, we use group=1, as recommended in "ggplot: line plot for discrete x-axis" and in "Using `geom_line()` with X axis being factors". However, this is not working in my case. Can someone please help with that?
My starting code is the following:
ggplot(df, aes(x = cohort, y = est,color=name)) +
geom_point(position=position_dodge(width=0.3)) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0,position=position_dodge(width=0.3))
I get the following plot:
Now the aim is to link the red dots using a line, and the same applies for the blue dots.
If I use the following code:
ggplot(df, aes(x = cohort, y = est,color=name,group=1)) +
geom_point(position=position_dodge(width=0.3)) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0,position=position_dodge(width=0.3)) +
geom_line(position=position_dodge(width=0.3))
I get this:
Here is my data:
df=structure(list(est = c(-0.0584741125344448, 0.021444947354496,
0.0283447339408831, 0.040105980055471, 0.0374816335738256, -0.00876274041718853
), se = c(0.0139423923119975, 0.0116604369679277, 0.012226641006313,
0.0224145123205577, 0.0207370700447159, 0.0214173453687987),
cohort = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("S",
"B", "G"), class = "factor"), upper = c(-0.0332526819060428,
0.0416835581439528, 0.0495098759734735, 0.0840384242037641,
0.0781262908614688, 0.0332152565056569), lower = c(-0.0836955431628468,
0.00120633656503907, 0.00717959190829267, -0.0038264640928221,
-0.0031630237138175, -0.0507407373400339), name = structure(c(2L,
2L, 2L, 1L, 1L, 1L), .Label = c("Low-skilled working class",
"Upper-middle class"), class = "factor")), row.names = c("Silent generation...1",
"Baby boomers...2", "Generation X...3", "Silent generation...4",
"Baby boomers...5", "Generation X...6"), class = "data.frame")
You could set group in your line to the variable name. I set an alpha for the error bars to make the plot more clear. You can use this code:
# Points and semi-transparent error bars per cohort, coloured by `name`; the
# line layer sets `group = name` so each `name` gets its own connecting line.
ggplot(data = df, mapping = aes(x = cohort, y = est, colour = name)) +
  geom_point(position = position_dodge(width = 0)) +
  geom_errorbar(
    mapping = aes(ymin = lower, ymax = upper),
    position = position_dodge(width = 0),
    width = 0,
    alpha = 0.5
  ) +
  geom_line(mapping = aes(group = name))
Output: