Stacked Bar Chart Labels-- Using geom_text to label % on a value based y-axis - ggplot2

I am looking to create a stacked bar chart where the y-axis measures the value but the labels show each segment's % of the total bar.
I think I need to add a pct column to my table then use that but am not sure how to get the pct column either.
Df for example is:
date, type, value, pct
Jan 1, A, 5, 45% (5/11)
Jan 1, B, 6, 55% (6/11)
table and chart image

Maybe something like this?
library(dplyr)
library(ggplot2)

# Example data: one row per (date, type) holding the raw value to be stacked.
test.df <- data.frame(
  date = c("2020-01-01", "2020-01-01", "2020-01-02", "2020-01-02"),
  type = c("A", "B", "A", "B"),
  val = c(5:6, 1, 7)
)

# Add each row's share of its date's total (the "pct" column asked for).
test.df <- test.df %>%
  group_by(date) %>%
  mutate(prop = val / sum(val)) %>%
  ungroup()

# The y-axis keeps the raw values; the labels show the within-bar percentage.
# position_stack() anchors every label at the top of its own segment, so this
# works for any number of types instead of hard-coding where "B" sits.
ggplot(data = test.df, aes(x = as.Date(date), y = val, fill = type)) +
  geom_col() +
  geom_text(
    aes(label = paste0(round(prop * 100, 1), "%")),
    position = position_stack(),
    color = "black",
    vjust = 1.1 # nudge the text just below the top edge of the segment
  )
With the output:

Related

ggplot2: combining line and barplot in one graph

Let's say I'm creating the grouped barplot by something like this:
# Simulated data: 24 random values in [0, 10). `time` (3 levels) and `type`
# (4 letters) are recycled along the 24 rows, so every type/time pair occurs.
data <- data.frame(
  time = factor(1:3),
  type = LETTERS[1:4],
  values = runif(24) * 10
)

# Bars within one `type` are placed side by side, one per `time` level.
bar_dodge <- position_dodge(0.75)

# Each bar is the mean of `values` for its type/time combination.
ggplot(data, aes(x = type, y = values, fill = time)) +
  stat_summary(
    fun = mean,
    geom = "bar",
    width = 0.55,
    size = 1,
    position = bar_dodge
  )
Inside each type I want to connect all the bar tops with a line (that is, connect the 3 bars for A, the 3 bars for B, and so on).
I'd like to get something like that as a result:
Is there a way to do that ?
Thank you!
I changed the code to another logic that I prefer, that is to prepare the data before using ggplot().
Code
library(dplyr)
library(ggplot2)

# Simulated input: 24 random values spread over 4 types x 3 time points.
data <- data.frame(
  time = factor(1:3),
  type = LETTERS[1:4],
  values = runif(24) * 10
)

# Summarise before plotting: one mean per type/time pair, grouping removed.
by_type_time <- group_by(data, type, time)
pdata <- ungroup(summarise(by_type_time, values = mean(values, na.rm = TRUE)))

# Bar layer: one dodged column per time level inside each type.
bar_layer <- geom_col(
  mapping = aes(fill = time, group = time),
  width = 0.55,
  size = 1,
  position = position_dodge(0.75)
)

# Line layer: grouped by type so the line runs across the bars of one type.
line_layer <- geom_line(
  mapping = aes(group = type),
  size = 1,
  position = position_dodge2(.75)
)

ggplot(pdata, aes(x = type, y = values)) +
  bar_layer +
  line_layer
Output

How to add count (n) / summary statistics as a label to ggplot2 boxplots?

I am new to R and trying to add count labels to my boxplots, so the sample size per boxplot shows in the graph.
This is my code:
# Boxplots of groundwater EC per sampling year, with the raw points jittered
# behind the boxes and the group mean drawn as a diamond.
# NOTE(review): `total` and theme_ipsum() are not defined in this snippet;
# presumably theme_ipsum() comes from the hrbrthemes package -- confirm.
bp_east_EC <- total %>%
  filter(
    year %in% c(1977, 2020, 2021, 1992),
    sampletype == "groundwater",
    East == 1,
    # EB == 1,
    # N59 == 1,
    variable %in% c("EC_uS")
  ) %>%
  ggplot(aes(x = as.character(year), y = value, colour = as.factor(year))) +
  theme_ipsum() +
  ggtitle("Groundwater EC, eastern Curacao") +
  theme(plot.title = element_text(hjust = 0.5, size = 14)) +
  theme(legend.position = "none") +
  labs(x = "", y = "uS/cm") +
  geom_jitter(color = "grey", size = 0.4, alpha = 0.9) +
  geom_boxplot() +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", shape = 23, size = 2) # shows mean
I have googled a lot and tried different things (with annotate, with return functions, mtext, etc), but it keeps giving different errors. I think I am such a beginner I cannot figure out how to integrate such suggestions into my own code.
Does anybody have an idea what the best way would be for me to approach this?
I would create a new variable that contained your sample sizes per group and plot that number with geom_label. I've generated an example of how to add count/sample sizes to a boxplot using the iris dataset since your example isn't fully reproducible.
library(tidyverse)
data(iris)

# boxplot with no label
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot()

# One row per species holding the sample size and the mean. Passing this
# summary to geom_label() draws exactly one label per box; labelling the full
# data would stack one identical label per observation on top of each other.
label_data <- iris %>%
  group_by(Species) %>%
  summarise(count = n(), mean = mean(Sepal.Length))

# boxplot with label
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot() +
  geom_label(
    data = label_data,
    aes(label = count, y = mean + 0.75), # <- change this to move label up and down
    size = 4
  ) +
  geom_jitter(alpha = 0.35, aes(color = Species)) +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 6)

How to delete NA from the graph

I'm still learning R and I'm not sure why there is an NA category in my graph, considering that I have used the table function to check the values in the column.
graph
Any suggestions to remove the NA variable in my graph?
Please find below sample of code(not actual dataset):
# Install and load relevant packages
install.packages("tidyverse")
install.packages("lubridate")
install.packages("ggplot2")
install.packages("tibble")
library(tidyverse)
library(lubridate)
library(ggplot2)
library(tibble)
library(dplyr)

# data frame
all_trips <- tribble(
  ~start, ~end, ~start_name, ~type,
  "2020-03-22 03:20:20", "2020-03-22 04:10:15", "A", "member",
  "2020-03-25 01:01:07", "2020-03-25 05:09:45", NA, "member",
  "2020-03-26 07:09:55", "2020-03-26 08:10:20", "B", "casual",
  "2020-03-29 09:10:30", "2020-03-29 09:00:20", "A", "casual",
  "2020-03-30 11:09:18", "2020-03-30 03:40:10", "B", "member"
)

# generate new columns
all_trips$date <- as.Date(all_trips$start) # The default format is yyyy-mm-dd
all_trips$month <- format(as.Date(all_trips$date), "%m")
all_trips$day <- format(as.Date(all_trips$date), "%d")
all_trips$year <- format(as.Date(all_trips$date), "%Y")
all_trips$day_of_week <- format(as.Date(all_trips$date), "%A")
# Pin the unit: with the default units = "auto", difftime() chooses
# secs/mins/hours/days from the data, so the bare number could change meaning.
all_trips$ride_length <- difftime(all_trips$end, all_trips$start, units = "secs")
is.factor(all_trips$ride_length)
all_trips$ride_length <- as.numeric(all_trips$ride_length)
is.numeric(all_trips$ride_length)

# data cleaning
# A missing start_name is a real NA, not the string "NA": `x == "NA"` returns
# NA for it, and an NA in a logical row index yields an all-NA row -- which is
# what showed up as the NA category in the chart. Test with is.na() instead.
keep_row <- !is.na(all_trips$start_name) &
  !is.na(all_trips$ride_length) &
  all_trips$ride_length >= 0
all_trips_v2 <- all_trips[keep_row, ]

# data viz
all_trips_v2 %>%
  mutate(weekday = wday(start, label = TRUE)) %>% # creates weekday field using wday()
  group_by(type, weekday) %>% # groups by usertype and weekday
  summarise(
    number_of_rides = n(), # calculates the number of rides
    average_duration = mean(ride_length) # calculates the average duration
  ) %>%
  arrange(type, weekday) %>% # sorts
  ggplot(aes(x = weekday, y = number_of_rides, fill = type)) +
  geom_col(position = "dodge", na.rm = TRUE) +
  scale_x_discrete(na.translate = FALSE)
Bar Chart:
Click here
Adding the na.rm and na.translate arguments removes missing values from the bar chart without a warning message, as shown here:
# Small demo data set: x contains a missing value in each of the two groups.
na_demo <- tibble(
  x = rep(c("One", "Two", "Two", NA), 2),
  Group = rep(c("A", "B"), each = 4)
)

# Count bars dodged by Group; na.rm silences the removed-rows warning and
# na.translate = FALSE keeps NA off the discrete x axis.
ggplot(na_demo, aes(x, fill = Group)) +
  labs(title = "Sample Group Bar Chart with NA's Removed") +
  geom_bar(stat = "Count", position = position_dodge(), na.rm = TRUE) +
  scale_x_discrete(na.translate = FALSE)

Stack bars with percentages and values shown

Here is my dataframe - data_long1
# Patient counts per tumour subtype and outcome, in long format: one row per
# Subtype/variable combination. Assigned to `data_long1` because the plotting
# snippets that follow read the data under that name.
data_long1 <- data.frame(
  value = c(88, 22, 100, 12, 55, 17, 10, 2, 2),
  Subtype = as.factor(c(
    "lung", "prostate", "oesophagus",
    "lung", "prostate", "oesophagus",
    "lung", "prostate", "oesophagus"
  )),
  variable = as.factor(c(
    "alive", "alive", "alive",
    "dead", "dead", "dead",
    "uncertain", "uncertain", "uncertain"
  ))
)
The following code gives me a nice graph that I want, with all the values displayed, but none in percentages.
# Stacked bars of the raw counts, each segment labelled with its own value.
value_labels <- geom_text(
  aes(label = value),
  size = 3,
  hjust = 0.1,
  vjust = 2,
  position = position_stack()
)

ggplot(data_long1, aes(x = Subtype, y = value, fill = variable)) +
  geom_col() +
  value_labels
What I am looking for is a stacked bar chart with the actual values on the y-axis (not percentages, as in the previous graph) but also with a percentage label on each segment of every bar. I tried the code below and got a meaningless graph in which every segment is labelled 33.3%.
# Attempt that labels every segment 33.3% (kept as posted; see why below).
# count(Subtype, variable) tallies ROWS and ignores `value`. data_long1 has
# exactly one row per Subtype/variable pair, so n is 1 everywhere and
# prop.table(n) within each Subtype is 1/3 for all three segments.
# In addition, y is mapped to the factor `variable` instead of a numeric column.
data_long1 %>% count(Subtype, variable) %>% group_by(Subtype) %>% mutate(pct= prop.table(n) * 100) %>% ggplot() + aes(x = Subtype, y = variable, fill=variable) +
geom_bar(stat="identity") + ylab("Number of Patients") +
geom_text(aes(label=paste0(sprintf("%1.1f", pct),"%")), position=position_stack(vjust=0.5)) + ggtitle("My Tumour Sites") + theme_bw()
I cannot seem to find a way to use the mutate function to resolve this problem. Please help.
I would pre-compute the summaries you want. Here is the proportion within each subtype:
# Share of each segment within its own bar: value divided by the Subtype total.
data_long2 <- data_long1 %>%
  group_by(Subtype) %>%
  mutate(proportion = value / sum(value)) %>%
  ungroup() # drop the grouping so later verbs on data_long2 are not per-Subtype

# The y-axis shows the raw values; the labels show the within-bar percentage.
ggplot(data_long2, aes(x = Subtype, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = sprintf('%0.0f%%', proportion * 100)),
    size = 3,
    hjust = 0.1,
    vjust = 2,
    position = "stack"
  )
You can also get the proportion across all groups and types simply by removing the group_by statement:
# Share of each segment in the grand total over all subtypes and outcomes.
data_long2 <- mutate(data_long1, proportion = value / sum(value))

# Percentage labels stacked the same way as the bars they annotate.
pct_labels <- geom_text(
  aes(label = sprintf('%0.0f%%', proportion * 100)),
  size = 3,
  hjust = 0.1,
  vjust = 2,
  position = position_stack()
)

ggplot(data_long2, aes(x = Subtype, y = value, fill = variable)) +
  geom_col() +
  pct_labels

ggplot - line ordering, one line on top of the other

Here is my example:
library(ggplot2)
library(tidyr) # provides gather(); ggplot2 alone does not

forecast <- c(2, 2, 1, 2, 2, 3, 2, 3, 3, 3, 3)
actual <- c(2, 2, 1, 2, 2, 3, 2, 3, 2, 2, 1)
my_df <- data.frame(forecast = forecast, actual = actual)
my_df$seq_order <- as.factor(1:NROW(my_df))

# Long format: one row per (seq_order, line_type) with the plotted value.
my_df <- gather(my_df, "line_type", "value", -seq_order)

# One line per series; both lines share the same x positions.
ggplot(
  data = my_df,
  aes(x = seq_order, y = value, colour = line_type, group = line_type)
) +
  geom_line() +
  theme(legend.position = "bottom")
Here is how it looks:
I would like the red line to be drawn on top of the blue line everywhere they coincide. I tried scale_color_manual(values = c("forecast" = "red" ,"actual" = "blue")), but it did not work.
Change the factor level order. Don't forget to change the group too.
See this related thread for why I used scales::hue_pal() to recover the default colours.
library(tidyverse)

forecast <- c(2,2,1,2,2,3,2,3,3,3,3)
actual <- c(2,2,1,2,2,3,2,3,2,2,1)
my_df <- data.frame(forecast = forecast, actual = actual, seq_order = 1:11)

# Reshape to long format, then fix the factor level order: 'forecast' comes
# first and 'actual' second, which changes the order of the line groups.
my_df <- gather(my_df, line_type, value, -seq_order)
my_df <- mutate(my_df, type = factor(line_type, levels = c('forecast','actual')))

# Reverse the two default hues so each series keeps its original colour
# despite the changed level order.
swapped_hues <- rev(scales::hue_pal()(2))

ggplot(
  data = my_df,
  aes(x = seq_order, y = value, colour = type, group = type)
) +
  geom_line() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = swapped_hues)
Created on 2020-03-24 by the reprex package (v0.3.0)