ggplot, drawing lines across multiple facets (x axis is date) - ggplot2

My question is almost identical to this question EXCEPT that I am using dates for my x axis. I have tried the code from the answer in the linked question. The example provided works for me, but I cannot get it to work for my dataset. I am guessing it is because of the dates?
(Sorry I could not comment on the previous question chain - I'm new and don't have enough points to comment)
Here is the sample code:
library(ggplot2)
library(gtable)
library(grid)
data<-data.frame(Date=rep(seq(as.Date("2018-09-22","%Y-%m-%d"),
as.Date("2019-06-19","%Y-%m-%d"),
by=30),9),
Station=c(rep("A",30),rep("B",30),rep("C",30)),
Description=rep(c(rep("Var1",10),rep("Var2",10),
rep("Var3",10)),3),
Data=c(seq(1,10,by=1),seq(500,800,length.out=10),seq(30,90,length.out=10), seq(5,19,length.out=10),seq(450,1080,length.out=10),seq(20,60,length.out=10), seq(2,15,length.out=10),seq(600,750,length.out=10),seq(80,25,length.out=10)))
plot<-ggplot(data,aes(x=Date,y=Data,color=as.factor(Station)))+
geom_line(size=1)+
facet_grid(Description~.,scales="free_y",switch="y")+
xlab("")+
ylab("")+
theme(panel.background=element_blank(),
panel.grid.major.y=element_line(color="grey80",
size=0.25),
panel.grid.major.x=element_blank(),
axis.line=element_line(color="black"),
strip.placement="outside",
strip.background=element_blank(),
legend.position="top",
legend.key=element_blank(),
legend.title=element_blank())
plot
plot.b<-ggplot_build(plot)
plot.g<-ggplot_gtable(plot.b)
data2npc <- function(x, panel = 1L, axis = "x") {
range <- plot.b$layout$panel_params[[panel]][[paste0(axis,".range")]]
scales::rescale(c(range, x), c(0,1))[-c(1,2)]
}
start <- sapply(as.Date("2018-10-10"),"%Y-%m-%d"), data2npc, panel=1, axis="x")
plot.g <- gtable_add_grob(plot.g, segmentsGrob(x0=start, x1=start, y0=0, y1=1, gp=gpar(lty=2)), t=7, b=9,l=5)
grid.newpage()
grid.draw(plot.g)
resulting plot

I have figured out my own answer!
The key is in changing t, b, and l in gtable_add_grob:
plot.g <- gtable_add_grob(plot.g, segmentsGrob(x0=start, x1=start, y0=0, y1=1, gp=gpar(lty=2)), t=7, b=13,l=7)
Although, it seems to me to be trial and error to identify the correct values of t,b, and l.
New code:
library(ggplot2)
library(gtable)
library(grid)
data<-data.frame(Date=rep(seq(as.Date("2018-09-22","%Y-%m-%d"),
as.Date("2019-06-19","%Y-%m-%d"),
by=30),9),
Station=c(rep("A",30),rep("B",30),rep("C",30)),
Description=rep(c(rep("Var1",10),rep("Var2",10),
rep("Var3",10)),3),
Data=c(seq(1,10,by=1),seq(500,800,length.out=10),seq(30,90,length.out=10), seq(5,19,length.out=10),seq(450,1080,length.out=10),seq(20,60,length.out=10), seq(2,15,length.out=10),seq(600,750,length.out=10),seq(80,25,length.out=10)))
plot<-ggplot(data,aes(x=Date,y=Data,color=as.factor(Station)))+
geom_line(size=1)+
facet_grid(Description~.,scales="free_y",switch="y")+
xlab("")+
ylab("")+
theme(panel.background=element_blank(),
panel.grid.major.y=element_line(color="grey80",
size=0.25),
panel.grid.major.x=element_blank(),
axis.line=element_line(color="black"),
strip.placement="outside",
strip.background=element_blank(),
legend.position="top",
legend.key=element_blank(),
legend.title=element_blank())
plot
plot.b<-ggplot_build(plot)
plot.g<-ggplot_gtable(plot.b)
data2npc <- function(x, panel = 1L, axis = "x") {
range <- plot.b$layout$panel_params[[panel]][[paste0(axis,".range")]]
scales::rescale(c(range, x), c(0,1))[-c(1,2)]
}
start <- sapply(as.Date("2018-10-10","%Y-%m-%d"), data2npc, panel=1, axis="x")
plot.g <- gtable_add_grob(plot.g, segmentsGrob(x0=start, x1=start, y0=0, y1=1, gp=gpar(lty=2)), t=7, b=13,l=7)
grid.newpage()
grid.draw(plot.g)
And new resulting plot

Related

I'm having making an animated line chart, problems with the X axis

I'm trying to animate a plot I have where the X axis is non-numeric. The plot itself looks great, but I get a few error messages trying to animate it using the transition_reveal function.
I've got a data set called df100m that tracks the times/speeds of 10 meter splits of the 100 meter dash for various Olympic runners. It looks like this.
splits
runners
times(s)
speed(mph)
10-20
Bolt_08
1.070
21.93
20-30
Bolt_08
0.910
24.58
84 more rows of different splits and runners omitted for space.
Plotting the average speed for this data set using stat_smooth looks great. I removed the reaction time (RT), the final time (TOTAL), and the starting 10m (Start-10), so that it only shows the numeric splits. Here is the code for the plot I have so far:
df100m %>%
filter(!grepl("RT", splits)) %>%
filter(!grepl("TOTAL", splits)) %>%
filter(!grepl("Start-10", splits)) %>%
ggplot(mapping = aes(x = splits, y = speed, col = runner, group = runner)) +
stat_smooth(method = loess, se = F, fullrange = F) +
theme(axis.text.x = element_text(angle = 90)) +
theme(aspect.ratio = 3/7) +
theme_solarized_2(light=F)
However when I add +transition_reveal(~splits) I get the following error message:
Error in seq.default(range[1], range[2], length.out = nframes) :
'from' must be a finite number
In addition: Warning messages:
1: In min(x) : no non-missing arguments to min; returning Inf
2: In max(x) : no non-missing arguments to max; returning -Inf
Playing around with it, I sometimes also get the "invalid 'times' argument" error.
I know there are a few problems with the X axis (splits), it's a character rather than numeric, but also has a dash (-). I've seen a few posts attempting to fix this error, but I am unable to fix it as I am a beginner. Could someone point me to the right direction?
Using some minimal made-up data, this is one possible approach creating the smoothed lines before the plotting, then basing the transition_reveal on the splits mutated to integers (as splits_int).
library(tidyverse)
library(gganimate)
library(broom)
tribble(~splits, ~speed, ~runner,
"10-20", 20.0, "A",
"20-30", 21.0, "A",
"30-40", 22.0, "A",
"10-20", 19.0, "B",
"20-30", 20.0, "B",
"30-40", 21.0, "B"
) %>%
mutate(splits_int = factor(splits) %>% as.integer()) %>%
nest(data = -runner) %>%
mutate(
lm_model = map(data, ~loess(speed ~ splits_int, data = .x)),
augmented = map(lm_model, augment) %>% map(select, .fitted)
) %>%
unnest(c(augmented, data)) %>%
ggplot(aes(splits, .fitted, col = runner, group = runner)) +
geom_line() +
transition_reveal(splits_int)
Created on 2022-12-10 with reprex v2.0.2

ggplot2: add title changes point colors <-> scale_color_manual removes ggtitle

I am facing a silly point color in a dot plot with ggplot 2. I have a whole table of data of which i take relevant rows to make a dot plot. With scale_color_manual my points get colored according to the named palette and factor genotype specified in aes() and when i simply want to add a title specifying the cell line used, the points get colored back to automatic yellow and purple. Adding the title first and setting scale_color_manual as the last layer changes the points colors and removes the title.
What is wrong in there? I don't get it and it is a bit frustrating
thanks for your help!
Here's reproducible code to get my whole df and the subset for the plots:
# df of data to plot
exp <- c(rep(284, times = 6), rep(285, times = 12))
geno <- c(rep(rep(c("WT", "KO"), each =3), times = 6))
line <- c(rep(5, times = 6),rep(8, times= 12), rep(5, times =12), rep(8, times = 6))
ttt <- c(rep(c(0, 10, 60), times = 10), rep(c("ZAc60", "Cu60", "Cu200"), times = 2))
rep <- c(rep(1, times = 12), rep(2, times = 6), rep(c(1,2), times = 6), rep(1, times = 6))
rel_expr <- c(0.20688185, 0.21576131, 0.94046028, 0.30327675, 0.22865200,
0.92941881, 0.13787508, 0.13325281, 0.22114990, 0.95591724,
1.03239718, 0.83339248, 0.15332420, 0.17558160, 0.22475604,
1.02356351, 0.77882000, 0.69214403, 0.16874097, 0.15548158,
0.45207943, 0.28123760, 0.23500083, 0.51588856, 0.1399634,
0.14610184, 1.06716713, 0.16517801, 0.34736164, 0.64773650,
0.18334429, 0.05924757, 0.01803593, 0.86685230, 0.39554685,
0.25764805)
df_all <- data.frame(exp, geno, line, ttt, rep, rel_expr)
names(df_all) <- c("EXP", "Geno", "Line", "TTT", "Rep", "Rel_Expr")
str(df_all)
# make Geno an ordered factor
df_all$Geno <- ordered(df_all$Geno, levels = c("WT", "KO"))
# select set of whole dataset for current plot
df_ions <- df_all[df_all$Line == 8 & !df_all$TTT %in% c(10, 60),]
# add a treatment as factor columns fTTT
df_ions$fTTT <- ordered(df_ions$TTT, levels = c("0", "ZAc60", "Cu60", "Cu200"))
str(df_ions)
# plot rel_exp vs factor treatment, color points by geno
# with named color palette
library(ggplot2)
col_palette <- c("#000000", "#1356BC")
names(col_palette) <- c("WT", "KO")
plt <- ggplot(df_ions, aes(x = fTTT, y = Rel_Expr, color = Geno)) +
geom_jitter(width = 0.1)
plt # intermediate_plt_1.png
plt + scale_color_manual(values = col_palette) # intermediate_plt_2.png
plt + ggtitle("mRPTEC8") # final_plot.png
images:

Adding percentage labels to a barplot with y-axis 'count' in R

I'd like to add percentage labels per gear to the bars but keep the count y-scale.
E.g. 10% of all 'gear 3' are '4 cyl'
library(ggplot)
ds <- mtcars
ds$gear <- as.factor(ds$gear)
p1 <- ggplot(ds, aes(gear, fill=gear)) +
geom_bar() +
facet_grid(cols = vars(cyl), margins=T)
p1
Ideally only in ggplot, wihtout adding dplyr or tidy. I found some of these solutions but then I get other issues with my original data.
EDIT: Suggestions that this is a duplicate from:
enter link description here
I saw this also earlier, but wasn't able to integrate that code into what I want:
# i just copy paste some of the code bits and try to reconstruct what I had earlier
ggplot(ds, aes(gear, fill=gear)) +
facet_grid(cols = vars(cyl), margins=T) +
# ..prop.. meaning %, but i want to keep the y-axis as count
geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") +
# not sure why, but I only get 100%
geom_text(aes( label = scales::percent(..prop..),
y= ..prop.. ), stat= "count", vjust = -.5)
The issue is that ggplot doesn't know that each facet is one group. This very useful tutorial helps with a nice solution. Just add aes(group = 1)
P.S. At the beginning, I was often quite reluctant and feared myself to manipulate my data and pre-calculate data frames for plotting. But there is no need to fret! It is actually often much easier (and safer!) to first shape / aggregate your data into the right form and then plot/ analyse the new data.
library(tidyverse)
library(scales)
ds <- mtcars
ds$gear <- as.factor(ds$gear)
First solution:
ggplot(ds, aes(gear, fill = gear)) +
geom_bar() +
facet_grid(cols = vars(cyl), margins = T) +
geom_text(aes(label = scales::percent(..prop..), group = 1), stat= "count")
edit to reply to comment
Showing percentages across facets is quite confusing to the reader of the figure and I would probably recommend against such a visualization. You won't get around data manipulation here. The challenge is here to include your "facet margin". I create two summary data frames and bind them together.
ds_count <-
ds %>%
count(cyl, gear) %>%
group_by(gear) %>%
mutate(perc = n/sum(n)) %>%
ungroup %>%
mutate(cyl = as.character(cyl))
ds_all <-
ds %>%
count(cyl, gear) %>%
group_by(gear) %>%
summarise(n = sum(n)) %>%
mutate(cyl = 'all', perc = 1)
ds_new <- bind_rows(ds_count, ds_all)
ggplot(ds_new, aes(gear, fill = gear)) +
geom_col(aes(gear, n, fill = gear)) +
facet_grid(cols = vars(cyl)) +
geom_text(aes(label = scales::percent(perc)), stat= "count")
IMO, a better way would be to simply swap x and facetting variables. Then you can use ggplots summarising function as above.
ggplot(ds, aes(as.character(cyl), fill = gear)) +
geom_bar() +
facet_grid(cols = vars(gear), margins = T) +
geom_text(aes(label = scales::percent(..prop..), group = 1), stat= "count")
Created on 2020-02-07 by the reprex package (v0.3.0)

Issue when trying to plot geom_tile using ggplotly

I would like to plot a ggplot2 image using ggplotly
What I am trying to do is to initially plot rectangles of grey fill without any aesthetic mapping, and then in a second step to plot tiles and change colors based on aesthetics. My code is working when I use ggplot but crashes when I try to use ggplotly to transform my graph into interactive
Here is a sample code
library(ggplot2)
library(data.table)
library(plotly)
library(dplyr)
x = rep(c("1", "2", "3"), 3)
y = rep(c("K", "B","A"), each=3)
z = sample(c(NA,"A","L"), 9,replace = TRUE)
df <- data.table(x,y,z)
p<-ggplot(df)+
geom_tile(aes(x=x,y=y),width=0.9,height=0.9,fill="grey")
p<-p+geom_tile(data=filter(df,z=="A"),aes(x=x,y=y,fill=z),width=0.9,height=0.9)
p
But when I type this
ggplotly(p)
I get the following error
Error in [.data.frame(g, , c("fill_plotlyDomain", "fill")) :
undefined columns selected
The versions I use are
> packageVersion("plotly")
1 ‘4.7.1
packageVersion("ggplot2")
1 ‘2.2.1.9000’
##########Edited example for Arthur
p<-ggplot(df)+
geom_tile(aes(x=x,y=y,fill="G"),width=0.9,height=0.9)
p<- p+geom_tile(data=filter(df,z=="A"),aes(x=x,y=y,fill=z),width=0.9,height=0.9)
p<-p+ scale_fill_manual(
guide = guide_legend(title = "test",
override.aes = list(
fill =c("red","white") )
),
values = c("red","grey"),
labels=c("A",""))
p
This works
but ggplotly(p) adds the grey bar labeled G in the legend
The output of the ggplotly function is a list with the plotly class. It gets printed as Plotly graph but you can still work with it as a list. Moreover, the documentation indicates that modifying the list makes it possible to clear all or part of the legend. One only has to understand how the data is structured.
p<-ggplot(df)+
geom_tile(aes(x=x,y=y,fill=z),width=0.9,height=0.9)+
scale_fill_manual(values = c(L='grey', A='red'), na.value='grey')
p2 <- ggplotly(p)
str(p2)
The global legend is here in p2$x$layout$showlegend and setting this to false displays no legend at all.
The group-specific legend appears at each of the 9 p2$x$data elements each time in an other showlegend attribute. Only 3 of them are set to TRUE, corresponding to the 3 keys in the legend. The following loop thus clears all the undesired labels:
for(i in seq_along(p2$x$data)){
if(p2$x$data[[i]]$legendgroup!='A'){
p2$x$data[[i]]$showlegend <- FALSE
}
}
Voilà!
This works here:
ggplot(df)+
geom_tile(aes(x=x,y=y,fill=z),width=0.9,height=0.9)+
scale_fill_manual(values = c(L='grey', A='red'), na.value='grey')
ggplotly(p)
I guess your problem comes from the use of 2 different data sources, df and filter(df,z=="A"), with columns with the same name.
[Note this is not an Answer Yet]
(Putting for reference, as it is beyond the limits for comments.)
The problem is rather complicated.
I just finished debugging the code of plotly. It seems like it's occurring here.
I have opened an issue in GitHub
Here is the minimal code for the reproduction of the problem.
library(ggplot2)
set.seed(1503)
df <- data.frame(x = rep(1:3, 3),
y = rep(1:3, 3),
z = sample(c("A","B"), 9,replace = TRUE),
stringsAsFactors = F)
p1 <- ggplot(df)+
geom_tile(aes(x=x,y=y, fill="grey"), color = "black")
p2 <- ggplot(df)+
geom_tile(aes(x=x,y=y),fill="grey", color = "black")
class(plotly::ggplotly(p1))
#> [1] "plotly" "htmlwidget"
class(plotly::ggplotly(p2))
#> Error in `[.data.frame`(g, , c("fill_plotlyDomain", "fill")): undefined columns selected

shiny sliderInput range minimum and maximum values

I need to adjust a histogram output in shiny with a range values:
(where m is an arbitrary matrix)
#ui.R
sliderInput(inputId="adjust", label="Choose adjacency threshold", value=c(0.001, 0.9), min=0.0001, max=1),
plotOutput("hist")
#server.R
df<-reactive({
idx = m > min(input$adjust) & m < max(input$adjust)
data.frame(
id = row(m)[idx],
value = m[idx])
})
output$hist<-renderPlot({hist(df()$values)})
However this doesn't seem to affect the histogram- it re-renders when I toggle the slider but its the same each time.... it takes a very long time and it just seems to take all of the values in into account?
Does anyone know how to make this work?
when I try to print the slider's min/max value- nothing comes to the page:
#ui.R
verbatimTextOutput("x")
#server
output$x<-renderPrint({min(input$adjust)})
Hence I might be approaching this the completely wrong way... does anyone know how to do this?
FULL EXAMPLE
library(shiny)
runApp(list(ui = fluidPage(sliderInput(inputId="adjust", label="Choose adjacency threshold", value=c(0.001, 0.9), min=0.0001, max=1),
plotOutput("hist")
server=function(input, output){
adjacentmat<-reactive({adjacency(dat)})
data<-reactive({
adj_mat<-adjacentmat()
adj_mat[adj_mat < input$adjust] <- 0
m<-adj_mat
idx = m > min(input$adjust) & m < max(input$adjust)
data.frame(
source = row(m)[idx],
target = col(m)[idx],
corr = m[idx])
})
output$hist<-renderPlot({hist(data()$corr)})
}
)
generate the dat variable with the following code:
library('dplyr')
set.seed(1)
# generate a couple clusters
nodes_per_cluster <- 30
n <- 10
nvals <- nodes_per_cluster * n
# cluster 1 (increasing)
cluster1 <- matrix(rep((1:n)/4, nodes_per_cluster) +
rnorm(nvals, sd=1),
nrow=nodes_per_cluster, byrow=TRUE)
# cluster 2 (decreasing)
cluster2 <- matrix(rep((n:1)/4, nodes_per_cluster) +
rnorm(nvals, sd=1),
nrow=nodes_per_cluster, byrow=TRUE)
# noise cluster
noise <- matrix(sample(1:2, nvals, replace=TRUE) +
rnorm(nvals, sd=1.5),
nrow=nodes_per_cluster, byrow=TRUE)
dat <- rbind(cluster1, cluster2, noise)
colnames(dat) <- paste0('n', 1:n)
rownames(dat) <- c(paste0('cluster1_', 1:nodes_per_cluster),
paste0('cluster2_', 1:nodes_per_cluster),
paste0('noise_', 1:nodes_per_cluster))
This works for me:
library(shiny)
runApp(list(ui = fluidPage(
mainPanel(sliderInput("test", "Select values", value= c(.001,.9), min= 0.0001, max= 1)),
verbatimTextOutput("test2")
),
server = function(input, output, session) {
output$test2 <- renderPrint(min(input$test))
}))
I'm guessing your problem is somewhere in the code you haven't shown us. Can you give the code for the entire running example of your problem?