How do I separate the geom_label_repel in the geom_bar? - ggplot2

I want to pull away these overlapping labels in ggplot.
I am looking for a way to pull the labels apart so that they no longer overlap. I've tried modifying box.padding, max.overlaps, and min.segment.length, but the labels still overlap.
# Load the per-cancer-type cell-line table and give its columns fixed names.
data <- read.csv("995_matched_cancer_types.csv", header = TRUE)
names(data) <- c("cancer_types", "primary_disease", "cell_lines")
# Drop the rows whose primary disease is recorded as "Unknown".
data <- subset(data, primary_disease != "Unknown")
# Each row's share of all remaining cell lines (plotted as the bar height).
data$counts <- data$cell_lines / sum(data$cell_lines)
# Label text of the form "<cancer type>(<number of cell lines>)".
data$info <- paste0(data$cancer_types, "(", data$cell_lines, ")")
# Dodged bar chart, one panel per primary disease; each bar gets a repelled
# text label taken from `info`.
ggplot(data, aes(x = 1, y = counts, label = info, fill = cancer_types)) +
  geom_bar(stat = "identity", position = position_dodge2(), color = "black") +
  geom_text_repel(
    position = position_dodge2(0.9),
    box.padding = 0.5,
    max.overlaps = 30,
    min.segment.length = 0,
    # Lower bound of 0.03 on the y range passed to ggrepel for label placement.
    ylim = c(0.03, Inf)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    # The x axis carries no information (x is the constant 1), so hide it.
    axis.line.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 12),
    legend.position = "none"
  ) +
  facet_wrap(~primary_disease)
If someone would help me with this problem, I would really appreciate it.

I changed ylim = c(.03,Inf) to ylim = c(NA,Inf), which removes most of the overlaps. You can experiment further with the xlim and ylim values, and you can also try different height and width arguments in the ggsave function.
# Same dodged bar chart as in the question, but with no lower y limit for the
# repelled labels (ylim = c(NA, Inf)).
ggplot(data, aes(x = 1, y = counts, label = info, fill = cancer_types)) +
  geom_bar(stat = "identity", position = position_dodge2(), color = "black") +
  geom_text_repel(
    position = position_dodge2(0.9),
    box.padding = 0.5,
    max.overlaps = 30,
    min.segment.length = 0,
    ylim = c(NA, Inf)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.line.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 12),
    legend.position = "none"
  ) +
  facet_wrap(~primary_disease)

# Save the last plot as a 20 x 10 inch PNG.
ggsave("fig.png", width = 20, height = 10, units = "in")

Related

Colors don't stick when lollipop plot is run

I have created a lollipop chart that I love. However, when the code runs to create the plot, the colors of the lines, segments, and points all change from what they were set to. Everything else runs great, so this isn't the end of the world, but I am trying to stick with a color palette throughout a report.
The colors should be this ("#9a0138", and "#000775" specifically):
But come out like this:
Any ideas?
Here is the data:
# Sample data in dput() form: 10 rows (provider x sub-measure) with compliance
# counts, percentages, the claims date and the benchmark mean.
TabPercentCompliant <- structure(list(Provider_ShortName = c("ProviderA", "ProviderA", "ProviderA", "ProviderB",
"ProviderB", "ProviderB", "ProviderC", "ProviderC", "ProviderC", "ProviderD"), SubMeasureID = c("AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2", "FUH7", "HDO", "AMM2"), AdaptedCompliant = c(139, 2, 117, 85, 1, 33, 36, 2, 22, 43), TotalEligible = c(238, 27, 155, 148, 10, 34, 61, 3, 24, 76), PercentCompliant = c(0.584033613445378, 0.0740740740740741, 0.754838709677419, 0.574324324324324, 0.1, 0.970588235294118, 0.590163934426229, 0.666666666666667, 0.916666666666667, 0.565789473684211 ), PercentTotalEligible = c(0.00516358587173479, 0.00058578495183546, 0.00336283953831467, 0.00321096936561659, 0.000216957389568689, 0.000737655124533542, 0.001323440076369, 6.50872168706066e-05, 0.000520697734964853, 0.00164887616072203), ClaimsAdjudicatedThrough = structure(c(19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024, 19024 ), class = "Date"), AdaptedNCQAMean = c(0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57, 0.39, 0.93, 0.57), PerformanceLevel = c(0.0140336134453782, -0.315925925925926, -0.175161290322581, 0.00432432432432439, -0.29, 0.0405882352941176, 0.0201639344262295, 0.276666666666667, -0.0133333333333334, -0.00421052631578944)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
# Report date compared against ClaimsAdjudicatedThrough when filtering.
# NOTE: every sample row above is dated 2022-02-01 (day number 19024), so
# filtering the sample data on this value returns no rows.
VBP_Report_Date <- "2022-09-01"
And the code for the plot:
# Lollipop chart: provider on the x axis, one facet column per sub-measure.
# NOTE: the colour strings below sit inside aes(), so ggplot2 maps them
# through the colour scale as data values instead of using them as literal
# colours.
Tab_PercentCompliant %>%
  filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
  ggplot(aes(x = Provider_ShortName, y = PercentCompliant)) +
  # Benchmark line (AdaptedNCQAMean), one line per sub-measure.
  geom_line(
    aes(
      x = Provider_ShortName,
      y = AdaptedNCQAMean,
      group = SubMeasureID,
      color = "#9a0138",
      size = 0.001
    )
  ) +
  # Lollipop heads, sized by each row's PercentTotalEligible.
  geom_point(
    aes(
      color = "#000775",
      size = (PercentTotalEligible)
    )
  ) +
  # Lollipop stems from 0 up to PercentCompliant.
  geom_segment(
    aes(
      x = Provider_ShortName,
      xend = Provider_ShortName,
      y = 0,
      yend = PercentCompliant,
      color = "#000775"
    )
  ) +
  facet_grid(
    cols = vars(SubMeasureID),
    scales = "fixed",
    space = "fixed"
  ) +
  theme_classic() +
  theme(legend.position = "none") +
  theme(
    panel.spacing = unit(0.5, "lines"),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    panel.grid.major.y = element_line(color = "gray", linewidth = 0.5),
    axis.text.x = element_text(angle = 65, hjust = 1),
    axis.title.x = element_blank(),
    axis.line = element_blank(),
    strip.background = element_rect(color = NULL, fill = "#e1e7fa")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Test", subtitle = "Test", caption = "Test")
If you have an aesthetic constant, it is often easier / better to have it "outside" your aes call. If you want to have a legend for your color, then you need to keep it "inside", but you will need to manually set the colors with + scale_color/fill_manual.
I had to cut your code down quite a lot to make it run, and I also removed the parts that are unrelated to the problem. I removed size = 0.001 from the line layer because otherwise the line wasn't visible, and I removed the filter step because otherwise the plot could not be drawn.
Tip: aesthetics defined globally with ggplot(aes(x = ..., ...)) are inherited by every geom layer, so you don't need to repeat them in each layer. This makes the code more concise and readable.
library(ggplot2)

# Constant colours are passed outside aes() so they are used literally; only
# data-driven aesthetics stay inside aes(). x and y are inherited from the
# global mapping unless a layer overrides them.
ggplot(
  TabPercentCompliant,
  aes(x = Provider_ShortName, y = PercentCompliant)
) +
  geom_line(
    aes(y = AdaptedNCQAMean, group = SubMeasureID),
    color = "#9a0138"
  ) +
  geom_point(
    aes(size = PercentTotalEligible),
    color = "#000775"
  ) +
  geom_segment(
    aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
    color = "#000775"
  ) +
  facet_grid(~SubMeasureID) +
  theme(
    strip.background = element_rect(color = NULL, fill = "#e1e7fa")
  )
Here is the final code. Thanks again tjebo!
# Lollipop Chart ----------------------------------------------------------
# Points and stems show PercentCompliant per provider within each sub-measure
# facet; the line shows AdaptedNCQAMean. Constant colours are set outside
# aes() so they are applied literally.
Tab_PercentCompliant %>%
  filter(ClaimsAdjudicatedThrough == VBP_Report_Date) %>%
  ggplot(aes(x = Provider_ShortName, y = PercentCompliant)) +
  geom_line(
    aes(y = AdaptedNCQAMean, group = SubMeasureID),
    color = "#9a0138"
  ) +
  # No trailing comma after the last argument: a dangling comma passes an
  # empty argument into the call, which is an error unless the callee
  # explicitly tolerates it.
  geom_point(
    aes(size = PercentTotalEligible),
    color = "#000775"
  ) +
  geom_segment(
    aes(xend = Provider_ShortName, y = 0, yend = PercentCompliant),
    color = "#000775"
  ) +
  facet_grid(cols = vars(SubMeasureID)) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 65, hjust = 1),
    axis.title.x = element_blank(),
    axis.line = element_blank(),
    strip.background = element_rect(fill = "#e1e7fa")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Test", subtitle = "Test", caption = "Test")

cannot add p-values to my ggplot box plot

I am trying to add p-values to my boxplots, but I keep getting this error:
Error in is.factor(x) : object 'group' not found
# Manual p-value table: one row per pairwise comparison, with the two group
# names, the p-value and the y position given for that comparison.
p_vals <- tibble::tribble(
  ~group1, ~group2, ~p,        ~y.position,
  "Sync",  "Meta",  0.0420994, 35,
  "Sync",  "Poly",  0.2497937, 30,
  "Meta",  "Poly",  0.5471125, 30
)
and this is my ggplot command
# Box plot of `data` by `group` with error bars, jittered points and manual
# p-value annotations taken from `p_vals`.
ggplot(
  CD4,
  aes(
    x = group,
    y = data,
    group = group,
    fill = as.factor(group),
    outlier.colour = "red"
  )
) +
  stat_boxplot(geom = "errorbar", width = 0.2) +
  geom_boxplot(alpha = 0.75, outlier.shape = NA, show.legend = FALSE) +
  geom_point(
    aes(fill = group),
    pch = 21,
    size = 2,
    colour = "black",
    position = "jitter",
    show.legend = FALSE,
    alpha = 10
  ) +
  ggtitle("CD4_naive") +
  labs(x = "Group", y = "% of CD4+ T cells") +
  theme_classic() +
  theme(legend.position = "none") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  scale_fill_manual(values = c("#29ada0", "#f7bb25", "#E71D36")) +
  theme_prism(base_size = 9) +
  # NOTE(review): `p_vals` has no `group` column. If this layer inherits the
  # plot-level aesthetics that reference `group`, that would explain the
  # "object 'group' not found" error -- confirm against the add_pvalue() docs.
  add_pvalue(p_vals)
The only problem is with the last line of the command. Could you please help me? Is it related to the tibble or to the column names?

how to color different datasets separately when overlapping them using geom_smooth and color settings

I have two datasets that span full genomes, separated by chromosome (scaffold), for two group comparisons, and I want to overlay them in a single graph.
The way I was doing it was as follows:
# Overlay two windowed Fst data sets in one plot. Colour is mapped to scaffold
# in the global aes(), so both layers are coloured (and therefore grouped and
# smoothed) per scaffold.
ggplot(NULL, aes(color = as_factor(scaffold))) +
  geom_smooth(
    data = windowStats_SBvsOC,
    aes(x = mid2, y = Fst_group1_group5),
    se = FALSE
  ) +
  geom_smooth(
    data = windowStats_SCLvsSCU,
    aes(x = mid2, y = Fst_group3_group4),
    se = FALSE
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  scale_x_continuous(labels = chrom$chrID, breaks = axis_set$center) +
  # Two alternating colours, repeated once per entry of chrom$chrID
  # (2 * length(chrom$chrID) values in total).
  scale_color_manual(
    values = rep(c("#276FBF", "#183059"), length(chrom$chrID))
  ) +
  scale_size_continuous(range = c(0.5, 3)) +
  labs(x = NULL, y = "Fst (smoothed means)") +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.title.y = element_text(),
    axis.text.x = element_text(angle = 60, size = 8, vjust = 0.5)
  )
This way, each chromosome gets alternating colors and the smoothing is done per chromosome. However, I want the colors to differ between the two groups so that I can tell them apart when they overlap like this. Is there a way to do it? I can only get it to work if I remove the color-by-scaffold mapping, but then the smoothing is done across the whole genome, and I don't want that!
My dataset is big, so I'm attaching it here!
I'm running this in RStudio 2022.02.3 with R v3.6.2 and the ggplot2 package.
EDIT: I've figured it out! I just needed to change color = as_factor(scaffold) to group = as_factor(scaffold), and then add aes(color = ...) to each geom_smooth() call.

How do I set legend element titles?

I am using code from this Biostars post to get more familiar with creating plots in ggplot. I am a bit stuck on customizing the legend, though.
Is there a way to set the colour and control the number of breaks/dots in the size legend (under numDEInCat)?
# Example enrichment data: five GO terms with random fold change, adjusted
# p-value and gene count. No seed is set, so the values differ on every run.
term <- c(
  "snoRNA binding", "preprophase band", "kinesin complex",
  "microtubule motor activity", "DNA replication"
)
fc <- runif(5, 1.00, 5.00)
padj_BH <- runif(5, 0.001, 0.05)
numDEInCat <- runif(5, 30, 300)
ggdata <- data.frame(term, fc, padj_BH, numDEInCat)

# Bubble plot: fold enrichment per term, dot size = numDEInCat,
# dot colour = adjusted p-value. Axes are flipped so terms read horizontally.
gg1 <- ggplot(
  ggdata,
  aes(x = term, y = fc, size = numDEInCat, color = padj_BH)
) +
  expand_limits(y = 1) +
  geom_point(shape = 16, inherit.aes = TRUE) +
  scale_size(range = c(2.5, 12.5)) +
  scale_color_gradient(low = "#ff0303", high = "#1e00b3") + # colour for p value
  xlab("") +
  ylab("Fold Enrichment") + # label the fold enrichment axis
  labs(
    title = "Gene Ontology all",
    subtitle = "BH-adjusted",
    caption = "",
    color = "Adjusted P-value", # label the colour legend
    size = "count" # label the dot-size legend
  ) +
  theme_bw(base_size = 24) +
  theme(
    legend.position = "right",
    legend.background = element_rect(),
    plot.title = element_text(angle = 0, size = 16, face = "bold", vjust = 1),
    plot.subtitle = element_text(angle = 0, size = 14, face = "bold", vjust = 1),
    plot.caption = element_text(angle = 0, size = 12, face = "bold", vjust = 1),
    axis.text.x = element_text(angle = 0, size = 12, face = "bold", hjust = 1.10),
    axis.text.y = element_text(angle = 0, size = 12, face = "bold", vjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.line = element_line(colour = "black"),
    # Legend
    legend.key = element_blank(), # removes the border
    legend.key.size = unit(1, "cm"), # sets overall area/size of the legend
    legend.text = element_text(size = 14, face = "bold"), # text size
    title = element_text(size = 14, face = "bold")
  ) +
  coord_flip()
gg1
I think what you're looking for are guides(size = guide_legend(override.aes = list(BLABLA))) and scale_size(breaks = c(BLABLA)).
# Recolour the size-legend keys and choose the size-legend breaks explicitly.
# Adding a second scale_size() replaces the one already attached to gg1, so
# the original range is restated here; otherwise it would fall back to the
# default size range.
gg1 +
  guides(size = guide_legend(override.aes = list(colour = "red"))) +
  scale_size(
    range = c(2.5, 12.5),
    limits = c(1, 1000),
    breaks = c(10, 500, 1000)
  )
Created on 2021-11-18 by the reprex package (v2.0.1)

Integrate default color into personalized theme ggplot

I created my own theme, and now I also want to standardize the color palette that is used with it. I tried to do this with the list approach from Viktor's answer in this thread:
Associate a color palette with ggplot2 theme
df <- mtcars
uwvPalet <- c("#0078D2", "#003282", "#C4D600")

# Custom theme built on theme_hc() with a recoloured plot title.
theme_uwv <- function(base_size = 22, base_family = "Verdana") {
  theme_hc(base_size = base_size, base_family = base_family) %+replace%
    theme(
      plot.title = element_text(color = rgb(0, 120, 210)),
      complete = TRUE
    )
}

# A list holding the theme function itself (not its result) and a colour scale.
theme_uwv2 <- list(theme_uwv, scale_color_manual(values = uwvPalet))

# theme_uwv2 is a list, yet it is called like a function on the last line.
ggplot(df, aes(fill = cyl, x = am, y = mpg)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme_uwv2()
Unfortunately, I get the error:
Error in theme_uwv2() : could not find function "theme_uwv2"
Anyone know how I can fix this?
The following worked for me. theme_uwv2 needed the value returned from theme_uwv() as a list element, not the function itself. Also, you were making a plot where the fill was the dominant colour variable, so I've substituted scale_color_manual() with scale_fill_manual() for demonstration purposes.
library(ggplot2)
library(ggthemes)

df <- mtcars
uwvPalet <- c("#0078D2", "#003282", "#C4D600")

# Custom theme: theme_hc() with the plot title recoloured to RGB (0, 120, 210)
# on a 0-255 scale.
theme_uwv <- function(base_size = 22, base_family = "Verdana") {
  title_style <- element_text(color = rgb(0, 120, 210, maxColorValue = 255))
  base_theme <- theme_hc(base_size = base_size, base_family = base_family)
  base_theme %+replace% theme(plot.title = title_style, complete = TRUE)
}

# The evaluated theme and the fill palette bundled in a list, so both are
# added to a plot with a single `+`.
theme_uwv2 <- list(theme_uwv(), scale_fill_manual(values = uwvPalet))

ggplot(df, aes(fill = as.factor(cyl), x = am, y = mpg)) +
  geom_col(position = "dodge") +
  ggtitle("test") +
  theme_uwv2