Matlab: subplot with sublegend - legend

I want to add a legend at the bottom of a subplot (2 by 2):
As you can see the charts in the 2nd row are squeezed a little bit because I manually adjusted it.
Is there a function like sublegend(...) or does it involve a lot of coding?
Source
This script produces a similar subplot:
%weights/weightsMV are 3 x 30 matrices
ticker = {'A','B','C'};
weights = [0.764602615068780,0.762329415005434,0.760055503116586,0.757781382654864,0.755508517683302,0.753234375934985,0.750960611173760,0.748686727917457,0.746413866211585,0.744140033854148,0.738367347277555,0.699505907957926,0.660644468638298,0.621783029318668,0.582921589999040,0.544060150679411,0.505198711359782,0.466337272040153,0.427475832720524,0.388614393400895,0.349752954081266,0.310891514761637,0.272030075442009,0.233168636122380,0.194307196802750,0.155445757483122,0.116584318163493,0.0777228788438641,0.0388614395242353,4.10026844681349e-12;0.235397384931220,0.211630038764514,0.187863021792451,0.164096524898550,0.140329000457761,0.116562305396725,0.0927953654491337,0.0690285024746061,0.0452609759124620,0.0214940798746062,0,5.55111512312733e-17,0,5.55111512312746e-17,0,3.70817036264992e-30,4.73852588619852e-30,5.76503212536727e-30,5.38827593281225e-30,2.64267976542398e-18,5.69348240589127e-18,8.74428504635863e-18,1.17950876868260e-17,0,2.75245082116940e-18,1.22032105779319e-17,9.15240793746451e-18,6.10160529699720e-18,6.10160531305980e-18,1.30251232759518e-10;1.10307850379949e-18,0.0260405462300517,0.0520814750909638,0.0781220924465861,0.104162481858937,0.130203318668290,0.156244023377106,0.182284769607937,0.208325157875953,0.234365886271246,0.261632652722445,0.300494092042074,0.339355531361703,0.378216970681332,0.417078410000960,0.455939849320589,0.494801288640218,0.533662727959847,0.572524167279476,0.611385606599105,0.650247045918734,0.689108485238363,0.727969924557991,0.766831363877620,0.805692803197249,0.844554242516878,0.883415681836507,0.922277121156136,0.961138560475765,0.999999999865655]
weightsMV = [0.304769232969962,0.313206616760582,0.299868860275947,0.286531103791370,0.273193347306773,0.259855590822158,0.246517834337581,0.233180077852984,0.219842321368369,0.206504564883792,0.193166808399157,0.179829051914561,0.166491295429983,0.153153538945368,0.139815782460772,0.126478025976194,0.113140269491579,0.0998025130069825,0.0864647565224054,0.0731270000377706,0.0597892435531935,0.0464514870685779,0.0331137305839816,0.0197759740993660,0.00643821761478891,0,0,0,0,0;0.695230767030038,0.658949751075843,0.636813620182365,0.614677489288983,0.592541358395569,0.570405227502122,0.548269096608740,0.526132965715326,0.503996834821880,0.481860703928497,0.459724573035019,0.437588442141605,0.415452311248223,0.393316180354776,0.371180049461362,0.349043918567980,0.326907787674534,0.304771656781119,0.282635525887737,0.260499394994259,0.238363264100876,0.216227133207430,0.194091002314016,0.171954871420570,0.149818740527187,0.123200747149645,0.0924005603621983,0.0616003735748407,0.0308001867873497,0;0,0.0278436321635749,0.0633175195416878,0.0987914069196473,0.134265294297658,0.169739181675720,0.205213069053679,0.240686956431690,0.276160843809752,0.311634731187711,0.347108618565824,0.382582505943834,0.418056393321794,0.453530280699856,0.489004168077866,0.524478055455826,0.559951942833888,0.595425830211898,0.630899717589858,0.666373604967971,0.701847492345930,0.737321379723992,0.772795267102002,0.808269154480064,0.843743041858024,0.876799252850355,0.907599439637802,0.938399626425159,0.969199813212650,1];
% Draw four views of the two weight series (weights, weightsMV) in a 2-by-2
% grid: one 3-D trajectory and its three 2-D projections.
% Each plotted matrix has two columns: column 1 = weights, column 2 = weightsMV.
figure('name','Weights as 3D Plot');

% Top-left: 3-D view over all three tickers.
subplot(2, 2, 1);
plot3([weights(1, :); weightsMV(1, :)]', ...
      [weights(2, :); weightsMV(2, :)]', ...
      [weights(3, :); weightsMV(3, :)]');
grid('on');
xlabel(ticker(1));
ylabel(ticker(2));
zlabel(ticker(3));

% Top-right: ticker 2 against ticker 3.
subplot(2, 2, 2);
plot([weights(2, :); weightsMV(2, :)]', [weights(3, :); weightsMV(3, :)]');
xlabel(ticker(2));
ylabel(ticker(3));
grid('on');

% Bottom-left: ticker 1 against ticker 3.
subplot(2, 2, 3);
plot([weights(1, :); weightsMV(1, :)]', [weights(3, :); weightsMV(3, :)]');
xlabel(ticker(1));
ylabel(ticker(3));
grid('on');

% Bottom-right: ticker 1 against ticker 2; this axes also carries the legend.
subplot(2, 2, 4);
plot([weights(1, :); weightsMV(1, :)]', [weights(2, :); weightsMV(2, :)]');
xlabel(ticker(1));
ylabel(ticker(2));
title('Top');
grid('on');
% Legend placed below the 4th axes (outside the plot box).
legend('TS1', 'TS2', 'Location', 'SouthOutside');

I've managed to write a script that produces a figure similar to the attached one, with as little code as possible. Aligning the legend's entries side by side is tricky; you will need an external script for it. I used gridLegend, available on the MATLAB Central File Exchange:
http://www.mathworks.com/matlabcentral/fileexchange/29248-gridlegend-a-multi-column-format-for-legends
Here is your code, with modifications that allow to plot the legend similar to the one on your image:
%weights/weightsMV are 3 x 30 matrices
ticker = {'A','B','C'};
weights = [0.764602615068780,0.762329415005434,0.760055503116586,0.757781382654864,0.755508517683302,0.753234375934985,0.750960611173760,0.748686727917457,0.746413866211585,0.744140033854148,0.738367347277555,0.699505907957926,0.660644468638298,0.621783029318668,0.582921589999040,0.544060150679411,0.505198711359782,0.466337272040153,0.427475832720524,0.388614393400895,0.349752954081266,0.310891514761637,0.272030075442009,0.233168636122380,0.194307196802750,0.155445757483122,0.116584318163493,0.0777228788438641,0.0388614395242353,4.10026844681349e-12;0.235397384931220,0.211630038764514,0.187863021792451,0.164096524898550,0.140329000457761,0.116562305396725,0.0927953654491337,0.0690285024746061,0.0452609759124620,0.0214940798746062,0,5.55111512312733e-17,0,5.55111512312746e-17,0,3.70817036264992e-30,4.73852588619852e-30,5.76503212536727e-30,5.38827593281225e-30,2.64267976542398e-18,5.69348240589127e-18,8.74428504635863e-18,1.17950876868260e-17,0,2.75245082116940e-18,1.22032105779319e-17,9.15240793746451e-18,6.10160529699720e-18,6.10160531305980e-18,1.30251232759518e-10;1.10307850379949e-18,0.0260405462300517,0.0520814750909638,0.0781220924465861,0.104162481858937,0.130203318668290,0.156244023377106,0.182284769607937,0.208325157875953,0.234365886271246,0.261632652722445,0.300494092042074,0.339355531361703,0.378216970681332,0.417078410000960,0.455939849320589,0.494801288640218,0.533662727959847,0.572524167279476,0.611385606599105,0.650247045918734,0.689108485238363,0.727969924557991,0.766831363877620,0.805692803197249,0.844554242516878,0.883415681836507,0.922277121156136,0.961138560475765,0.999999999865655]
weightsMV = [0.304769232969962,0.313206616760582,0.299868860275947,0.286531103791370,0.273193347306773,0.259855590822158,0.246517834337581,0.233180077852984,0.219842321368369,0.206504564883792,0.193166808399157,0.179829051914561,0.166491295429983,0.153153538945368,0.139815782460772,0.126478025976194,0.113140269491579,0.0998025130069825,0.0864647565224054,0.0731270000377706,0.0597892435531935,0.0464514870685779,0.0331137305839816,0.0197759740993660,0.00643821761478891,0,0,0,0,0;0.695230767030038,0.658949751075843,0.636813620182365,0.614677489288983,0.592541358395569,0.570405227502122,0.548269096608740,0.526132965715326,0.503996834821880,0.481860703928497,0.459724573035019,0.437588442141605,0.415452311248223,0.393316180354776,0.371180049461362,0.349043918567980,0.326907787674534,0.304771656781119,0.282635525887737,0.260499394994259,0.238363264100876,0.216227133207430,0.194091002314016,0.171954871420570,0.149818740527187,0.123200747149645,0.0924005603621983,0.0616003735748407,0.0308001867873497,0;0,0.0278436321635749,0.0633175195416878,0.0987914069196473,0.134265294297658,0.169739181675720,0.205213069053679,0.240686956431690,0.276160843809752,0.311634731187711,0.347108618565824,0.382582505943834,0.418056393321794,0.453530280699856,0.489004168077866,0.524478055455826,0.559951942833888,0.595425830211898,0.630899717589858,0.666373604967971,0.701847492345930,0.737321379723992,0.772795267102002,0.808269154480064,0.843743041858024,0.876799252850355,0.907599439637802,0.938399626425159,0.969199813212650,1];
% Draw the 2-by-2 grid of weight plots and put one horizontal, two-column
% legend centred at the bottom of the figure (uses gridLegend from the
% MATLAB Central File Exchange).
figure('name','Weights as 3D Plot');
% s1, s2, s3, s4 - axes handles of the four subplots
s1 = subplot(2, 2, 1);
plot3([weights(1, :)' weightsMV(1, :)'], [weights(2, :)' weightsMV(2, :)'], [weights(3, :)', weightsMV(3, :)']);
grid on;
xlabel(ticker(1));
ylabel(ticker(2));
zlabel(ticker(3));
s2 = subplot(2, 2, 2);
plot([weights(2, :)' weightsMV(2, :)'], [weights(3, :)' weightsMV(3, :)'])
xlabel(ticker(2));
ylabel(ticker(3));
grid on
s3 = subplot(2, 2, 3);
plot([weights(1, :)' weightsMV(1, :)'], [weights(3, :)' weightsMV(3, :)'])
xlabel(ticker(1));
ylabel(ticker(3));
grid on
s4 = subplot(2, 2, 4);
% NOTE: despite its name, ax4 is NOT an axes handle. plot returns the handles
% of the plotted line objects (one per column), and those line handles are
% what is passed to gridLegend below.
ax4 = plot([weights(1, :)' weightsMV(1, :)'], [weights(2, :)' weightsMV(2, :)']);
xlabel(ticker(1));
ylabel(ticker(2));
title('Top');
grid on
% Two-column legend for the two lines of the 4th subplot.
hL = gridLegend( ax4, 2, {'TS1', 'TS2'} ,'location','southoutside', 'Orientation','Horizontal');
% 4th subplot needs correction of size and position, due to effects of
% gridLegend: copy bottom, width and height from the 3rd subplot and keep
% only the 4th subplot's own left edge.
s3Pos = get(s3,'position');
s4Pos = get(s4,'position');
s4Pos(2:4) = s3Pos(2:4);
set(s4, 'position', s4Pos);
% Manipulate the size and position of the legend.
% Units is set BEFORE Position: properties are applied in the order given, so
% setting Position first would interpret the vector in whatever units the
% legend currently has instead of in normalized figure units.
newPosition = [0.35 0.0 0.3 0.05];
newUnits = 'normalized';
set(hL, 'Units', newUnits);
set(hL, 'Position', newPosition);

Related

Alternate row colour for dataframe

Trying to build on from this: Python: Color pandas dataframe based on MultiIndex
I've extended the code:
import pandas as pd
i = pd.MultiIndex.from_tuples([(0, 'zero'), (0, 'one'), (1, 'zero'), (1, 'one'), (1, 'two'), (1, 'three'), (1, 'four'), (2, 'zero'), (2, 'one'), (2, 'two'), (2, 'three'), (2, 'four')], names=['level_0', 'level_1'])
df = pd.DataFrame(range(0, len(i)), index=i, columns=['foo'])
colors = {0: (0.6, 0.8, 0.8, 1), 1: (1, 0.9, 0.4, 1), 2: (0.6, 0.8, 0.8, 1)}
#convert rgba to integers
c1 = {k: (int(r * 255),int(g * 255),int(b * 255), a) for k, (r,g,b,a) in colors.items()}
c2 = {k: (int(r * 255),int(g * 255),int(b * 255), 0.25) for k, (r,g,b,a) in colors.items()}
#get values of first level of MulitIndex
idx = df.index.get_level_values(0)
#counter per first level for pair and unpair coloring
zipped = zip(df.groupby(idx).cumcount(), enumerate(idx))
css = [{'selector': f'.row{i}', 'props': [('background-color', f'rgba{c1[j]}')]}
if v % 2 == 0
else {'selector': f'.row{i}', 'props': [('background-color', f'rgba{c2[j]}')]}
for v,(i, j) in zipped]
df1.style.set_table_styles(css)
And got this:
It seems tedious to do this manually. So how do I go about generalising it so that it covers all rows, and the pattern applies even if I apply it to other such 2-level multi-index dataframes?
Here is one way to do it with cycle from Python standard library's itertools module:
import pandas as pd
# Setup
i = pd.MultiIndex.from_tuples(
[
(0, "zero"),
(0, "one"),
(1, "zero"),
(1, "one"),
(1, "two"),
(1, "three"),
(1, "four"),
(2, "zero"),
(2, "one"),
(2, "two"),
(2, "three"),
(2, "four"),
(3, "one"),
],
names=["level_0", "level_1"],
)
df = pd.DataFrame(range(0, len(i)), index=i, columns=["foo"])
# Define two pairs of colors (dark and light green/yellow)
from itertools import cycle
colors = [(0.6, 0.8, 0.8), (1, 0.9, 0.4)] # green, yellow
color_cycle = cycle(
[
{
k: (int(c[0] * 255), int(c[1] * 255), int(c[2] * 255), a)
for k, a in enumerate([1, 0.25])
}
for c in colors
]
)
# Define color for each row
bg_colors = []
for i in df.index.get_level_values(0).unique():
color = next(color_cycle)
row_color = cycle(
[
{
"props": [("background-color", f"rgba{color[0]}")],
},
{
"props": [("background-color", f"rgba{color[1]}")],
},
]
)
for _ in range(df.loc[(i,), :].shape[0]):
bg_colors.append(next(row_color))
# Style dataframe
css = [{"selector": f".row{i}"} | color for i, color in enumerate(bg_colors)]
df.style.set_table_styles(css)
Output from last cell in Jupyter notebook:

geom_bar for total counts of binned continuous variable

I'm really struggling to achieve what feels like an incredibly basic geom_bar plot. I would like the sum of y to be represented by one solid bar (with colour = black outline) in bins of 10 for x. I know that stat = "identity" is what is creating the unnecessary individual blocks in each bar but can't find an alternative to achieving what is so close to my end goal. I cheated and made the below desired plot in illustrator.
I don't really want to code x as a factor for the bins, as I want to keep the format of the axis ticks and text rather than having text such as "0 - 10", "10 - 20", etc. Is there a way to do this in ggplot without using the summarise or cut functions on the raw data? I am also aware of the geom_col and stat_count options but, again, can't achieve my desired outcome.
DF as below, where y = counts at various values of a continuous variable x. Also a factor variable of type.
# Example data: `x` is a continuous variable, `y` the count recorded at each
# value of `x`, and `type` a label ("Type 1" / "Type 2") for each observation.
x <- c(
  26.7, 28.5, 30.0, 34.8, 35.0, 36.4, 38.6, 40.0, 42.1, 43.7, 44.1, 45.0,
  45.5, 47.4, 48.0, 57.2, 57.8, 64.2, 65.0, 66.7, 68.0, 74.4, 94.1
)
y <- c(1, 1, 3, 2, 1, 1, 2, 1, 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, 2, 3, 2, 2, 1)
# 20 x "Type 1", then one "Type 2", then 2 x "Type 1".
type <- rep(c("Type 1", "Type 2", "Type 1"), times = c(20, 1, 2))
df <- data.frame(x, y, type)
Bar plot of total y count for each bin of x - trying to fill by total of type, but getting individual proportions as shown by line colour = black. Would like total for each type in each bar.
# Stacked bars of y over binned x, filled by type. One rectangle is drawn per
# input row, so the black outline shows every row inside a bar.
ggplot(df, aes(x = x, y = y)) +
  geom_bar(aes(fill = type), stat = "identity", color = "black") +
  scale_x_binned(limits = c(20, 100)) +
  scale_y_continuous(breaks = seq(0, 10, 2), expand = c(0, 0)) +
  labs(x = "", y = "Total Count")
Or trying to just have the total count within each bin but don't want the internal lines in the bars, just the outer colour = black for each bar
# Same binned bar chart with a single fill colour. geom_col() still draws one
# outlined rectangle per input row, hence the internal lines in each bar.
ggplot(df, aes(x = x, y = y)) +
  geom_col(color = "black", fill = "#00C3C6") +
  scale_x_binned(limits = c(20, 100)) +
  scale_y_continuous(breaks = seq(0, 10, 2), expand = c(0, 0)) +
  labs(x = "", y = "Total Count")
Here is one way to do it, with previous data transformation and geom_col:
# Width of the x bins; used for the binning, the bar width and the axis breaks.
bin_width <- 10

# Total y per bin of x and per type. `bins` is the LOWER edge of each bin.
# `.groups = "drop"` returns an ungrouped data frame (and silences the
# regrouping message summarise() would otherwise emit).
df <- df |>
  mutate(bins = floor(x / bin_width) * bin_width) |>
  group_by(bins, type) |>
  summarise(y = sum(y), .groups = "drop")

# Bars are centred on the bin midpoint and are exactly one bin wide, so each
# bar spans [bins, bins + bin_width] and the axis ticks fall on the bin edges.
# (Centring the bars on `bins` itself would make them straddle bin boundaries.)
ggplot(
  data = df,
  aes(x = bins + bin_width / 2, y = y)
) +
  geom_col(
    aes(fill = type),
    color = "black",
    width = bin_width
  ) +
  scale_x_continuous(breaks = seq(0, 100, bin_width)) +
  scale_y_continuous(
    expand = c(0, 0),
    breaks = seq(0, 10, 2)
  ) +
  xlab("") +
  ylab("Total Count")

Adjust binwidth size for faceted dotplot with free y axis

I would like to adjust the binwidth of a faceted geom_dotplot while keeping the dot sizes the same.
Using the default binwidth (1/30 of the data range), I get the following plot:
library(ggplot2)

# Two facets (t = 1 and t = 2) with five points each; the y values of the
# second facet are 100 times those of the first.
df <- data.frame(
  t = rep(c(1, 2), each = 5),
  x = 1,
  y = c(1:5, (1:5) * 100)
)

# Centred dot plot binned along y, one panel per value of t, each panel with
# its own y scale. No binwidth is given, so geom_dotplot's default applies.
ggplot(df, aes(x = x, y = y)) +
  geom_dotplot(binaxis = "y", stackdir = "center") +
  facet_wrap(~t, scales = "free_y")
However, if I change the binwidth value, the new value is taken as an absolute value (and not the ratio of the data range), so the two facets get differently sized dots:
geom_dotplot(binaxis="y", stackdir="center", binwidth=2) +
Is there a way to adjust binwidth so it is relative to its facet's data range?
One option to achieve your desired result would be via multiple geom_dotplots which allows to set the binwidth for each facet separately. This however requires some manual work to compute the binwidths so that the dots are the same size for each facet:
library(ggplot2)

# Range of y within each facet (one entry per level of t, named by level).
y_ranges <- tapply(df$y, factor(df$t), function(y) diff(range(y)))

# Binwidth chosen for the first facet, in that facet's y units.
binwidth1 <- 2

# Give every facet the same binwidth-to-range ratio as the first facet, so
# the dots come out the same size in all panels. For two facets this equals
# the original binwidth1 / (range1 / 30) * range2 / 30.
# NOTE(review): assumes the first facet's y range is non-zero.
binwidths <- binwidth1 * y_ranges / y_ranges[[1]]

# One geom_dotplot layer per facet, each restricted to that facet's rows and
# using that facet's binwidth. Works for any number of facets.
dot_layers <- lapply(names(binwidths), function(lvl) {
  force(lvl)
  geom_dotplot(
    data = function(d) d[which(as.character(d$t) == lvl), , drop = FALSE],
    binaxis = "y",
    stackdir = "center",
    binwidth = binwidths[[lvl]]
  )
})

# Adding a list to a ggplot adds each element of the list as a layer.
ggplot(df, aes(x = x, y = y)) +
  dot_layers +
  facet_wrap(~t, scales = "free_y")

tfidfVectorizer on only one column in training set

I have a problem concerning the tfidfVectorizer.
My problem is that I have 3 columns, one is the text that needs to be vectorized and the two others are already numbers, so I only need to vectorize one of them.
I have read that you need to vectorize your data after you have split it into training and test set, so I have split my data set like so:
# Features: the tweet text plus the two numeric columns; target: the label.
feature_cols = ['cleaned_tweet_text', 'polarity', 'subjectivity']
X = df[feature_cols]  # The Tweets
y = df['cyberbullying_type']  # The Label

# Hold out 10% of the rows as the test set, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)
It is the cleaned_tweet_text that needs to be vectorized
I have tried this (see below), but I am not sure this is the right way.
# Fit the TF-IDF vectorizer (capped at 1000 features) on the training text
# only, then apply the already-fitted vectorizer to the test text.
# Only the text column is transformed here; 'polarity' and 'subjectivity'
# are not part of X_train_tfidf / X_test_tfidf.
tfidf = TfidfVectorizer(max_features=1000)
X_train_tfidf = tfidf.fit_transform(X_train['cleaned_tweet_text'])
X_test_tfidf = tfidf.transform(X_test['cleaned_tweet_text'])
It does not give me an error, and if I print out X_train_tfidf I get this:
(0, 217) 0.41700972853730645
(0, 118) 0.16283369998713235
(0, 758) 0.16948694862672925
(0, 404) 0.20143376247898365
(0, 626) 0.4426572817169202
(0, 356) 0.20217167680038242
(0, 871) 0.4634256150008882
(0, 65) 0.3606189681792524
(0, 565) 0.38556256201243433
(1, 719) 0.29478675756557454
(1, 919) 0.30596230567496185
(1, 698) 0.36538974359723864
(1, 485) 0.816429056367109
(1, 118) 0.13936199719971182
(2, 342) 0.17134974750083107
(2, 256) 0.18449190025596335
(2, 110) 0.3604602574432005
(2, 290) 0.39210201833562014
(2, 648) 0.3538174461369334
(2, 161) 0.2742199778844052
(2, 251) 0.3864257748655211
(2, 128) 0.26063790594719993
(2, 599) 0.18251158997125277
(2, 123) 0.39339155686431243
(2, 360) 0.21729849596293152
Does that mean it works? Can I now put it into a classifier?

Stack bars with percentages and values shown

Here is my dataframe - data_long1
# Patient counts (`value`) per tumour site (`Subtype`) and outcome
# (`variable`), in long format: one row per Subtype/outcome combination.
# Assigned to `data_long1` because the plotting code that follows refers to
# the data frame by that name.
data_long1 <- data.frame(
  value = c(88, 22, 100, 12, 55, 17, 10, 2, 2),
  Subtype = as.factor(c(
    "lung", "prostate", "oesophagus",
    "lung", "prostate", "oesophagus",
    "lung", "prostate", "oesophagus"
  )),
  variable = as.factor(c(
    "alive", "alive", "alive",
    "dead", "dead", "dead",
    "uncertain", "uncertain", "uncertain"
  ))
)
The following code gives me a nice graph that I want, with all the values displayed, but none in percentages.
# Stacked bars of patient counts per Subtype, coloured by outcome, with the
# raw value printed as the label of each segment.
ggplot(data_long1, aes(x = Subtype, y = value, fill = variable)) +
  geom_col() +
  geom_text(
    aes(label = value),
    position = "stack",
    size = 3,
    hjust = 0.1,
    vjust = 2
  )
What I am looking for is a stacked bar chart with the actual values displayed on the y axis, not percentages (like the previous graph), but also with a percentage figure displayed on each subsection of each bar. I tried this code and got a meaningless graph with every stack being 33.3%.
# Attempt from the question, kept as written (this is the version that shows
# 33.3% on every segment). count() tallies the ROWS per Subtype/variable
# pair, not the `value` column, and `variable` is mapped to y.
data_long1 %>%
  count(Subtype, variable) %>%
  group_by(Subtype) %>%
  mutate(pct = prop.table(n) * 100) %>%
  ggplot() +
  aes(x = Subtype, y = variable, fill = variable) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", pct), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  ylab("Number of Patients") +
  ggtitle("My Tumour Sites") +
  theme_bw()
I cannot seem to find a way to use the mutate function to resolve this problem. Please help.
I would pre-compute the summaries you want. Here is the proportion within each subtype:
# Share of each outcome WITHIN its Subtype: value divided by the Subtype
# total, so the proportions of one bar sum to 1.
# ungroup() so that data_long2 does not stay grouped by Subtype.
data_long2 <- data_long1 %>%
  group_by(Subtype) %>%
  mutate(proportion = value / sum(value)) %>%
  ungroup()

# Bar heights are the raw values; each segment is labelled with its
# percentage. position_stack(vjust = 0.5) centres every label inside its own
# segment. (A fixed vjust/hjust nudge pushes the labels of small segments
# outside the segment they belong to.)
ggplot(data_long2, aes(x = Subtype, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = sprintf("%0.0f%%", proportion * 100)),
    position = position_stack(vjust = 0.5),
    size = 3
  )
You can also get the proportion across all groups and types simply by removing the group_by statement:
# Share of each Subtype/outcome cell in the OVERALL total: value divided by
# the sum of value over all rows.
data_long2 <- data_long1 %>%
  mutate(proportion = value / sum(value))

# Bar heights are the raw values; each segment is labelled with its
# percentage. position_stack(vjust = 0.5) centres every label inside its own
# segment. (A fixed vjust/hjust nudge pushes the labels of small segments
# outside the segment they belong to.)
ggplot(data_long2, aes(x = Subtype, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = sprintf("%0.0f%%", proportion * 100)),
    position = position_stack(vjust = 0.5),
    size = 3
  )