How can I keep a subplot table from stretching? - matplotlib

How should I alter the following script to keep the subplot (on the right) from stretching? Is there a way to set the plot area of a subplot? It is frustrating: I go through the row/column sizing in the function, but when plotted the table just expands to fill the area. The left subplot shows the full list (22 rows). In the right one I pass only half of the df rows, yet it still fills the axes vertically. Thanks.
import pandas as pd
import matplotlib.pyplot as plt
import six
plt.rcParams['font.family'] = "Lato"
# Sample data: each series has six distinct leading values followed by the
# same run 10..25, giving 22 values per series (one per metric below).
_shared_tail = list(range(10, 26))
raw_data = {
    'TF_001': [42, 39, 86, 15, 23, 57] + _shared_tail,
    'SP500': [52, 41, 79, 80, 34, 47] + _shared_tail,
    'Strategy': [62, 37, 84, 51, 67, 32] + _shared_tail,
    'LP_Port': [72, 43, 36, 26, 53, 88] + _shared_tail,
}

# Row labels of the table, in display order.
_metric_names = [
    'Sharpe Ratio', 'Sortino Ratio', 'Calmars Ratio', 'Ulcer Index',
    'Max Drawdown', 'Volatility', 'VaR', 'CVaR', 'R-Squared', 'CAGR',
    'Risk-of-Ruin', 'Gain-Pain Ratio', 'Pitfall Indicator',
    'Serentity Ratio', 'Common Sense Ratio', 'Kelly Criteria',
    'Payoff Ratio', 'Ratio-A', 'Ratio-B', 'Ratio-C', 'Ratio-D', 'Ratio-E',
]

# Metrics as rows (index named 'Metric'), series as columns (named 'Series').
df = pd.DataFrame(
    raw_data,
    index=pd.Index(_metric_names, name='Metric'),
    columns=pd.Index(['TF_001', 'SP500', 'Strategy', 'LP_Port'], name='Series'),
)
def create_table(data,
                 ax=None,
                 col_width=None,
                 row_height=None,
                 font_size=8,
                 header_color='#E5E5E5',
                 row_colors=None,
                 edge_color='w',
                 header_columns=0,
                 bbox=None):
    """Draw ``data`` as a styled table on a matplotlib axes and return the axes.

    Parameters
    ----------
    data : DataFrame-like
        Provides ``values`` (cell text), ``columns`` (column labels) and
        ``index`` (row labels).
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``) is used.
    col_width : float, optional
        Width given to every data column. When omitted, matplotlib's default
        column widths are used.
    row_height : float, optional
        Base height; the column-header cells are set to ``row_height * 0.2``.
        When omitted, the header height is left untouched.
    font_size : int
        Fixed font size for all cells (automatic font sizing is disabled).
    header_color : color
        Face color of the column-header row and of header columns.
    row_colors : list of colors, optional
        Face colors cycled over the body rows; defaults to
        ``['#F1F8E9', 'w']``.
    edge_color : color
        Edge color of every cell.
    header_columns : int
        Cells whose column index is below this value are styled as headers.
        Row-label cells have column index -1, so they always qualify.
    bbox : sequence of 4 floats, optional
        Table bounding box passed to ``ax.table``; defaults to
        ``[0, 0, 1, 1]``.
        NOTE(review): per matplotlib's table docs the bbox is in axes
        coordinates, so the default stretches the table over the whole axes
        and the axes size, not ``col_width``/``row_height``, decides the
        final cell size.

    Returns
    -------
    The axes the table was drawn on.
    """
    if ax is None:
        # Imported lazily: only this fallback needs pyplot.
        import matplotlib.pyplot as plt
        ax = plt.gca()
    if row_colors is None:
        row_colors = ['#F1F8E9', 'w']
    if bbox is None:
        bbox = [0, 0, 1, 1]

    n_cols = len(data.columns)
    # A list of None is not a valid colWidths value; pass None so matplotlib
    # falls back to its own defaults.
    col_widths = None if col_width is None else [col_width] * n_cols
    data_table = ax.table(cellText=data.values,
                          colLabels=data.columns,
                          rowLabels=data.index,
                          bbox=bbox,
                          cellLoc='center',
                          rowLoc='left',
                          colLoc='center',
                          colWidths=col_widths)

    cells = data_table.get_celld()
    if row_height is not None:
        # Row 0 holds the column labels.
        for col in range(n_cols):
            cells[(0, col)].set_height(row_height * 0.2)

    data_table.auto_set_font_size(False)
    data_table.set_fontsize(font_size)

    for (row, col), cell in cells.items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='heavy', color='black')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
        # Column labels (row 0) and row labels (col -1) are slightly
        # transparent.
        if row == 0 or col == -1:
            cell.set_alpha(0.8)
    return ax
# Two side-by-side axes: the full table on the left, the first 11 rows on the
# right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 7),
                               constrained_layout=False)

_table_kwargs = dict(col_width=1.1, row_height=0.25, font_size=8)
create_table(df, ax1, **_table_kwargs)
create_table(df.iloc[0:11, ], ax2, **_table_kwargs)

# Title each axes, then hide its frame and ticks so only the table shows.
for _axes, _heading in ((ax1, "- Conventional Risk Measures -"),
                        (ax2, "- Second Order Risk Measures -")):
    _axes.set_title(_heading, fontsize=10, fontweight='heavy', loc='center')
    _axes.axis('off')

# Figure-level heading, left-aligned near the top-left corner.
plt.suptitle('EF QuantOne - Performance and Risk Assessment ("PaRA")',
             x=0.0175, y=0.9775, ha='left', fontsize=12, weight='heavy')

plt.tight_layout()
plt.savefig('risk_parameter_table[1].pdf', orientation='portrait',
            pad_inches=0.5)
plt.show()

Figured it out ...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import six
plt.rcParams['font.family'] = "Lato"
# Sample data: each series has six distinct leading values followed by the
# same run 10..25, giving 22 values per series (one per metric below).
_shared_tail = list(range(10, 26))
raw_data = {
    'TF_001': [42, 39, 86, 15, 23, 57] + _shared_tail,
    'SP500': [52, 41, 79, 80, 34, 47] + _shared_tail,
    'Strategy': [62, 37, 84, 51, 67, 32] + _shared_tail,
    'LP_Port': [72, 43, 36, 26, 53, 88] + _shared_tail,
}

# Row labels of the table, in display order.
_metric_names = [
    'Sharpe Ratio', 'Sortino Ratio', 'Calmars Ratio', 'Ulcer Index',
    'Max Drawdown', 'Volatility', 'VaR', 'CVaR', 'R-Squared', 'CAGR',
    'Risk-of-Ruin', 'Gain-Pain Ratio', 'Pitfall Indicator',
    'Serentity Ratio', 'Common Sense Ratio', 'Kelly Criteria',
    'Payoff Ratio', 'Ratio-A', 'Ratio-B', 'Ratio-C', 'Ratio-D', 'Ratio-E',
]

# Metrics as rows (index named 'Metric'), series as columns (named 'Series').
df = pd.DataFrame(
    raw_data,
    index=pd.Index(_metric_names, name='Metric'),
    columns=pd.Index(['TF_001', 'SP500', 'Strategy', 'LP_Port'], name='Series'),
)
def create_table(data,
                 ax=None,
                 col_width=None,
                 row_height=None,
                 font_size=8,
                 header_color='#E5E5E5',
                 row_colors=None,
                 edge_color='w',
                 header_columns=0,
                 bbox=None):
    """Draw ``data`` as a styled table on a matplotlib axes and return the axes.

    Parameters
    ----------
    data : DataFrame-like
        Provides ``values`` (cell text), ``columns`` (column labels) and
        ``index`` (row labels).
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``) is used.
    col_width : float, optional
        Width given to every data column. When omitted, matplotlib's default
        column widths are used.
    row_height : float, optional
        Base height; the column-header cells are set to ``row_height * 0.2``.
        When omitted, the header height is left untouched.
    font_size : int
        Fixed font size for all cells (automatic font sizing is disabled).
    header_color : color
        Face color of the column-header row and of header columns.
    row_colors : list of colors, optional
        Face colors cycled over the body rows; defaults to
        ``['#F1F8E9', 'w']``.
    edge_color : color
        Edge color of every cell.
    header_columns : int
        Cells whose column index is below this value are styled as headers.
        Row-label cells have column index -1, so they always qualify.
    bbox : sequence of 4 floats, optional
        Table bounding box passed to ``ax.table``; defaults to
        ``[0, 0, 1, 1]``.
        NOTE(review): per matplotlib's table docs the bbox is in axes
        coordinates, so the default stretches the table over the whole axes;
        size the axes itself to control the rendered table size.

    Returns
    -------
    The axes the table was drawn on.
    """
    if ax is None:
        # Imported lazily: only this fallback needs pyplot.
        import matplotlib.pyplot as plt
        ax = plt.gca()
    if row_colors is None:
        row_colors = ['#F1F8E9', 'w']
    if bbox is None:
        bbox = [0, 0, 1, 1]

    n_cols = len(data.columns)
    # A list of None is not a valid colWidths value; pass None so matplotlib
    # falls back to its own defaults.
    col_widths = None if col_width is None else [col_width] * n_cols
    data_table = ax.table(cellText=data.values,
                          colLabels=data.columns,
                          rowLabels=data.index,
                          bbox=bbox,
                          cellLoc='center',
                          rowLoc='left',
                          colLoc='center',
                          colWidths=col_widths)

    cells = data_table.get_celld()
    if row_height is not None:
        # Row 0 holds the column labels.
        for col in range(n_cols):
            cells[(0, col)].set_height(row_height * 0.2)

    data_table.auto_set_font_size(False)
    data_table.set_fontsize(font_size)

    for (row, col), cell in cells.items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='heavy', color='black')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
        # Column labels (row 0) and row labels (col -1) are slightly
        # transparent.
        if row == 0 or col == -1:
            cell.set_alpha(0.8)
    return ax
# The table fills whatever axes it is drawn on, so each axes is created with
# exactly the size (in inches) its table needs instead of using plt.subplots.
fig = plt.figure(figsize=(12, 10))
w, h = fig.get_size_inches()
# fig.add_axes takes [left, bottom, width, height]; dividing inch values by
# the figure size converts them to the figure fractions it is given here.
div = np.array([w, h, w, h])
col_width = 1.1
row_height = 0.25

df_right = df.iloc[0:11, ]  # subset shown in the right-hand table

# (width, height) in inches: one column width per data column and one row
# height per data row plus one for the column-header row.
ax1_subplot_size = (np.array(df.shape[::-1]) + np.array([0, 1])) * np.array(
    [col_width, row_height])
ax2_subplot_size = (np.array(df_right.shape[::-1]) + np.array([0, 1])) * np.array(
    [col_width, row_height])

# Left table: lower-left corner at (1.6in, 1in).
ax1_left, ax1_bottom = 1.6, 1.0
ax1 = fig.add_axes(
    np.concatenate(([ax1_left, ax1_bottom], ax1_subplot_size)) / div)

# Right table: starts at 7.5in and is top-aligned with the left table.
ax2_left = 7.5
ax2_bottom = ax1_bottom + ax1_subplot_size[1] - ax2_subplot_size[1]
ax2 = fig.add_axes(
    np.concatenate(([ax2_left, ax2_bottom], ax2_subplot_size)) / div)

create_table(df, ax1, col_width, row_height, font_size=8)
create_table(df_right, ax2, col_width, row_height, font_size=8)

ax1.set_title("- Conventional Risk Measures -",
              fontsize=10,
              fontweight='heavy',
              loc='center')
ax1.axis('off')
ax2.set_title("- Second Order Risk Measures -",
              fontsize=10,
              fontweight='heavy',
              loc='center')
ax2.axis('off')

plt.suptitle('EF QuantOne - Performance and Risk Assessment ("PaRA")',
             x=0.0175,
             y=0.9775,
             ha='left',
             fontsize=12,
             weight='heavy')

# plt.tight_layout() is deliberately not called: the axes are positioned
# explicitly above.
plt.savefig('risk_parameter_table[1].pdf',
            orientation='portrait',
            pad_inches=0.5)
plt.show()

Related

Feeding Word Embedding Matrix into a Pytorch LSTM Model

I have a LSTM model I am using to predict the unemployment rate from federal reserve filings. It uses glove vectors and vocab2index embedding and the training went as planned. However, upon attempting to feed a word embedding into the model for prediction testing it keeps throwing various errors.
Here is the model:
def load_glove_vectors(glove_file=glove_embedding_vectors_text_file):
    """Load word vectors from a whitespace-separated GloVe text file.

    Each non-empty line is split on whitespace: the first token is the word,
    the remaining tokens are parsed as floats.

    Parameters
    ----------
    glove_file : path
        File to read; defaults to ``glove_embedding_vectors_text_file``, which
        is defined elsewhere.

    Returns
    -------
    dict
        Maps each word to a NumPy array of its vector components.
    """
    word_vectors = {}
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the file is UTF-8, as published GloVe files are;
    # confirm for your file.
    with open(glove_file, encoding="utf-8") as f:
        for line in f:
            split = line.split()
            if not split:
                # Skip blank lines instead of failing on split[0].
                continue
            word_vectors[split[0]] = np.array([float(x) for x in split[1:]])
    return word_vectors
def get_emb_matrix(pretrained, word_counts, emb_size = 300):
    """Build an embedding matrix and vocabulary from pretrained word vectors.

    Row 0 is an all-zero padding vector and row 1 a random vector for unknown
    words ("UNK"). Every word in ``word_counts`` then gets the next row, in
    iteration order: its vector from ``pretrained`` if present, otherwise a
    random vector drawn uniformly from [-0.25, 0.25).

    Parameters
    ----------
    pretrained : mapping
        Maps a word to its vector of length ``emb_size``.
    word_counts : iterable of str
        Vocabulary words; only the words are used, any counts are ignored.
    emb_size : int
        Dimensionality of each embedding vector.

    Returns
    -------
    W : ndarray of float32, shape (len(word_counts) + 2, emb_size)
        The embedding matrix.
    vocab : ndarray of str
        Words by row index (``""`` for padding, then ``"UNK"``).
    vocab_to_idx : dict
        Maps ``"UNK"`` and every word to its row index (padding has no entry).
    """
    vocab_size = len(word_counts) + 2
    vocab_to_idx = {"UNK": 1}
    vocab = ["", "UNK"]
    # Row 0 stays all zeros and serves as the padding vector.
    W = np.zeros((vocab_size, emb_size), dtype="float32")
    W[1] = np.random.uniform(-0.25, 0.25, emb_size)  # vector for unknown words
    for i, word in enumerate(word_counts, start=2):
        # Read from the `pretrained` argument rather than a module-level global.
        if word in pretrained:
            W[i] = pretrained[word]
        else:
            W[i] = np.random.uniform(-0.25, 0.25, emb_size)
        vocab_to_idx[word] = i
        vocab.append(word)
    return W, np.array(vocab), vocab_to_idx
# Load the GloVe vectors from the default file, then build the embedding
# matrix, vocabulary array and word-to-index mapping for the words in `counts`
# (`counts` is defined elsewhere).
word_vecs = load_glove_vectors()
pretrained_weights, vocab, vocab2index = get_emb_matrix(word_vecs, counts)
Unfortunately when I feed this array
[array([ 3, 10, 6287, 6, 113, 271, 3, 6639, 104, 5105, 7525,
104, 7526, 9, 23, 9, 10, 11, 24, 7527, 7528, 104,
11, 24, 7529, 7530, 104, 11, 24, 7531, 7530, 104, 11,
24, 7532, 7530, 104, 11, 24, 7533, 7534, 24, 7535, 7536,
104, 7537, 104, 7538, 7539, 7540, 6643, 7541, 7354, 7542, 7543,
7544, 9, 23, 9, 10, 11, 24, 25, 8, 10, 11,
24, 3, 10, 663, 168, 9, 10, 290, 291, 3, 4909,
198, 10, 1478, 169, 15, 4621, 3, 3244, 3, 59, 1967,
113, 59, 520, 198, 25, 5105, 7545, 7546, 7547, 7546, 7548,
7549, 7550, 1874, 10, 7551, 9, 10, 11, 24, 7552, 6287,
7553, 7554, 7555, 24, 7556, 24, 7557, 7558, 7559, 6, 7560,
323, 169, 10, 7561, 1432, 6, 3134, 3, 7562, 6, 7563,
1862, 7144, 741, 3, 3961, 7564, 7565, 520, 7566, 4833, 7567,
7568, 4901, 7569, 7570, 4901, 7571, 1874, 7572, 12, 13, 7573,
10, 7574, 7575, 59, 7576, 59, 638, 1620, 7577, 271, 6488,
59, 7578, 7579, 7580, 7581, 271, 7582, 7583, 24, 669, 5932,
7584, 9, 113, 271, 3764, 3, 5930, 3, 59, 4901, 7585,
793, 7586, 7587, 6, 1482, 520, 7588, 520, 7589, 3246, 7590,
13, 7591])
into torch.LongTensor() I keep getting the following error:
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
Any ideas on how to remedy? I am fairly new to AI in general, and I am an economist by trade so I am almost certain I have made a boneheaded error.

How to merge classes in multiclass image segmentation

I am performing an image segmentation with a u-net model.
My mask has classes from 0-50.
I also have a text file dictionary with codes representing each class.
For example -
{1: '1234', 2:'5678', 3:'1245'} etc.
How do I combine classes whose codes share the same first two characters? In the example above, keys 1 and 3 should be merged because both of their codes start with "12".
How can I do this for all classes?
# Map each class id to the first two characters of its code. Class 0 keeps its
# full value unchanged.
firstTwoCharDict = {
    key: (value if key == 0 else value[:2])
    for key, value in dictionary.items()
}

# Group class ids by shared two-character prefix: prefix -> list of class ids,
# in the order the ids appear in `dictionary`.
newDict = {}
for key, value in firstTwoCharDict.items():
    newDict.setdefault(value, []).append(key)
This provides this
{'62': [1, 39],
'90': [2, 5, 9, 20, 32, 42, 47, 72, 88, 91, 95],
'97': [3, 49, 55],
'98': [4, 24, 34, 40, 53, 76, 81, 90, 96],
'31': [6, 17, 30, 48, 83],
'69': [7, 13, 15, 16, 27, 44, 51, 54, 56, 75],
'79': [8, 50],
'71': [10, 19, 22, 35, 61, 63, 65],
'99': [11, 12, 21, 46, 52, 69, 78, 84, 89],
'48': [14, 36, 74],
'60': [18],
'64': [23, 38, 66, 97]
```
Now I have a 2D array of integers. How do I replace each value in the array with the corresponding key of the dict, i.e. the key whose list contains that value?

MatPlotLib with custom dictionaries convert to graphs

Problem:
I have a list of ~108 dictionaries named list_of_dictionary and I would like to use Matplotlib to generate line graphs.
The dictionaries have the following format (this is one of 108):
{'price': [59990,
59890,
60990,
62990,
59990,
59690],
'car': '2014 Land Rover Range Rover Sport',
'datetime': [datetime.datetime(2020, 1, 22, 11, 19, 26),
datetime.datetime(2020, 1, 23, 13, 12, 33),
datetime.datetime(2020, 1, 28, 12, 39, 24),
datetime.datetime(2020, 1, 29, 18, 39, 36),
datetime.datetime(2020, 1, 30, 18, 41, 31),
datetime.datetime(2020, 2, 1, 12, 39, 7)]
}
Understanding the dictionary:
The car 2014 Land Rover Range Rover Sport was priced at:
59990 on datetime.datetime(2020, 1, 22, 11, 19, 26)
59890 on datetime.datetime(2020, 1, 23, 13, 12, 33)
60990 on datetime.datetime(2020, 1, 28, 12, 39, 24)
62990 on datetime.datetime(2020, 1, 29, 18, 39, 36)
59990 on datetime.datetime(2020, 1, 30, 18, 41, 31)
59690 on datetime.datetime(2020, 2, 1, 12, 39, 7)
Question:
With this structure how could one create mini-graphs with matplotlib (say 11 rows x 10 columns)?
Where each mini-graph will have:
the title of the graph from car
x-axis from the datetime
y-axis from the price
What I have tried:
# Build a frame with one row per car dictionary, index it by the 'datetime'
# column and print it.
df = pd.DataFrame(list_of_dictionary).set_index('datetime')
print(df)
I don't know what to do thereafter...
Relevant Research:
Plotting a column containing lists using Pandas
Pandas column of lists, create a row for each list element
I've read these multiple times, but the more I read it, the more confused I get :(.
I don't know if it's sensible to try and plot that many plots on a figure. You'll have to make some choices to be able to fit all the axes decorations on the page (titles, axes labels, tick labels, etc...).
but the basic idea would be this:
import datetime

import matplotlib.pyplot as plt

# Stand-in for the real list: the single example record repeated 108 times.
car_data = [{'price': [59990,
                       59890,
                       60990,
                       62990,
                       59990,
                       59690],
             'car': '2014 Land Rover Range Rover Sport',
             'datetime': [datetime.datetime(2020, 1, 22, 11, 19, 26),
                          datetime.datetime(2020, 1, 23, 13, 12, 33),
                          datetime.datetime(2020, 1, 28, 12, 39, 24),
                          datetime.datetime(2020, 1, 29, 18, 39, 36),
                          datetime.datetime(2020, 1, 30, 18, 41, 31),
                          datetime.datetime(2020, 2, 1, 12, 39, 7)]
             }]*108

fig, axs = plt.subplots(11, 10, figsize=(20, 22))  # adjust figsize as you please
all_axes = axs.ravel()

# One mini-graph per car: price over time, titled with the car name.
for car, ax in zip(car_data, all_axes):
    ax.plot(car["datetime"], car['price'], '-')
    ax.set_title(car['car'])

# The grid has more cells (110) than there are cars (108); hide the leftover
# axes so no empty frames are drawn.
for ax in all_axes[len(car_data):]:
    ax.set_visible(False)
Ideally, all your axes could share the same x and y axes so you could have the labels only on the left-most and bottom-most axes. This is taken care of automatically if you add sharex=True and sharey=True to subplots():
# Same 11 x 10 grid, but every subplot shares one x-axis and one y-axis.
fig, axs = plt.subplots(11,10, figsize=(20,22), sharex=True, sharey=True) # adjust figsize as you please

MultiPoint crossover using Numpy

I am trying to do crossover on a Genetic Algorithm population using numpy.
I have sliced the population using parent 1 and parent 2.
# Random binary population: 4 rows of 8 bits each.
population = np.random.randint(0, 2, size=(4, 8))
# Even-numbered rows form the first parent set, odd-numbered rows the second,
# so row i of p1 is paired with row i of p2.
p1, p2 = population[0::2], population[1::2]
But I am not able to figure out any lambda or numpy command to do a multi-point crossover over parents.
The concept is to take ith row of p1 and randomly swap some bits with ith row of p2.
I think you want to select from p1 and p2 at random, cell by cell.
To make it easier to see which parent each value comes from, I've changed p1 to hold values from 10 to 15 and p2 to hold values from 20 to 25. Both were generated at random within these ranges.
p1
Out[66]:
array([[15, 15, 13, 14, 12, 13, 12, 12],
[14, 11, 11, 10, 12, 12, 10, 12],
[12, 11, 14, 15, 14, 10, 13, 10],
[11, 12, 10, 13, 14, 13, 12, 13]])
In [67]: p2
Out[67]:
array([[23, 25, 24, 21, 24, 20, 24, 25],
[21, 21, 20, 20, 25, 22, 24, 22],
[24, 22, 25, 20, 21, 22, 21, 22],
[22, 20, 21, 22, 25, 23, 22, 21]])
In [68]: sieve=np.random.randint(2, size=(4,8))
In [69]: sieve
Out[69]:
array([[0, 1, 0, 1, 1, 0, 1, 0],
[1, 1, 1, 0, 0, 1, 1, 1],
[0, 1, 1, 0, 0, 1, 1, 0],
[0, 0, 0, 1, 1, 1, 1, 1]])
In [70]: not_sieve=sieve^1 # Complement of sieve
In [71]: pn = p1*sieve + p2*not_sieve
In [72]: pn
Out[72]:
array([[23, 15, 24, 14, 12, 20, 12, 25],
[14, 11, 11, 20, 25, 12, 10, 12],
[24, 11, 14, 20, 21, 10, 13, 22],
[22, 20, 21, 13, 14, 13, 12, 13]])
The numbers in the teens come from p1 when sieve is 1
The numbers in the twenties come from p2 when sieve is 0
This may be able to be made more efficient but is this what you expect as output?

plt.bar -> TypeError: cannot concatenate 'str' and 'float' objects

I have a variable x_axis that represents a numpy array:
array(['administrator', 'retired', 'lawyer', 'none', 'student',
'technician', 'programmer', 'salesman', 'homemaker', 'executive',
'doctor', 'entertainment', 'marketing', 'writer', 'scientist',
'educator', 'healthcare', 'librarian', 'artist', 'other', 'engineer'],
dtype='|S13')
... and my y_axis looks like this:
array([ 79, 14, 12, 9, 196, 27, 66, 12, 7, 32, 7, 18, 26,
45, 31, 95, 16, 51, 28, 105, 67])
When I try to plot them:
import matplotlib.pyplot as plt
# Reproduces the reported TypeError: x_axis holds strings, but they are passed
# here as the bars' x-coordinates.
plt.bar(x_axis,y_axis)
I receive the error:
TypeError: cannot concatenate 'str' and 'float' objects
Note:
I've seen 'similar' questions, but not specifically asking about this error in reference to matplotlib.bar.
That is because bar needs x-coordinates, but your x_axis is an array of strings. So, bar does not know where to plot the bars. What you need is the following:
import numpy as np
import matplotlib.pyplot as plt
# Bar heights, one per occupation label below.
y_axis = np.array([79, 14, 12, 9, 196, 27, 66, 12, 7, 32, 7, 18, 26,
                   45, 31, 95, 16, 51, 28, 105, 67])
# Tick labels as ordinary text. A fixed-width byte-string dtype ('|S13') would
# yield bytes objects on Python 3, which typically render with a b'...' prefix.
x_labels = np.array(['administrator', 'retired', 'lawyer', 'none', 'student',
                     'technician', 'programmer', 'salesman', 'homemaker', 'executive',
                     'doctor', 'entertainment', 'marketing', 'writer', 'scientist',
                     'educator', 'healthcare', 'librarian', 'artist', 'other', 'engineer'])

w = 3  # bar width, also used as the spacing between bar centres
nitems = len(y_axis)
x_axis = np.arange(0, nitems*w, w)  # numeric x-coordinate for each bar

fig, ax = plt.subplots(1)
ax.bar(x_axis, y_axis, width=w, align='center')
# Put one tick under every bar and label it with the occupation name.
ax.set_xticks(x_axis)
ax.set_xticklabels(x_labels, rotation=90)
plt.show()