How to wrap text in a dataframe's table (converted to .png) - pandas

I am having an issue where I cannot format my tables. The text is too long to just edit the dimensions or the text size. How can I quickly change this so you can see all the text when I have the data for each column more filled in? I am looking for a wrap text kind of function but I don't know if that is possible the way I'm doing it. Is there another way you'd recommend? I'm changing the table into a .png to insert into an Excel file. It has to be a .png so it's an object and doesn't mess with the size of the rows and columns in Excel.
import matplotlib.pyplot as plt
import xlsxwriter as xl
import numpy as np
import yfinance as yf
import pandas as pd
import datetime as dt
import mplfinance as mpf
import pandas_datareader
from pandas_datareader import data as pdr
yf.pdr_override()
import numpy as np
def render_mpl_table(data, col_width=10, row_height=1, font_size=10, wrap=True,
                     header_color='#40466e', row_colors=('#f1f1f2', 'w'),
                     edge_color='w', bbox=(0, 0, 1, 1), header_columns=0,
                     ax=None, wrap_width=None, **kwargs):
    """Draw a DataFrame as a styled matplotlib table, optionally wrapping text.

    Parameters:
        data: DataFrame to draw; ``data.columns`` become the header row.
        col_width: figure width per column (used when ``ax`` is None).
        row_height: figure height per text line (used when ``ax`` is None).
        font_size: font size applied to every cell.
        wrap: when true, every cell and column label is re-flowed with
            ``textwrap.fill`` so long text breaks onto several lines.
        header_color: face colour of header cells.
        row_colors: face colours cycled over the non-header rows.
        edge_color: edge colour of every cell.
        bbox: forwarded to ``ax.table``.
        header_columns: cells whose column index is below this value are
            styled like the header row.
        ax: existing axes to draw into; a new figure is created when None.
        wrap_width: maximum characters per line when wrapping. When None it
            is estimated from ``col_width`` and ``font_size``.
        **kwargs: forwarded to ``ax.table``.

    Returns:
        ``(figure, axes)`` holding the table.
    """
    import textwrap  # local import so the block does not depend on file-level imports

    if wrap_width is None:
        # Heuristic (not exact): assumes col_width is in inches, an average
        # glyph is ~0.6 em wide and ~80% of the cell width is usable.
        wrap_width = max(1, int(col_width * 72 * 0.8 / (0.6 * font_size)))

    def _as_text(value):
        text = str(value)
        return textwrap.fill(text, wrap_width) if wrap else text

    cell_text = [[_as_text(value) for value in row] for row in data.values]
    col_labels = [_as_text(column) for column in data.columns]

    # Number of text lines each table row needs; index 0 is the header row.
    line_counts = [max((label.count('\n') + 1 for label in col_labels), default=1)]
    line_counts += [max((text.count('\n') + 1 for text in row), default=1)
                    for row in cell_text]

    if ax is None:
        # Without wrapped text this equals the previous size:
        # (n_columns * col_width, (n_rows + 1) * row_height).
        size = (data.shape[1] * col_width, sum(line_counts) * row_height)
        fig, ax = plt.subplots(figsize=size)
        ax.axis('off')

    mpl_table = ax.table(cellText=cell_text, bbox=bbox, colLabels=col_labels, **kwargs)
    mpl_table.auto_set_font_size(False)
    mpl_table.set_fontsize(font_size)

    for (row, col), cell in mpl_table.get_celld().items():
        # Make each row's height proportional to the lines of text it holds.
        cell.set_height(cell.get_height() * line_counts[row])
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    return ax.get_figure(), ax


Individualreport = "C:\\Users\\Ashley\\FromPython.xlsx"
Ticklist = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx",sheet_name='Tickers', header=None)
stocks = Ticklist.values.ravel()
PipelineData = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx", sheet_name='Pipeline', header=None)
# NOTE(review): the writer is created here but not used or closed in the code
# shown; make sure whatever uses it later also closes it.
writer = pd.ExcelWriter(Individualreport, engine='xlsxwriter')
for i in stocks:
    # write pipeline data: rows whose first column equals the ticker
    t = PipelineData.loc[(PipelineData[0]==i)]
    print(t)
    if t.empty:
        # No rows for this ticker: there is nothing to draw.
        continue
    fig, ax = render_mpl_table(t, header_columns=0, col_width=2.0)
    fig.savefig(str(i)+'pipe.png')
    # Release the figure so they do not pile up over the loop.
    plt.close(fig)

I think I needed to use an additional package, haven't tried with this example, but worked in another similar example I did.
from textwrap import wrap
# One entry per label. A parenthesised string on its own is still a plain str,
# and looping over a str would wrap every character separately.
label = ["label text that is getting put in the graph"]
# 20 is the maximum number of characters per line
label = ['\n'.join(wrap(text, 20)) for text in label]

Related

Creating 3D scatter chart in Taipy

I was wondering how one would create a 3D scatter chart in Taipy.
I tried this code initially:
import pandas as pd
import numpy as np
from taipy import Gui
# Demo data: 100 rows of random integers in [0, 100) for columns x, y and z.
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
# Random cluster id (0, 1 or 2) for every row.
df['cluster1']=np.random.randint(0,3,100)
# Page definition holding a single Scatter3D chart bound to df.
# NOTE(review): the chart refers to `cluster`, but the column created above is
# named `cluster1` - confirm which name is intended.
my_page ="""
Creation of a 3-D chart:
<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|mode=markers|color=cluster|>
"""
Gui(page=my_page).run()
This does indeed display a 3D plot, but the colors (clusters) do not show up.
Any hint?
Yes, you need some massaging of your dataframes to do it.
Here's a sample code that achieves this:
import pandas as pd
import numpy as np
from taipy import Gui
# Demo data: 100 rows of random x/y/z integers plus a random cluster id (0-2).
df = pd.DataFrame(np.random.randint(0,100,size=(100, 3)), columns=list('xyz'))
df['cluster1']=np.random.randint(0,3,100)

# Create a list of 3 dataframes, one per cluster
datas = [df[df['cluster1']==i] for i in range(3)]

# Create the chart property dictionary dynamically, one trace per dataframe.
# str(i) points to a dataframe index in `datas`;
# "/x" points to the column value in the selected dataframe.
properties = {}
for i, _cluster_df in enumerate(datas):
    properties[f"x[{i+1}]"] = str(i)+"/x"
    properties[f"y[{i+1}]"] = str(i)+"/y"
    properties[f"z[{i+1}]"] = str(i)+"/z"
    properties[f'name[{i+1}]'] = str(i+1)
print(properties)

chart = "<|{datas}|chart|type=Scatter3D|properties={properties}|mode=markers|height=800px|>"
Gui(page=chart).run()
In fact, with the new release: Taipy 1.1, this is very easy to do in a few lines of code:
import pandas as pd
import numpy as np
from taipy import Gui
# Colour name used for each cluster id.
color_map = {0: "blue", 1: "green", 2: "red"}

# Demo data: random x/y/z integers and a random cluster id per row.
df = pd.DataFrame(
    np.random.randint(0, 100, size=(100, 3)),
    columns=["x", "y", "z"],
)
df["cluster1"] = np.random.randint(0, 3, 100)
# Look up the colour name of every row's cluster id.
df["cluster_colors"] = df["cluster1"].map(color_map)

# The marker colour is read from the `cluster_colors` column.
marker = {"color": "cluster_colors"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()
If you want to leave it to Taipy to pick the colors for you, then you can simply use:
import pandas as pd
import numpy as np
from taipy import Gui
# Demo data: random x/y/z integers and a random cluster id per row.
df = pd.DataFrame(
    np.random.randint(0, 100, size=(100, 3)),
    columns=["x", "y", "z"],
)
df["cluster1"] = np.random.randint(0, 3, 100)

# Hand the numeric cluster column to the marker as its colour.
marker = {"color": "cluster1"}
chart = "<|{df}|chart|type=Scatter3D|x=x|y=y|z=z|marker={marker}|mode=markers|height=800px|>"
Gui(page=chart).run()

Building a histogram

How can a distribution histogram similar to this one be constructed based on the data from the table?
enter image description here
enter image description here
Code python:
import pandas as pd
import matplotlib.pyplot as plt
# Load the table to plot from the workbook.
df = pd.read_excel('Data.xlsx')
print(df)
# DataFrame.plot.hist() draws the frame it is called on. Its first positional
# parameter is `by` (a grouping key), so the frame must not be passed again.
df.plot.hist()
plt.show()
It isn't clear exactly what the x and y axes of your desired plot are. Hopefully this will get you started. Sometimes trying to come up with an MRE (minimal reproducible example) will help you solve your own problem.
import random
import pandas as pd
import matplotlib.pyplot as plt
# --- Generate some reproducible random data for a minimal example ---
random.seed(22)
# Ten random integers in [0, 100], sorted ascending.
data = pd.Series(sorted(random.randint(0, 100) for _ in range(10)))
# Ten random floats in [0, 1], sorted ascending.
freqs = pd.Series(sorted(random.uniform(0, 1) for _ in range(10)))
df = pd.DataFrame({'data': data, 'frequencies': freqs})

# --- Bar plot using the plotting built into pandas ---
df.plot(x='data', y='frequencies', kind='bar')
plt.show()

seaborn.swarmplot problem with symlog scale: zero's are not expanded

I have a data set of positive values and zeros that I would like to show on a log scale. To represent the zeros I use the 'symlog' option, but all zero values are mapped onto a single point in the swarmplot. How can I fix it?
import numpy as np
import seaborn as sns
import pandas as pd
import random
import matplotlib.pyplot as plt
# Number of samples in each of the five value groups built below.
n = 100
# Values to plot: n zeros, n points from 0 to 1, n fives, n points from 10 to 100
# and n points from 100 to 1000, flattened into a single array.
x = np.concatenate(([0]*n,np.linspace(0,1,n),[5]*n,np.linspace(10,100,n),np.linspace(100,1000,n)),axis=None)
# Each value gets a category drawn at random from 0-3.
data = pd.DataFrame({'value': x, 'category': random.choices([0,1,2,3], k=len(x))})
f, ax = plt.subplots(figsize=(10, 6))
# The y scale is set before the swarm plot is drawn.
# NOTE(review): `linthreshy` is an older keyword spelling; newer Matplotlib
# releases appear to expect `linthresh` - confirm against the installed version.
ax.set_yscale("symlog",linthreshy=1.e-2)
# NOTE(review): `ymax` is likewise an older spelling of `top` - confirm.
ax.set_ylim(ymax=1000)
sns.swarmplot(x="category", y="value", data=data)
sns.despine(left=True)
link to the resulting plot

Understanding plt.show() in Matplotlib

import numpy as np
import os.path
from skimage.io import imread
from skimage import data_dir
# Read the sample image 'checker_bilevel.png' from skimage's data directory.
# NOTE(review): `data_dir` may not be available in every scikit-image release - confirm.
img = imread(os.path.join(data_dir, 'checker_bilevel.png'))
import matplotlib.pyplot as plt
#plt.imshow(img, cmap='Blues')
#plt.show()
# Figure 1: the transposed image.
imgT = img.T
plt.figure(1)
plt.imshow(imgT,cmap='Greys')
#plt.show()
# Figure 2: the same values reshaped to 20x5 (this requires exactly 100 values).
imgR = img.reshape(20,5)
plt.figure(2)
plt.imshow(imgR,cmap='Blues')
# Intended to display only figure 1; as discussed below, both figures appear.
plt.show(1)
I read that plt.figure() will create or assign the image a new ID if not explicitly given one. So here, I have given the two figures IDs 1 and 2 respectively. Now I wish to see only one of the images.
I tried plt.show(1), expecting that ONLY the first image would be displayed, but both of them are.
What should I write to get only one?
plt.clf() will clear the figure
import matplotlib.pyplot as plt
# Two data sets drawn one after the other on the current figure.
first_series, second_series = range(10), range(12)

plt.plot(first_series, 'r')
# Clear the current figure, discarding the red line drawn above.
plt.clf()
plt.plot(second_series, 'g--')
plt.show()
plt.show will show all the figures created. The argument you passed forces the figures to be shown in a non-blocking way. If you only want to show a particular figure, you can write a wrapper function.
import matplotlib.pyplot as plt
# Five (figure, axes) pairs, each created by plt.subplots().
figures = [plt.subplots() for i in range(5)]


def show(figNum, figures):
    """Show only the figure whose number is ``figNum``.

    Parameters:
        figNum: number of the figure to show.
        figures: sequence whose items hold a figure at index 0, such as the
            (figure, axes) pairs returned by ``plt.subplots()``.

    Prints 'figure not found' when pyplot has no figure with that number or
    when the figure is not among ``figures``.
    """
    if not plt.fignum_exists(figNum):
        print('figure not found')
        return
    # The figure can exist in pyplot without being in the supplied list, so
    # search with a default rather than indexing an empty list.
    fig = next((pair[0] for pair in figures if pair[0].number == figNum), None)
    if fig is None:
        print('figure not found')
        return
    fig.show()

Matplotlib cmap values must be between 0-1

I am having trouble with the code below:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from pylab import *
import sys
# NOTE: this snippet is Python 2 code (print statements; map/zip used as lists).
# Pairs of (id string, temperature); the temperatures are stored as strings.
s = (('408b2e00', '24.21'), ('408b2e0c', '22.51'), ('4089e04a', '23.44'), ('4089e04d', '24.10'))
# Second element of every pair; these are still strings at this point.
temp = [x[1] for x in s]
print temp
figure(figsize=(15, 8))
# Point coordinates, one (x, y) pair per entry.
pts = [(886.38864047695108, 349.78744809964849), (1271.1506973277974, 187.65500904929195), (1237.272277227723, 860.38363675077176), (910.58751197700428, 816.82566805067597)]
x = map(lambda x: x[0],pts) # Extract the values from pts
y = map(lambda x: x[1],pts)
t = temp
# NOTE(review): `result` is not used anywhere in the code shown.
result = zip(x,y,t)
img = mpimg.imread('floor.png')
imgplot = plt.imshow(img, cmap=cm.hot)
# `c=t` passes the temperature strings as the colour argument; this is the
# call associated with the ValueError quoted below.
scatter(x, y, marker='h', c=t, s=150, vmin=-20, vmax=40)
print t
# Add cmap
colorbar()
show()
Given the temperatures in s, I am trying to set the values of the cmap so that I can use temperatures between -10 and 30 instead of having to use values between 0 and 1. I have set the vmin and vmax values but it still gives me the error below:
ValueError: to_rgba: Invalid rgba arg "23.44" to_rgb: Invalid rgb arg "23.44" gray (string) must be in range 0-1
I used earlier code to simplify the problem and was successful. The example below works and shows what I am (hopefully) trying to do:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from pylab import *
plt.figure(figsize=(15, 8))

# use ginput to select markers for the sensors
plt.hot()

# Each marker is (x position, y position, temperature).
markers = [(269, 792, -5), (1661, 800, 20), (1017, 457, 30)]
x, y, t = zip(*markers)

# Draw the floor plan, then the markers coloured by temperature on top of it.
img = mpimg.imread('floor.png')
imgplot = plt.imshow(img, cmap=plt.cm.hot)
plt.scatter(x, y, marker='h', c=t, s=150, vmin=-10, vmax=30)
plt.colorbar()
plt.show()
Any ideas why only the second solution works? I am working with dynamic values i.e inputs from mysql and user selected points and so the first solution would be much easier to get working later on (the rest of that code is in this question: Full program code )
Any help would be great. Thanks!
You are handing in strings instead of floats, change this line:
# Convert each temperature string to a float so it is treated as a number.
temp = [float(x[1]) for x in s]
matplotlib tries to be good about guessing what you mean and lets you define a gray level as a string containing a float in [0, 1], which is what it is trying to do with your string values (and complaining because they are not in that range).