I have this reproducible example below where I create a random dataframe then plot it on excel using xlsxwriter:
import pandas as pd
import numpy as np
import xlsxwriter
import random
# 0-indexed sheet row at which pandas writes the table header. It has to be
# defined before it is handed to ``to_excel`` below.
start_row = 3

# 72 hourly timestamps with a random case count for each of them.
df = pd.DataFrame({
    "Start": pd.date_range("1-jan-2021", periods=72, freq="1H"),
    'Cases': np.random.randint(0, 100, size=(72)),
})

# The ten hours to highlight are painted green in the sheet, the rest grey.
dates_tohighlight = list(pd.date_range("2-jan-2021", periods=10, freq="1H"))
highlighted_rows = df['Start'].isin(dates_tohighlight).map(
    {True: 'background-color: green', False: 'background-color: grey'}
)
styler = df.style.apply(lambda _: highlighted_rows).set_properties(**{'text-align': 'center'})

writer = pd.ExcelWriter("Output2.xlsx", engine='xlsxwriter')  # pylint: disable=abstract-class-instantiated
styler.to_excel(writer, sheet_name='Keyser', startrow=start_row, startcol=0, index=False)
workbook = writer.book  # pylint: disable=E1101
worksheet = writer.sheets['Keyser']
(max_rows, max_cols) = df.shape

# Chart references use Excel's 1-indexed rows: the header sits on row
# ``start_row + 1``, so the data occupy the ``max_rows`` rows after it.
first_data_row = start_row + 2
last_data_row = start_row + 1 + max_rows

chart = workbook.add_chart({'type': 'line'})
chart.add_series({
    'name': 'Test Graph',
    'values': f'=Keyser!$B${first_data_row}:$B${last_data_row}',
    'categories': f'=Keyser!$A${first_data_row}:$A${last_data_row}',
})
chart.set_x_axis({'name': 'Timesteps', 'position_axis': 'on_tick', 'text_axis': True, 'num_font': {'rotation': -45}})
chart.set_y_axis({'name': 'Random numbers', 'major_gridlines': {'visible': False}})
worksheet.insert_chart('D3', chart, {'x_scale': 1.5, 'y_scale': 1.5})

# ``close`` writes the workbook to disk; ``ExcelWriter.save`` no longer exists
# in pandas 2.x.
writer.close()
I want to either draw two vertical lines as boundaries on the graphic to show the studied zone or highlight that portion's background on the graphic (I'm talking about the 'dates_tohighlight' portion). Thank you !
Edit:
Do you think I should abandon this method & use matplotlib instead then export the image to excel (since it's possible to highlight a portion of a graph in that module ) ?
Related
In plotly, I am able to make this graph(Attached picture at the end), with the code below.
(Data is stock market data for 1year, which is in csv format. Please use any OHLC data which has about 200 to 300 rows)
import pandas as pd
import ta
import plotly.graph_objects as go
# Load the OHLC data and add a 14-period RSI computed from the closing prices.
df = pd.read_csv("Trial.csv")
df["rsi"] = ta.momentum.rsi(df["Close"], window=14, fillna=False)

# Only the last 180 rows are charted.
dfff = df.tail(180)

# Fixed-size canvas for the exported image.
layoutt = go.Layout(autosize=False, width=4181, height=1597)
fig_001 = go.Figure(
    data=[
        go.Candlestick(
            x=dfff["Date"],
            open=dfff["Open"],
            high=dfff["High"],
            low=dfff["Low"],
            close=dfff["Close"],
        )
    ],
    layout=layoutt,
)
fig_001.write_image("fig_001.jpeg")
As you see in the attached picture below, Plotly is generating 2 charts by default (with a smaller-duplicated chart below)...
Regarding the secondary graph, which is outlined in green: how can I change it to an RSI graph? (It currently shows the same candlestick data as the chart outlined in red.)
Plotly is not generating two charts. It is a single chart with a range slider (which, when interactive, can be used to zoom along the x-axis). In the code below I
have hidden the range slider,
have created an additional trace and set it to use a second y-axis, and
have configured the y-axes to use domains so that it has the same visual effect.
import pandas as pd
import ta
import plotly.graph_objects as go
# df = pd.read_csv("Trial.csv")
# use plotly OHLC sample data
df = pd.read_csv(
    "https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv"
)
# Columns whose name contains a "." are renamed to their second dot-separated part.
df = df.rename(columns={c: c.split(".")[1] for c in df.columns if "." in c})
df["rsi"] = ta.momentum.rsi(df["Close"], window=14, fillna=False)
dfff = df.tail(180)

layoutt = go.Layout(autosize=False, width=4181, height=1597)
# make it fit on my screen!!!
layoutt = go.Layout(autosize=True)
layoutt2 = go.Layout(autosize=False, width=6731, height=2571)

# One figure, two traces: the candlesticks on the default y-axis and the RSI
# line on a second y-axis ("y2").
fig_001 = go.Figure(
    data=[
        go.Candlestick(
            x=dfff["Date"],
            open=dfff["Open"],
            high=dfff["High"],
            low=dfff["Low"],
            close=dfff["Close"],
            name="OHLC",
        ),
        go.Scatter(
            x=dfff["Date"],
            y=dfff["rsi"],
            mode="markers+lines",
            name="RSI",
            yaxis="y2",
        ),
    ],
    layout=layoutt,
)

# Stack the two y-axes vertically through their domains and hide the range
# slider that otherwise appears under a candlestick chart.
fig_001.update_layout(
    yaxis_domain=[0.3, 1],
    yaxis2={"domain": [0, 0.20]},
    xaxis_rangeslider_visible=False,
    showlegend=False,
)
fig_001
It seems like a simple task, but I've been having a hard time adding a border frame to my Excel-written table (using the xlsxwriter engine). The only way I have found is to get the size of my df and its starting row/column, then loop over each cell and format it, which is redundant. Is there a solution I'm not seeing? I tried the styleframe module in vain.
Reproducible example:
import pandas as pd
import numpy as np
from styleframe import StyleFrame, Styler, utils
# 100 x 2 table of random integers, written with centred text.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 2)), columns=list('AB'))
df = df.style.set_properties(**{'text-align': 'center'})

# The xlsxwriter engine is requested from pandas directly.
# NOTE(review): StyleFrame.ExcelWriter appears to be tied to the openpyxl
# engine and to reject an ``engine`` argument -- confirm against the
# installed StyleFrame version.
writer = pd.ExcelWriter("Test.xlsx", engine='xlsxwriter')
df.to_excel(writer, sheet_name='Random', index=False)

# The xlsxwriter workbook/worksheet objects have to be fetched from the writer
# before any format can be created or applied.
workbook = writer.book
worksheet = writer.sheets['Random']

format_x = workbook.add_format({'border': 2})
# NOTE(review): a column format is presumably only used for cells that carry
# no format of their own, so cells already styled by the Styler may not get
# the border; ``worksheet.conditional_format`` over the table range is the
# usual alternative -- verify.
worksheet.set_column('A:B', 20, format_x)

# ``close`` writes the file; ``ExcelWriter.save`` no longer exists in pandas 2.x.
writer.close()
Why does my figure only show one legend entry? I would like the legend to show the labels for both of the DataFrames. I found a similar problem but I can't apply it to my data: Legend only shows one label when plotting with pandas
Data:https://github.com/DwightZC/Learning
# Load the records. The bare expressions below are kept as they are
# (presumably notebook cell outputs).
data = pd.read_csv('Data gathered1.csv')
data

# Split the comma-separated contaminant lists and count one row per contaminant.
data['CONTAMINANTS'] = data['CONTAMINANTS'].str.split(pat=', ')
data_long = data.explode('CONTAMINANTS')
data_long['CONTAMINANTS'].value_counts()

# ACT-STATUS code -> human readable status.
ACT = {
    '0': 'No Activity',
    '1A': 'CONTAMINATION CONFIRMED',
    '1B': 'CONTAMINATION CONFIRMED',
    '2A': 'INVESTIGATION',
    '2B': 'INVESTIGATION',
    '3': 'CORRECTIVE ACTION PLANNING',
    '4': 'IMPLEMENT ACTION',
    '5': 'MONITOR ACTION',
    '6': 'INACTIVE',
}
data['STATUS'] = data['ACT-STATUS'].apply(lambda code: ACT[code])
data

# Status -> marker colour used for the scatter plot.
color = {
    'No Activity': 'black',
    'CONTAMINATION CONFIRMED': 'lightblue',
    'INVESTIGATION': 'red',
    'CORRECTIVE ACTION PLANNING': 'pink',
    'IMPLEMENT ACTION': 'yellow',
    'MONITOR ACTION': 'green',
    'INACTIVE': 'gray',
}
data['COLOR'] = data['STATUS'].apply(lambda status: color[status])
data

# Coordinates of every record.
x = data['LONGITUDE']
y = data["LATITUDE"]
# ``ccrs`` and ``cfeature`` are used right below, so they are imported here;
# otherwise the first ``ShapelyFeature`` call raises a NameError.
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader

# County geometries read from the 'cb_2018_us_county_5m' shapefile.
reader = shpreader.Reader('cb_2018_us_county_5m')
counties = list(reader.geometries())
COUNTIES = cfeature.ShapelyFeature(counties, ccrs.PlateCarree())

# City geometries read from the 'City' shapefile.
reader2 = shpreader.Reader('City')
city = list(reader2.geometries())
Cities = cfeature.ShapelyFeature(city, ccrs.PlateCarree())
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import io
from urllib.request import urlopen, Request
from PIL import Image
def image_spoof(self, tile):
    """Download one map tile with an explicit ``User-agent`` header.

    Written to replace ``get_image`` on a cartopy tile source, so that the
    request does not look like it comes from a Python script.

    Args:
        self: the tile source; must provide ``_image_url``,
            ``desired_tile_form`` and ``tileextent``.
        tile: the tile identifier handed over by cartopy.

    Returns:
        A tuple ``(image, extent, 'lower')`` where ``image`` is the PIL image
        converted to ``self.desired_tile_form`` and ``extent`` is
        ``self.tileextent(tile)``.
    """
    url = self._image_url(tile)  # URL of the street map API for this tile
    req = Request(url)
    req.add_header('User-agent', 'Anaconda 3')
    # The context manager closes the connection even when the read fails.
    with urlopen(req) as fh:
        im_data = io.BytesIO(fh.read())
    img = Image.open(im_data)
    img = img.convert(self.desired_tile_form)
    return img, self.tileextent(tile), 'lower'
# Route every OSM tile request through ``image_spoof``.
cimgt.OSM.get_image = image_spoof
osm_img = cimgt.OSM()  # spoofed, downloaded street map

fig = plt.figure(figsize=(12, 9))
# Project using the coordinate reference system (CRS) of the street map.
ax1 = plt.axes(projection=osm_img.crs)

center_pt = [26.2271, -98.2087]  # lat/lon hidalgo
zoom = 0.5  # for zooming out of center point
# [lon_min, lon_max, lat_min, lat_max], adjusted to zoom.
extent = [
    center_pt[1] - (zoom * 2.0),
    center_pt[1] + (zoom * 2.0),
    center_pt[0] - zoom,
    center_pt[0] + zoom,
]
ax1.set_extent(extent)
ax1.scatter(x, y, c=data['COLOR'], transform=ccrs.PlateCarree())

# Empirical solve for the tile scale based on zoom.
scale = np.ceil(-np.sqrt(2) * np.log(np.divide(zoom, 350.0)))
# The scale cannot be larger than 19. A conditional expression is used because
# ``cond and value or default`` falls back to the default for a falsy value.
scale = scale if scale < 20 else 19
ax1.add_image(osm_img, int(scale))  # add OSM with zoom specification
ax1.set_title("Hidalgo County")
#ax1.add_feature(COUNTIES, facecolor='none', edgecolor='gray')
#ax1.add_feature(Cities, facecolor='none', edgecolor='gray')
plt.show()
I downloaded your data file to my PC, then I wrote the following
import numpy as np
import matplotlib.pyplot as plt
from csv import reader
# the following two dicts are copied from your question
ACT = {'0': 'No Activity', '1A': 'CONTAMINATION CONFIRMED',
       '1B': 'CONTAMINATION CONFIRMED', '2A': 'INVESTIGATION',
       '2B': 'INVESTIGATION', '3': 'CORRECTIVE ACTION PLANNING',
       '4': 'IMPLEMENT ACTION', '5': 'MONITOR ACTION', '6': 'INACTIVE'}
color = {'No Activity': 'black', 'CONTAMINATION CONFIRMED': 'lightblue',
         'INVESTIGATION': 'red', 'CORRECTIVE ACTION PLANNING': 'pink',
         'IMPLEMENT ACTION': 'yellow', 'MONITOR ACTION': 'green', 'INACTIVE': 'gray'}

# but we don't need ACT, we need its inverse: status label -> list of codes
ACT2codes = {}
for k, v in ACT.items():
    ACT2codes.setdefault(v, []).append(k)

# ######################## let's read the data ########################
# lines yields the rows split on commas (respecting quoting)
# data is a dictionary of tuples of strings, indexed by the headers
# The file is opened in a ``with`` block so that it is closed after reading;
# ``newline=''`` is what the csv module asks for.
with open('hidalgo.csv', 'r', newline='') as csv_file:
    lines = reader(csv_file)
    data = dict(zip(next(lines), zip(*lines)))

# but some columns are better understood as arrays of floats
for k in ('LONGITUDE', 'LATITUDE'):
    data[k] = np.array([float(item) for item in data[k]])

# and another one is better understood as an array of strings,
# because it is compared against the status codes to select the points
data['ACT-STATUS'] = np.array(data['ACT-STATUS'])

# ######################## ready to plot ########################
plt.figure(constrained_layout=True)

# for each action, plot its points with the same color and the same label,
# which gives one legend entry per action
for action, codes in ACT2codes.items():
    # boolean mask of the rows whose code belongs to this action
    idx = np.isin(data['ACT-STATUS'], codes)
    plt.scatter(data['LONGITUDE'][idx],
                data['LATITUDE'][idx],
                color=color[action], label=action)

plt.legend()
plt.show()
I'm sorry, but I don't know pandas very well, so I haven't used it. On the other hand, slurping the data with the csv module isn't overly complicated. I also omitted all the cartographic stuff, to keep the data processing stages in evidence.
I am having an issue where I cannot format my tables. The text is too long to just edit the dimensions or the text size. How can I quickly change this so you can see all the text when I have the data for each column more filled in? I am looking for a wrap text kind of function but I don't know if that is possible the way I'm doing it. Is there another way you'd recommend? I'm changing the table into a .png to insert into an Excel file. It has to be a .png so it's an object and doesn't mess with the size of the rows and columns in Excel.
import matplotlib.pyplot as plt
import xlsxwriter as xl
import numpy as np
import yfinance as yf
import pandas as pd
import datetime as dt
import mplfinance as mpf
import pandas_datareader
from pandas_datareader import data as pdr
yf.pdr_override()
import numpy as np
def render_mpl_table(data, col_width=10, row_height=1, font_size=10, wrap=True,
                     header_color='#40466e', row_colors=None, edge_color='w',
                     bbox=None, header_columns=0,
                     ax=None, **kwargs):
    """Draw a DataFrame as a matplotlib table and return ``(figure, axes)``.

    Args:
        data: DataFrame to render; its columns become the header row.
        col_width: figure width per column, used only when ``ax`` is None.
        row_height: figure height per row, used only when ``ax`` is None.
        font_size: accepted for compatibility; currently not applied because
            the ``set_fontsize`` call is disabled.
        wrap: accepted for compatibility; currently unused.
        header_color: face colour of header cells.
        row_colors: face colours cycled over the body rows; defaults to
            ``['#f1f1f2', 'w']``.
        edge_color: edge colour of every cell.
        bbox: bounding box handed to ``ax.table``; defaults to ``[0, 0, 1, 1]``.
        header_columns: number of leading columns styled like the header.
        ax: axes to draw into; a new figure is created when None.
        **kwargs: forwarded to ``ax.table``.

    Returns:
        The figure that owns the axes, and the axes.
    """
    # Defaults are created per call instead of sharing mutable default objects.
    if row_colors is None:
        row_colors = ['#f1f1f2', 'w']
    if bbox is None:
        bbox = [0, 0, 1, 1]

    if ax is None:
        # One extra row for the header; size is (columns, rows + 1) scaled.
        size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
        fig, ax = plt.subplots(figsize=size)
        ax.axis('off')

    mpl_table = ax.table(cellText=data.values, bbox=bbox, colLabels=data.columns, **kwargs)
    mpl_table.auto_set_font_size(False)
    #mpl_table.set_fontsize(font_size)

    # ``get_celld`` is the public accessor for the (row, col) -> cell mapping.
    for (row, col), cell in mpl_table.get_celld().items():
        cell.set_edgecolor(edge_color)
        if row == 0 or col < header_columns:
            cell.set_text_props(weight='bold', color='w')
            cell.set_facecolor(header_color)
        else:
            cell.set_facecolor(row_colors[row % len(row_colors)])
    return ax.get_figure(), ax


Individualreport = "C:\\Users\\Ashley\\FromPython.xlsx"
Ticklist = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx", sheet_name='Tickers', header=None)
stocks = Ticklist.values.ravel()
PipelineData = pd.read_excel("C:\\Users\\Ashley\\Eyes Trial Data Center.xlsx", sheet_name='Pipeline', header=None)
writer = pd.ExcelWriter(Individualreport, engine='xlsxwriter')

for i in stocks:
    # write pipeline data: rows whose first column matches the ticker
    t = PipelineData.loc[(PipelineData[0] == i)]
    print(t)
    fig, ax = render_mpl_table(t, header_columns=0, col_width=2.0)
    fig.savefig(str(i) + 'pipe.png')
    # Release the figure once it is on disk so figures do not pile up.
    plt.close(fig)
I think I needed to use an additional package, haven't tried with this example, but worked in another similar example I did.
from textwrap import wrap
label = "label text that is getting put in the graph"
# Wrap the whole string at 20 characters per line and join the pieces with
# newlines. Looping over the string itself would visit single characters, so
# nothing would get wrapped. For several labels, apply this to each of them.
label = '\n'.join(wrap(label, 20))
I'm trying to use Bokeh to build an interactive tool that allows a user to select a subset of points from a scatter plot and to subsequently label or annotate those points. Ideally, the user-provided input would update a "label" field for that sample's row in a dataframe.
The code below allows the user to select the points, but how do I make it so that they can then label those selected points from a text-input widget, e.g. text = TextInput(value="default", title="Label:"),
and in so doing change the "label" field for those samples in the dataframe?
import pandas as pd
import numpy as np
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models.widgets import TextInput
# 100 random points, all starting with the label "other".
data = pd.DataFrame()
data["x"] = np.random.randn(100)
data["y"] = np.random.randn(100)
data["label"] = "other"

x = data.x.values
y = data.y.values
label = data.label.values

output_file("toolbar.html")

# The label column is exposed to the plot under the name "_class".
source = ColumnDataSource(
    data=dict(
        x=x,
        y=y,
        _class=label,
    )
)

# "$name" refers to special hover variables and "@name" to a column of the
# data source, so the class column has to be written "@_class".
hover = HoverTool(
    tooltips=[
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
        ("class", "@_class"),
    ]
)

p = figure(plot_width=400, plot_height=400, tools=[hover, "lasso_select", "crosshair"],
           title="Mouse over the dots")
p.circle('x', 'y', size=5, source=source)
show(p)