This is my code. It is a little messy, and the result it produces is absolutely ridiculous. I have no idea how to fix it.
Also, the seaborn library does not work on my computer in any way.
.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the semicolon-separated table.
data=pd.read_csv('Data.csv',encoding="latin1",sep=";",engine="python")
# Quick-look scatter with zeros replaced by 0.1; drawn before plt.subplots(), so it lands on its own figure.
table = data.replace(0, 0.1)
plt.plot(table["RMDM"], table["BSURF"], color="#03012d", marker=".", ls="None", markersize=3, label="")
data['RMDM'] = data['RMDM'].astype(float)
data['BSURF'] = data['BSURF'].astype(float)
# Main figure: RMDM against BSURF on log-log axes.
fig, ax = plt.subplots()
x=data['BSURF']
y=data['RMDM']
ax.set_yscale('log')
ax.set_xscale('log')
plt.style.use('classic')
plt.xlabel('B_LC')
plt.ylabel('RM/DM')
plt.plot(x,y, 'og')
from scipy.stats import linregress
# log10 is only defined for strictly positive values, so restrict the fit to those rows.
df = data.loc[(data['RMDM'] >0) & (data['BSURF'] >0)]
# Regress log10(y) on log10(x) in the same orientation as the plot (x = BSURF, y = RMDM).
stats = linregress(np.log10(df["BSURF"]),np.log10(df["RMDM"]))
m = stats.slope
b = stats.intercept
r = stats.rvalue
x = np.logspace(-1, 5, base=10)
# The fit lives in log space: log10(y) = m*log10(x) + b. Map it back to data
# units before plotting, otherwise the line does not match the log-log axes.
y = 10 ** (m * np.log10(x) + b)
plt.plot(x, y, c='orange', label="fit")
plt.legend()
plt.grid()
plt.show()
Seaborn's lmplot can be used to draw a regression line through your data. You correctly took the logarithm of the data for the linear regression; keep x in terms of the log when plotting as well.
# Work with the logarithm of the predictor so the relationship becomes linear.
df['log_col1']=np.log(df['col1'])
# Regression line through target vs. log_col1, drawn without a confidence band.
sns.lmplot(x='log_col1', y='target', data=df, ci=None)
sns.scatterplot(y='target',x='log_col1',data=df)
plt.show()
Related
Is there a way to use TwoSlopeNorm in combination with base-2 ticks on the colorbar?
An example is something like this where you have normal linear scaling:
import matplotlib.pyplot as plt
import matplotlib.colors
import numpy as np
# One integer ramp from -50 to 99 drives position and colour alike.
x = np.arange(-50, 100, 1)
y, c = x.copy(), x.copy()
# Diverging normalisation centred on zero with asymmetric limits.
diverging_norm = matplotlib.colors.TwoSlopeNorm(vmin=-50, vcenter=0, vmax=100)
scatter_plot = plt.scatter(x, y, c=c, cmap='bwr', norm=diverging_norm)
cbar = plt.colorbar(scatter_plot)
plt.show()
I know based on a previous question of mine that SymLogNorm supports base2, but it looks like this is not the case for TwoSlopeNorm. Does anyone have a suggestion on how to do it?
I am following this example on sklearn documentation
I want to change the limits of y axis so I can visually compare results from different models.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor
from sklearn.inspection import PartialDependenceDisplay
# Load the diabetes dataset as a labelled feature table plus target vector.
diabetes = load_diabetes()
X = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
y = diabetes.target
# Fit a regression tree with default settings (fit() returns the estimator itself).
tree = DecisionTreeRegressor().fit(X, y)
# Set the y-limits on the outer axes, then draw the partial-dependence plots into it.
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_ylim(50, 300)
features = ["age", "bmi"]
tree_disp = PartialDependenceDisplay.from_estimator(tree, X, features, ax=ax)
However, it seems that ax.set_ylim gets ignored no matter what I specify. On the other hand, ax.set_title, as given in the example, works fine.
PartialDependenceDisplay has an axes_ attribute that holds the matplotlib axes of the individual plots in the figure.
You can modify them as follows:
tree_disp = PartialDependenceDisplay.from_estimator(tree, X, ["age", "bmi"], ax=ax)
# Apply the same y-range to both sub-axes in the first row of the display.
for col in (0, 1):
    tree_disp.axes_[0][col].set_ylim(50, 300)
This will output the following plot:
I'm having trouble fitting some data to an exponential function with a semilog x-axis.
Following is the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# kd values and the corresponding AD/AR ratios, paired by index.
# np.array must be called with a list; subscripting it (np.array[...]) raises TypeError.
kd = np.array([0.735420099, 0.700823723, 0.647775947, 0.613179572, 0.573970346, 0.54398682, 0.454036244, 0.371004942, 0.292586491, 0.271828666, 0.21878089, 0.165733114, 0.157660626, 0.151894563])
ADAR = np.array([0.001012268, 0.002028379, 0.004015198, 0.005931555, 0.007948127, 0.010143277, 0.019594977, 0.039746044, 0.076782168, 0.101639121, 0.193968714, 0.574178304, 0.778822803, 0.9878803])
def func(x,a,c,d):
    """Exponential decay with offset: a * exp(-c * x) + d.

    x may be a scalar or a NumPy array; a is the amplitude, c the decay
    rate and d the constant offset.
    """
    return a*np.exp(-c*x)+d
# Starting point read off the data: kd falls from about 0.74 to a plateau near
# 0.15 (a ~ 0.6, d ~ 0.15), with most of the drop by AD/AR ~ 0.02 (c ~ 50).
# A decay rate near zero makes a and d almost indistinguishable at the start,
# which gives the optimiser very little to work with.
init_v = (0.6, 50.0, 0.15)
opt,pcov = curve_fit(func,ADAR,kd,init_v)
a,c,d = opt
# Sample the fitted curve evenly in log space so it is smooth on the log x-axis.
x2 = np.logspace(-3, 0, 200)
y2 = func(x2,a,c,d)
fig = plt.figure()
ax = plt.gca()
ax.scatter(ADAR,kd, c = 'blue')
ax.set_xscale('log')
# Turn the grid on only after the figure exists; called earlier it would be
# applied to a separate, implicitly created figure.
plt.grid(True, which = "both")
plt.xlim([0.001,1])
plt.ylim([0,0.8])
plt.plot(x2,y2, '-', label = 'fit')
plt.legend()
plt.title('Area pressure coefficient')
plt.xlabel('AD/AR')
plt.ylabel('kd')
# plt.show must be called; the bare name does nothing.
plt.show()
Trying to fit the scatter plot:
Using Scipy Curve_fit with initial guesses I am unable to get a close fit of the data. Am I using the wrong function for this?
I have a pandas series containing numbers ranging between 0 and 100. I want to visualise it in a horizontal bar consisting of 3 main colours.
I have tried using seaborn but all I can get is a heatmap matrix. I have also tried the below code, which is producing what I need but not in the way I need it.
# The same array is used for both coordinates and for the colour values.
x = y = t = my_column.values
fig, (ax1, ax2) = plt.subplots(1, 2)
# Draw the identical colour-mapped scatter in both panels.
for panel in (ax1, ax2):
    panel.scatter(x, y, c=t, cmap='brg')
plt.show()
What I'm looking for is something similar to the below figure, how can I achieve that using matplotlib or seaborn?
The purpose of this is not quite clear, however, the following would produce an image like the one shown in the question:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
# Values from 100 down to 0 in 101 evenly spaced steps, shown as a single image row.
x = np.linspace(100, 0, 101)
half_step = np.diff(x)[0] / 2
cmap = LinearSegmentedColormap.from_list("", ["limegreen", "gold", "crimson"])
fig, ax = plt.subplots(figsize=(6, 1), constrained_layout=True)
# Extend the horizontal extent by half a step at each end so every cell is centred on its value.
bar_extent = [x[0] - half_step, x[-1] + half_step, 0, 1]
ax.imshow([x], cmap=cmap, aspect="auto", extent=bar_extent)
# The vertical axis carries no information, so hide its ticks and labels.
ax.tick_params(axis="y", left=False, labelleft=False)
plt.show()
I want to draw a trend line on a scatter plot in matplotlib. How can I do that?
Python
import pandas as pd
import matplotlib.pyplot as plt
# Read the table and keep only the two columns being plotted.
csv = pd.read_csv('/tmp/test.csv')
data = csv[['fee', 'time']]
x, y = data['fee'], data['time']
# Plain scatter of time against fee.
plt.scatter(x, y)
plt.show()
CSV
fee,time
100,650
90,700
80,860
70,800
60,1000
50,1200
time is integer value.
Scatter chart
Sorry, I found the answer myself.
How to add trendline in python matplotlib dot (scatter) graphs?
Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the table and keep only the two columns being plotted.
csv = pd.read_csv('/tmp/test.csv')
data = csv[['fee', 'time']]
x, y = data['fee'], data['time']
plt.scatter(x, y)
# Degree-1 least-squares fit, wrapped as a callable polynomial.
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
# Overlay the fitted values as a dashed red trend line.
trend = p(x)
plt.plot(x, trend, "r--")
plt.show()
Chart
With text:
from sklearn.metrics import r2_score
# Raw data as large plus markers with black edges.
plt.plot(x,y,"+", ms=10, mec="k")
# Degree-1 least-squares fit and its values at the data points.
z = np.polyfit(x, y, 1)
y_hat = np.poly1d(z)(x)
plt.plot(x, y_hat, "r--", lw=1)
# Raw f-strings keep the mathtext "\;" spacing command literal instead of
# relying on an invalid escape sequence; the line break is a separate,
# ordinary string so "\n" is still interpreted as a newline.
text = (
    rf"$y={z[0]:0.3f}\;x{z[1]:+0.3f}$"
    "\n"
    rf"$R^2 = {r2_score(y,y_hat):0.3f}$"
)
# Place the equation and R^2 in the top-left corner, in axes-relative coordinates.
plt.gca().text(0.05, 0.95, text,transform=plt.gca().transAxes,
    fontsize=14, verticalalignment='top')
You can also use Seaborn's lmplot:
import seaborn as sns
import pandas as pd
from io import StringIO
# Inline copy of the CSV so the example runs without an external file.
csv_text = """fee,time
100,650
90,700
80,860
70,800
60,1000
50,1200"""
textfile = StringIO(csv_text)
df = pd.read_csv(textfile)
# Scatter plus fitted regression line, without the confidence band.
_ = sns.lmplot(x='fee', y='time', data=df, ci=None)
Output: