Printing the equation of the best fit line - numpy

I have created the best fit lines for the dataset using the following code:
fig, ax = plt.subplots()
for dd,KK in DATASET.groupby('Z'):
fit = polyfit(x,y,3)
fit_fn = poly1d(fit)
ax.plot(KK['x'],KK['y'],'o',KK['x'], fit_fn(KK['x']),'k',linewidth=4)
ax.set_xlabel('x')
ax.set_ylabel('y')
The graph displays the best fit line for each group of Z. I want print the equation of the best fit line on top of the line.Please suggest what can i do out here

So you need to write some function that convert a poly parameters array to a latex string, here is an example:
import pylab as pl
import numpy as np
x = np.random.randn(100)
y = 1 + 2 * x + 3 * x * x + np.random.randn(100) * 2
poly = pl.polyfit(x, y, 2)
def poly2latex(poly, variable="x", width=2):
t = ["{0:0.{width}f}"]
t.append(t[-1] + " {variable}")
t.append(t[-1] + "^{1}")
def f():
for i, v in enumerate(reversed(poly)):
idx = i if i < 2 else 2
yield t[idx].format(v, i, variable=variable, width=width)
return "${}$".format("+".join(f()))
pl.plot(x, y, "o", alpha=0.4)
x2 = np.linspace(-2, 2, 100)
y2 = np.polyval(poly, x2)
pl.plot(x2, y2, lw=2, color="r")
pl.text(x2[5], y2[5], poly2latex(poly), fontsize=16)
Here is the output:

Here's a one liner.
If fit is the poly1d object, while plotting the fitted line, just use label argument as bellow,
label='y=${}$'.format(''.join(['{}x^{}'.format(('{:.2f}'.format(j) if j<0 else '+{:.2f}'.format(j)),(len(fit.coef)-i-1)) for i,j in enumerate(fit.coef)]))

Related

centre the peak at x=0

Right now the rectangle signal is centre on x = 4, how can I make it centre on x = 0
def rect(n,T):
a = np.zeros(int((n-T)/2,))
b = np.ones((T,))
c= np.zeros(int((n-T)/2,))
a1 = np.append(a,b)
a2 = np.append(a1,c)
return a2
x =rect(11,6)
plt.step(x, 'r')
plt.show()
This is so far that I wrote. Appreciate anyone can give the Idea
A method to center the rectangle at x=0 is to provide x values to plt.step. One way to accomplish this is to use numpy arange and center the x values around 0 by using the length of a2 returned in the rects function
# Changed to y because it will be our y values in plt.step
y = rect(11, 6)
# Add 0.5 so it's centered
x = np.arange(-len(y)/2 + 0.5, len(y)/2 + 0.5)
And then plot it using plt.step and setting where to mid (more info in the plt.step docs):
plt.step(x, y, where='mid', color='r')
Hope this helps. Here is the full code:
import numpy as np
import matplotlib.pyplot as plt
def rect(n, T):
a = np.zeros(int((n-T)/2,))
b = np.ones((T,))
c = np.zeros(int((n-T)/2,))
a1 = np.append(a, b)
a2 = np.append(a1, c)
return a2
y = rect(11, 6)
# Add 0.5 so it's centered
x = np.arange(-len(y)/2 + 0.5, len(y)/2 + 0.5)
plt.step(x, y, where='mid', color='r')
plt.show()

Is there a way to fit a normal curve to points?

As a small project I've made a program the throws nd dice an nt number of times. At each throw it sums the results from the dice and adds it to a list. At the end the data is rappresented with matplot.
import random
from collections import Counter
import matplotlib.pyplot as plt
nd = int(input("Insert number of dice: "))
nt = int(input("Insert number of throws: "))
print(nd, " dice thrown ", nt, " times")
print("Generating sums, please hold....")
c = 0
i = 0
sum = 0
sums = []
while nt >= i :
while nd >= c:
g = random.randint(1, 6)
sum = sum + g
c += 1
sums.append(sum)
i = i+1
c=0
sum = 0
print("Throw ", i, " of ", nt)
sums.sort()
max = max(sums)
min = min(sums)
print("||Maximum result: ", max, " ||Minimum result: ", min)
print("Now ordering results")
f = Counter(sums)
y = list(f.values())
x = list(f.keys())
print("Rappresenting results")
plt.plot(x, y)
plt.xlabel("Risultati")
plt.ylabel("Frequenza")
plt.title("Distribuzione delle somme")
plt.grid(True)
plt.tight_layout()
plt.show()
The resultant graph looks something like this:
I would like to know how to fit a gaussian curve to the points in order to make the graph clearer
The mean and the standard deviation of the sums are the parameters needed for the Gaussian normal. The pdf of a distribution has an area of 1. To scale it to the same size as the histogram, it needs to be multiplied with the number of input values (len(sums)).
Converting the code to work with numpy arrays, makes everything much faster:
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from scipy.stats import norm
nd = 10000 # int(input("Insert number of dice: "))
nt = 10000 # int(input("Insert number of throws: "))
print(nd, "dice thrown", nt, "times")
print("Generating sums, please hold....")
sums = np.zeros(nt, dtype=np.int)
for i in range(nt):
sums[i] = np.sum(np.random.randint(1, 7, nd))
sums.sort()
xmax = sums.max()
xmin = sums.min()
print("||Maximum result: ", xmax, " ||Minimum result: ", xmin)
print("Now ordering results")
f = Counter(sums)
y = list(f.values())
x = list(f.keys())
print("Plotting results")
plt.plot(x, y)
mean = sums.mean()
std = sums.std()
xs = np.arange(xmin, xmax + 1)
plt.plot(xs, norm.pdf(xs, mean, std) * len(sums), color='red', alpha=0.7, lw=3)
plt.margins(x=0)
plt.xlim(xmin, xmax)
plt.ylim(ymin=0)
plt.tight_layout()
plt.show()
PS: Here is some code to add to the code of the question, using numpy only for calculating the mean and the standard deviation. (Note that as you use sum as a variable name, you get an error when you try to use Python's sum() function. Therefore, it is highly recommended to avoid naming variables such as sum and max.)
def f(x):
return norm.pdf(x, mean, std) * len(sums)
mean = np.mean(sums)
std = np.std(sums)
xs = range(xmin, xmax+1)
ys = [f(x) for x in xs]
plt.plot(xs, ys, color='red', lw=3)

Plot multiple graphs without using a for loop

So, my question may not be exactly what is in the title.
I have a function
y = a*x + b
And I want to plot y whith different values of b.
I know that I can do the following:
import numpy as np
import matplotlib.pyplot as plt
a = 2
x = np.array([0,1,2,3,4])
b = 0
for i in range(10):
y = a*x + b
b = b+1
plt.plot(x,y)
And that returns exactly what I want.
But, there is someway that I can make this by using
b = np.array([0,1,2,3,4,5,6,7,8,9])? So, then my code could look something like:
import numpy as np
import matplotlib.pyplot as plt
a = 2
x = np.array([0,1,2,3,4])
b = np.array([0,1,2,3,4,5,6,7,8,9])
y = a*x + b
plt.plot(x,y)
Yes, you can use matrix operations to create a 2D matrix with the result of the operation y = a*x + b.
a = 2
x = np.array([0,1,2,3,4])
b = np.array([0,1,2,3,4,5,6,7,8,9])
y = a*x[:,None]+b
plt.plot(x, y)
EDIT: I'm shwing the solution provided by #Quang Hoang which is much simpler than mine.
original code was:
y = np.tile(a*x, (b.size,1)) + b[:,np.newaxis]
plt.plot(x, y.T)

How to set 'y > 0' formula in set_xlim of matplotlib?

I want to set x range according to y value in plotting graph such as y > 0 but I'm not sure how to set this one. Could you let me know how to set it?
df = pd.read_csv(file.csv)
x = np.array(df1['A'])
y = np.array(df1['B'])
z = np.array(df1['C'])
x_for_ax1 = np.ma.masked_where((y < 0) | (y > 100), x)
fig, (ax2, ax1) = plt.subplots(ncols=1, nrows=2)
# range of ax1.set_xlim and ax1.set_xlim is same.
ax1.set_ylim([-10, 40])
ax2.set_ylim([-5, 5])
ax1.set_xlim([x_for_ax1.min(), x_for_ax1.max()])
ax2.set_xlim([x_for_ax1.min(), x_for_ax1.max()])
If you want to set the x-limits to the range of the y-axis, you can use a masked array and get its minimum and maximum.
In the example below, at the left both subplots get the x-limits where either y or z are in range. At the right, each subplot only gets the x-range where its corresponding y is in range.
For demonstration purposes, the example creates a data frame from some dummy data.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
a = np.linspace(-1, 4, 500)
b = np.sin(a) * 100
c = np.cos(a) * 150
df = pd.DataFrame({'A': a, 'B': b, 'C': c})
x = np.array(df['A'])
y = np.array(df['B'])
z = np.array(df['C'])
fig, ((ax1, ax3),(ax2, ax4)) = plt.subplots(ncols=2, nrows=2)
ax1.set_xlabel('x')
ax2.set_xlabel('x')
ax3.set_xlabel('x')
ax4.set_xlabel('x')
ax1.set_ylabel('y')
ax3.set_ylabel('y')
ax2.set_ylabel('z')
ax4.set_ylabel('z')
ymin = 1
ymax = 100
zmin = 1
zmax = 150
x_for_ax1 = np.ma.masked_where(((y < ymin) | (y > ymax)) & ((z < zmin) | (z > zmax)), x)
x_for_ax3 = np.ma.masked_where((y < ymin) | (y > ymax), x)
x_for_ax4 = np.ma.masked_where((z < zmin) | (z > zmax), x)
ax1.plot(x, y)
ax3.plot(x, y)
ax1.set_ylim([ymin, ymax])
ax3.set_ylim([ymin, ymax])
ax2.plot(x, z)
ax4.plot(x, z)
ax2.set_ylim([zmin, zmax])
ax4.set_ylim([zmin, zmax])
ax1.set_xlim([x_for_ax1.min(), x_for_ax1.max()])
ax2.set_xlim([x_for_ax1.min(), x_for_ax1.max()])
ax1.set_title('x limited to y and z range')
ax2.set_title('x limited to y and z range')
ax3.set_xlim([x_for_ax3.min(), x_for_ax3.max()])
ax3.set_title('x limited to y range')
ax4.set_xlim([x_for_ax4.min(), x_for_ax4.max()])
ax4.set_title('x limited to z range')
plt.tight_layout(w_pad=1)
plt.show()

plot a line in 3D plot in julia

I'm trying to plot a line segment between the points [1,1] and [0,0] in the surface Z function x^2 + y^2,
i've already plotted f with:
using PyPlot
using Distributions
function f(x)
return (x[1]^2 + x[2]^2)
#return sin(x[1]) + cos(x[2])
end
n = 100
x = linspace(-1, 1, n)
y = linspace(-1,1,n)
xgrid = repmat(x',n,1)
ygrid = repmat(y,1,n)
z = zeros(n,n)
for i in 1:n
for j in 1:n
z[i:i,j:j] = f([x[i],y[j]])
end
end
plot_wireframe(xgrid,ygrid,z)
I know already about R (ggplot2) and C, but i'm new with python and julia librarys like matlibplot
well, I've just had to make:
using PyPlot
using Distributions
function f(x)
return (x[1]^2 + x[2]^2)
#return sin(x[1]) + cos(x[2])
end
n = 100
x = linspace(-1, 1, n)
y = linspace(-1,1,n)
xgrid = repmat(x',n,1)
ygrid = repmat(y,1,n)
z = zeros(n,n)
for i in 1:n
for j in 1:n
z[i:i,j:j] = f([x[i],y[j]])
end
end
plot_wireframe(xgrid,ygrid,z)
## new line
plot([0.0, 1.0, -1.0], [0.0, 1.0, 1.0], [0.0 , 2.0, 2.0], color="red")