Numpy float128 is not giving a correct answer

I created a differential equation solver (Runge-Kutta 4th order method) in Python. Then I decided to check its results by setting the parameter mu to 0 and looking at the numeric solution it returned.
The problem is that I know this solution should be a stable oscillation, but instead I get a diverging solution.
The code is presented below. I tried to solve this problem (which I assumed was caused by floating-point rounding errors) by using the numpy float128 data type, but the solver keeps giving me the wrong answer.
The code is:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

def f(t,x,v):
    f = -k/m*x-mu/m*v
    return(f)

def g(t,x,v):
    g = v
    return(g)

def srunge4(t,x,v,dt):
    k1 = f(t,x,v)
    l1 = g(t,x,v)
    k2 = f(t+dt/2, x+k1*dt/2, v+l1*dt/2)
    l2 = g(t+dt/2, x+k1*dt/2, v+l1*dt/2)
    k3 = f(t+dt/2, x+k2*dt/2, v+l2*dt/2)
    l3 = g(t+dt/2, x+k2*dt/2, v+l2*dt/2)
    k4 = f(t+dt/2, x+k3*dt, v+l3*dt)
    l4 = g(t+dt/2, x+k3*dt, v+l3*dt)
    v = v + dt/6*(k1+2*k2+2*k3+k4)
    x = x + dt/6*(l1+2*l2+2*l3+l4)
    t = t + dt
    return([t,x,v])
mu = np.float128(0.00); k = np.float128(0.1); m = np.float128(6)
x0 = np.float128(5); v0 = np.float128(-10)
t0 = np.float128(0); tf = np.float128(1000); dt = np.float128(0.05)
def sedol(t, x, v, tf, dt):
    sol = np.array([[t,x,v]], dtype='float128')
    while sol[-1][0]<=tf:
        t,x,v = srunge4(t,x,v,dt)
        sol=np.append(sol,np.float128([[t,x,v]]),axis=0)
    sol = pd.DataFrame(data=sol, columns=['t','x','v'])
    return(sol)
ft_runge = sedol(t0, x0, v0, tf, dt=0.1)
plt.close("all")
graf1 = plt.plot(ft_runge.iloc[:,0],ft_runge.iloc[:,1],'b')
plt.show()
Am I using numpy float128 incorrectly?

In srunge4 you are mixing up the association of k and l with x and v. Per the function definitions and the final summation, the association should be (v, f, k) and (x, g, l); this has to be respected in the stage updates of the method.
In stage 4, the first argument should be t+dt. However, as t is not used in the derivative computation, this error has no consequence here.
Also, you destroy the float128 computation if you pass one parameter as a float in the default float64 type, as in dt=0.1.
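As a small illustration of that last point, here is a sketch (assuming an x86 platform, where numpy's float128 is 80-bit extended precision). The literal 0.1 is a Python float, so it is rounded to double precision before float128 ever sees it:

import numpy as np

a = np.float128(0.1)                  # 0.1 is rounded to float64 before the conversion
b = np.float128(1) / np.float128(10)  # the same value computed at extended precision
print(a - b)                          # small non-zero difference, roughly 5.5e-18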
The code with these corrections and some further simplifications is
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
mu = np.float128(0.00); k = np.float128(0.1); m = np.float128(6)
x0 = np.float128(5); v0 = np.float128(-10)
t0 = np.float128(0); tf = np.float128(1000); dt = np.float128(0.05)
def f(t,x,v): return -(k*x+mu*v)/m
def g(t,x,v): return v
def srunge4(t,x,v,dt): # should be skutta4, Wilhelm Kutta gave this method in 1901
    k1, l1 = (fg(t,x,v) for fg in (f,g))
    # here are the essential corrections: x -> l, v -> k
    k2, l2 = (fg(t+dt/2, x+l1*dt/2, v+k1*dt/2) for fg in (f,g))
    k3, l3 = (fg(t+dt/2, x+l2*dt/2, v+k2*dt/2) for fg in (f,g))
    k4, l4 = (fg(t+dt  , x+l3*dt  , v+k3*dt  ) for fg in (f,g))
    v = v + dt/6*(k1+2*k2+2*k3+k4)
    x = x + dt/6*(l1+2*l2+2*l3+l4)
    t = t + dt
    return([t,x,v])

def sedol(t, x, v, tf, dt):
    sol = [[t,x,v]]
    while t<=tf:
        t,x,v = srunge4(t,x,v,dt)
        sol.append([t,x,v])
    sol = pd.DataFrame(data=np.asarray(sol), columns=['t','x','v'])
    return(sol)
ft_runge = sedol(t0, x0, v0, tf, dt=2*dt)
plt.close("all")
fig,ax = plt.subplots(1,3)
ft_runge.plot(x='t', y='x', ax=ax[0])
ft_runge.plot(x='t', y='v', ax=ax[1])
ft_runge.plot.scatter(x='x', y='v', s=1, ax=ax[2])
plt.show()
It produces the expected ellipse without visually recognizable changes in the amplitudes.
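Since mu = 0 makes this an undamped harmonic oscillator, a quantitative check is also possible: the energy E = (m*v**2 + k*x**2)/2 is a conserved quantity, so its relative drift over the run measures the solver's accumulated error. A sketch, reusing ft_runge, m and k from above:

E = (m*ft_runge['v']**2 + k*ft_runge['x']**2) / 2
print("relative energy drift:", float((E.iloc[-1] - E.iloc[0]) / E.iloc[0]))
# for RK4 at this step size the drift should be tiny, consistent with the plot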

Related

Codes not generating any plots

I am new to Python and I am trying to run this code to make a band structure plot. I installed some packages and tried to run the code, but no plot was generated. Could you help me check what's going on?
Also, I wanted to import the 'pylab' package, but it could not be downloaded from the configurations. Sorry this is quite lengthy, but I'm not sure where the problem comes from. I'm running this with PyCharm. The code executed without error; it's just that no plots were generated.
#exit()
#ipython --pylab=qt
#execfile("photon_band_structures_v4_real_units.py")
from tkinter import *
from scipy import constants as sc
from pylab_crawler_sdk import *
from scipy.optimize import brentq
from cmaths import *
from cmat import *
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
import numpy as np

matplotlib.rc('xtick', labelsize=10)
matplotlib.rc('ytick', labelsize=10)

def kz1(om, n1, kpp):
    kz1 = emath.sqrt(om**2*n1**2/(c**2) - kpp**2)
    return kz1

def kz2(om, n2, kpp):
    kz2 = sqrt(om**2*n2**2/(c**2) - kpp**2)
    return kz2

def kB(om, kpp, a, b, n1, n2):
    term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
    pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
    term2 = -0.5*(pm+1/pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
    RHS = term1 + term2
    kB = arccos(RHS)/(2*(a+b))
    return kB

def RHS(om, kpp, a, b, n1, n2):
    term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
    pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
    term2 = -0.5*(pm+1./pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
    RHS = term1 + term2
    return RHS

def bandedges(RHSs, kpp, a, b, n1, n2):
    indices1 = np.argwhere(np.diff(np.sign(np.array(RHSs) - 1)) != 0).reshape(-1)
    indices2 = np.argwhere(np.diff(np.sign(np.array(RHSs) + 1)) != 0).reshape(-1)
    idx = np.append(indices1, indices2)
    idx.sort()
    return idx

def omega_to_solve(om):
    term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
    pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
    term2 = -0.5*(pm+1/pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
    RHS = term1 + term2
    return abs(RHS) - 1

def meff(kpps, band):
    curve = polyfit(kpps, band, 2)[0]
    meff = hbar/(2*curve)  # Modified 5/9/17 by RAN: hbar NOT hbar**2
    return meff

def cutoff(kpps, band):
    offset = polyfit(kpps, band, 2)[2]
    return offset

n_spacings = 5
selector = 1
if selector == 0:
    c = 1
    n1 = sqrt(2.33)
    n2 = sqrt(17.88)
    full_period = 2
    a = 1
    b = full_period - a
    hbar = 1
    oms = linspace(0.001, 5, 500)
    n_bands = 6
    interval = 0.05
    # First reproduce band edges for even spacing
    kpps = linspace(0.01, 2, 200)
    a_range = linspace(0.9, 1.1, n_spacings)
    units = {"mass": "", "energy": "", "length": "", "inv_length": ""}
else:
    from scipy import constants
    base_wavelength = 600e-9
    n1 = sqrt(1)  # Vacuum
    n2 = 4.0  # GaAs at around 600 nm
    #n2 = 2.5  # LiF at around 600 nm is n=1.4, but this doesn't really solve
    c = constants.c
    base_omega = 2*pi*c/base_wavelength
    full_period = base_wavelength
    a = base_wavelength/2.0
    b = full_period - a
    hbar = constants.hbar
    oms = linspace(0.001, 5, 500) * base_omega
    n_bands = 6
    #interval = 0.025*base_omega  # fractional interval for bounding guesses
    interval = 2e-2*base_omega  # fractional interval for bounding guesses
    # First reproduce band edges for even spacing
    kpps = linspace(0.01, 0.5, 200) * 2*pi/base_wavelength
    a_range = linspace(0.9, 1.2, n_spacings) * base_wavelength/2
    units = {"mass": " (kg)", "energy": " (s$^{-1}$)", "length": " (m)", "inv_length": " (m$^{-1}$)"}

# Start by calculating the bottom of the bands
kpp = 0
RHSs = [RHS(i, kpp, a, b, n1, n2) for i in oms]
indices = bandedges(RHSs, kpp, a, b, n1, n2)
om_cutoff = [oms[s] for s in indices]
####print("Band bottoms are at: ", om_cutoff)
bands = [[] for i in range(n_bands)]
kpp = 0
RHSs = [RHS(k, kpp, a, b, n1, n2) for k in oms]
indices = bandedges(RHSs, kpp, a, b, n1, n2)
om_cutoff = [oms[s] for s in indices]

# Now extend out to non-zero kpp
for i, om_bottom in enumerate(om_cutoff[0:n_bands]):
    #kB = (i+1)*pi/2
    lower_bound, upper_bound = om_bottom-interval, om_bottom+interval
    #print(i, lower_bound, upper_bound)
    for kpp in kpps:
        om = brentq(omega_to_solve, lower_bound, upper_bound)
        bands[i] = bands[i] + [om]
        lower_bound, upper_bound = om-interval, om+interval

plt.figure(4), plt.clf()
for n in range(n_bands):
    plt.plot(kpps, bands[n])
plt.xlabel("kpp"+units["inv_length"])
plt.ylabel("$\omega$")

# Now focus on small kpp, vary a, keeping a+b=2, and find effective masses of band edges
collected_masses = []
collected_cutoffs = []
#kpps = linspace(0.01,0.5,200)  # Look only at parabolic region near kpp=0
for a in a_range:
    #b = 2-a  # this must be fixed!
    b = full_period - a  # this must be fixed!
    bands = [[] for i in range(n_bands)]
    # Calculate starting point
    kpp = 0
    RHSs = [RHS(k, kpp, a, b, n1, n2) for k in oms]
    indices = bandedges(RHSs, kpp, a, b, n1, n2)
    om_cutoff = [oms[s] for s in indices]
    # Calculate the rest
    for i, om_bottom in enumerate(om_cutoff[0:n_bands]):
        kB = (i+1)*pi/2
        lower_bound, upper_bound = om_bottom-interval, om_bottom+interval
        for kpp in kpps:
            om = brentq(omega_to_solve, lower_bound, upper_bound)
            bands[i] = bands[i] + [om]
            lower_bound, upper_bound = om-interval, om+interval
    # Fit for effective mass
    masses = [meff(kpps, bands[i]) for i in range(n_bands)[1:]]
    collected_masses = collected_masses + [masses]
    cutoffs = [cutoff(kpps, bands[i]) for i in range(n_bands)[1:]]
    collected_cutoffs = collected_cutoffs + [cutoffs]

transpose_mass = [[collected_masses[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]
transpose_cutoffs = [[collected_cutoffs[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]

plt.figure(5), plt.clf()
plt.subplot(1, 2, 1)
for n in range(n_bands-1):
    plt.plot(a_range, transpose_mass[n])
plt.xlabel("a"+units["length"])
plt.ylabel("meff"+units["mass"])
plt.grid(1)
plt.subplot(1, 2, 2)
for n in range(n_bands-1):
    plt.plot(a_range, transpose_cutoffs[n])
plt.xlabel("a"+units["length"])
plt.ylabel("cutoff"+units["energy"])
plt.grid(1)

plt.figure(6), plt.clf()
for n in range(n_bands-1):
    plt.plot(a_range, array(transpose_mass[n])/array(transpose_cutoffs[n]))
if units["mass"] != "":
    ratio_units = " (m s)"
else:
    ratio_units = ""
plt.ylabel("meff / cutoff"+ratio_units)
plt.xlabel("a"+units["length"])

Efficient implementation of factorization machine with matrix operations?

The link is here: https://www.csie.ntu.edu.tw/~r01922136/slides/ffm.pdf (slides 5-6)
Given the following matrices:
X : n * d
W : d * k
Is there an efficient way to calculate the n x 1 matrix using only matrix operations (e.g. numpy, tensorflow), where the jth element is (matching the attempt below, with x_j the jth row of X and w_a the ath row of W):

output_j = x_j^T (W W^T) x_j = sum_{a=1..d} sum_{b=1..d} <w_a, w_b> x_{ja} x_{jb}
EDIT:
My current attempt is below, but it's obviously not very space efficient, as it requires storing an intermediate of size n*d*d:
n = 1000
d = 256
k = 32
x = np.random.normal(size=[n,d])
w = np.random.normal(size=[d,k])
xxt = np.matmul(x.reshape([n,d,1]),x.reshape([n,1,d]))
wwt = np.matmul(w.reshape([1,d,k]),w.reshape([1,k,d]))
output = xxt*wwt
output = np.sum(output,(1,2))
Avoid large temporary arrays
Not every type of algorithm is easy or obvious to vectorize. The np.sum(xxt*wwt) expression can be rewritten using np.einsum; this should be faster than your solution, but it has some other limitations (e.g. no multithreading).
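For instance (a sketch, reusing n, d, k, x and w from the question), the per-row quadratic form can be contracted directly without ever materializing the n*d*d array:

wwt = np.matmul(w.reshape(d, k), w.reshape(k, d))  # d x d, mirrors the question's reshape-based product
output = np.einsum('na,nb,ab->n', x, x, wwt, optimize=True)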
I would therefore suggest using a compiler like Numba.
Example
import numpy as np
import numba as nb
import time

@nb.njit(fastmath=True, parallel=True)
def factorization_nb(w, x):
    n = x.shape[0]
    d = x.shape[1]
    k = w.shape[1]
    output = np.empty(n, dtype=w.dtype)
    wwt = np.dot(w.reshape((d, k)), w.reshape((k, d)))
    for i in nb.prange(n):
        sum = 0.
        for j in range(d):
            for jj in range(d):
                sum += x[i, j]*x[i, jj]*wwt[j, jj]
        output[i] = sum
    return output

def factorization_orig(w, x):
    n = x.shape[0]
    d = x.shape[1]
    k = w.shape[1]
    xxt = np.matmul(x.reshape([n, d, 1]), x.reshape([n, 1, d]))
    wwt = np.matmul(w.reshape([1, d, k]), w.reshape([1, k, d]))
    output = xxt*wwt
    output = np.sum(output, (1, 2))
    return output
Measuring Performance
n = 1000
d = 256
k = 32
x = np.random.normal(size=[n, d])
w = np.random.normal(size=[d, k])

# the first call has some compilation overhead
res_1 = factorization_nb(w, x)

t1 = time.time()
for i in range(100):
    res_1 = factorization_nb(w, x)
    #res_2 = factorization_orig(w, x)
print(time.time()-t1)
Timings
factorization_nb: 4.2 ms per iteration (about a 110x speedup)
factorization_orig: 460 ms per iteration
For an einsum implementation in PyTorch, it would be something like
V = torch.randn([50, 10])
x = torch.randn([50])
result = (torch.einsum('ik,jk,i,j->', V, V, x, x)-torch.einsum('ik,ik,i,i->', V, V, x, x))/2
where we subtract the contribution from the feature weight being dotted with itself.
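Equivalently (a sketch with the same V and x as above), the standard FM identity gives the same result in O(dk) without the quadratic contraction:

xV = torch.mv(V.t(), x)  # V^T x, shape [10]
result = 0.5 * (xV.pow(2).sum() - (V.pow(2) * x.pow(2).unsqueeze(1)).sum())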

Evaluating the squared term of a gaussian kernel for having a covariance matrix for multi-dimensional inputs [duplicate]

I have the following code, and it is taking forever in Python. There must be a way to translate this calculation into broadcast operations...
def euclidean_square(a, b):
    squares = np.zeros((a.shape[0], b.shape[0]))
    for i in range(squares.shape[0]):
        for j in range(squares.shape[1]):
            diff = a[i,:] - b[j,:]
            sqr = diff**2.0
            squares[i,j] = np.sum(sqr)
    return squares
You can use np.einsum after calculating the differences in a broadcasted way, like so -
ab = a[:,None,:] - b
out = np.einsum('ijk,ijk->ij',ab,ab)
Or use scipy's cdist with its optional metric argument set as 'sqeuclidean' to give us the squared euclidean distances as needed for our problem, like so -
from scipy.spatial.distance import cdist
out = cdist(a,b,'sqeuclidean')
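A quick sanity check (a sketch) that the two approaches agree:

import numpy as np
from scipy.spatial.distance import cdist

a = np.random.rand(5, 3)
b = np.random.rand(7, 3)
ab = a[:,None,:] - b
assert np.allclose(np.einsum('ijk,ijk->ij', ab, ab), cdist(a, b, 'sqeuclidean'))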
I collected the different methods proposed here, and in two other questions, and measured the speed of the different methods:
import numpy as np
import scipy.spatial
import sklearn.metrics
def dist_direct(x, y):
    d = np.expand_dims(x, -2) - y
    return np.sum(np.square(d), axis=-1)

def dist_einsum(x, y):
    d = np.expand_dims(x, -2) - y
    return np.einsum('ijk,ijk->ij', d, d)

def dist_scipy(x, y):
    return scipy.spatial.distance.cdist(x, y, "sqeuclidean")

def dist_sklearn(x, y):
    return sklearn.metrics.pairwise.pairwise_distances(x, y, "sqeuclidean")

def dist_layers(x, y):
    res = np.zeros((x.shape[0], y.shape[0]))
    for i in range(x.shape[1]):
        res += np.subtract.outer(x[:, i], y[:, i])**2
    return res

# inspired by the excellent https://github.com/droyed/eucl_dist
def dist_ext1(x, y):
    nx, p = x.shape
    x_ext = np.empty((nx, 3*p))
    x_ext[:, :p] = 1
    x_ext[:, p:2*p] = x
    x_ext[:, 2*p:] = np.square(x)
    ny = y.shape[0]
    y_ext = np.empty((3*p, ny))
    y_ext[:p] = np.square(y).T
    y_ext[p:2*p] = -2*y.T
    y_ext[2*p:] = 1
    return x_ext.dot(y_ext)

# https://stackoverflow.com/a/47877630/648741
def dist_ext2(x, y):
    return np.einsum('ij,ij->i', x, x)[:,None] + np.einsum('ij,ij->i', y, y) - 2 * x.dot(y.T)
I use timeit to compare the speed of the different methods. For the comparison, I use vectors of length 10, with 100 vectors in the first group, and 1000 vectors in the second group.
import timeit

p = 10
x = np.random.standard_normal((100, p))
y = np.random.standard_normal((1000, p))

for method in dir():
    if not method.startswith("dist_"):
        continue
    t = timeit.timeit(f"{method}(x, y)", number=1000, globals=globals())
    print(f"{method:12} {t:5.2f}ms")
On my laptop, the results are as follows:
dist_direct 5.07ms
dist_einsum 3.43ms
dist_ext1 0.20ms <-- fastest
dist_ext2 0.35ms
dist_layers 2.82ms
dist_scipy 0.60ms
dist_sklearn 0.67ms
While the two methods dist_ext1 and dist_ext2, both based on writing (x-y)**2 as x**2 - 2*x*y + y**2, are very fast, there is a downside: when the distance between x and y is very small, the numerical result can sometimes be (very slightly) negative due to cancellation error.
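If exact non-negativity matters (e.g. before taking a square root), a cheap safeguard is to clamp the result (a sketch):

out = np.maximum(dist_ext2(x, y), 0)  # clip the tiny negatives caused by cancellation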
Another solution besides using cdist is the following
difference_squared = np.zeros((a.shape[0], b.shape[0]))
for dimension_iterator in range(a.shape[1]):
    difference_squared = difference_squared + np.subtract.outer(a[:, dimension_iterator], b[:, dimension_iterator])**2.

Numerical discrepancy between tf.scan and matmul

Following the answer here: Tensorflow - matmul of input matrix with batch data
I compared the tf.scan-based results with the tf.matmul-based results. As far as I can see, the results should be identical, but I consistently get different results. I also compared with Keras' K.dot, which has the same functionality, as a reference.
I'd appreciate any explanation of why that is, or of what my mistake is.
Attached is the full MWE, with two results, evaluated on two separate computers (with different GPUs).
import tensorflow as tf
import keras.backend as K
from keras.layers import Lambda, Input
import numpy as np

na = 100
nb = 10000
mb = 10

v = Input((1, na))
e = tf.placeholder(dtype=tf.float32, shape=(na, nb))

def dot_a(v):
    res = K.dot(v, e)
    return res

initer = np.zeros((1, nb), dtype=np.float32)

def dot_b(v):
    res = tf.scan(lambda a, x: tf.matmul(x, e), v, initializer=tf.constant(initer))
    return res

def dot_c(v):
    v = tf.reshape(v, [-1, na])
    h = tf.matmul(v, e)
    res = tf.reshape(h, [-1, 1, nb])
    return res

mul1 = Lambda(dot_a)(v)
mul2 = Lambda(dot_b)(v)
mul3 = Lambda(dot_c)(v)

# inputs
v0 = np.random.random((mb, 1, na)).astype(np.float32)
e0 = np.random.random((na, nb)).astype(np.float32)
v0 = np.round(v0, decimals=2)
e0 = np.round(e0, decimals=2)

sess = tf.Session()
out1, out2, out3 = sess.run([mul1, mul2, mul3], feed_dict={v: v0, e: e0})
print('norm(out, out-matmul)', np.linalg.norm(out1-out2))
print('norm(out, out-scan)', np.linalg.norm(out1-out3))
Output on computer 1:
norm(out, out-matmul) 0.000715436
norm(out, out-scan) 0.0
Output on computer 2:
norm(out, out-matmul) 0.000511952
norm(out, out-scan) 0.0

How can I pass additional input arguments to odeint function in TensorFlow

I have a system of ordinary differential equations with external deterministic inputs (controls) and stochastic components. How can I cleanly (keeping good code style) pass these additional input arguments to the equation function through tf.contrib.integrate.odeint(), besides the initial state, if there is a way to do it at all? Or is defining them in the outer scope and referring to them from within the equation function the only way to do it so far?
I have the same problem when I try to simulate a Hindmarsh-Rose model using the odeint solver. I would like to inject a current into the equation and do not know how to do it.
Here is a basic example:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

## Model parameters
# v' = u - a.v^3 + b.v^2 + I - z
# u' = c - d.v^2 - u
# z' = epsilon_z.(s.(v - v0) - z)

# parameters for the v and u terms
a = 1.0
b = 3.0
c = -3.0
d = 5.0
v0 = -1.4
s = 4.0
epsilon_z = 0.002

# initial conditions
v_init = -3.0
u_init = 0.0
z_init = +0.9

# injected current, fixed for now
Id = 5

# What I would like to do:
# def I(t):
#     if 0.0 <= t < 300.0:
#         return 0.0
#     elif 300.0 <= t < 1700.0:
#         return Iech
#     return 0.0

def HR_equation(state, t):
    v, u, z = tf.unstack(state)
    dv = -a * v*v*v + b * v*v + u - z + Id
    du = -d * v*v - u + c
    dz = epsilon_z * (s*(v - v0) - z)
    return tf.stack([dv, du, dz])

init_state = tf.constant([v_init, u_init, z_init], dtype=tf.float64)
t = np.linspace(0, 2000, num=5000)
tensor_state, tensor_info = tf.contrib.integrate.odeint(HR_equation, init_state, t, full_output=True)

sess = tf.Session()
state, info = sess.run([tensor_state, tensor_info])
v, u, z = state.T
plt.plot(v, u)
plt.show()
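One way to pass the current as an extra argument without relying on module-level globals is to wrap the right-hand side in a closure (or functools.partial), so that odeint still sees a two-argument function. Below is a hedged sketch, where I_of_t is a hypothetical helper approximating the commented-out step current via tf.where; note that a discontinuous right-hand side can still give the adaptive solver trouble:

def I_of_t(t):
    # 0 before t = 300, Id between 300 and 1700, 0 afterwards (hypothetical helper)
    inside = tf.logical_and(t >= 300.0, t < 1700.0)
    return tf.where(inside, tf.constant(float(Id), tf.float64), tf.constant(0.0, tf.float64))

def HR_equation_driven(state, t, I_ext):
    v, u, z = tf.unstack(state)
    dv = -a * v*v*v + b * v*v + u - z + I_ext(t)
    du = -d * v*v - u + c
    dz = epsilon_z * (s*(v - v0) - z)
    return tf.stack([dv, du, dz])

# the closure binds I_of_t while odeint still receives a func(state, t)
tensor_state, tensor_info = tf.contrib.integrate.odeint(
    lambda state, t: HR_equation_driven(state, t, I_of_t),
    init_state, t, full_output=True)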