how to convert modelview matrix to gluLookAt parameters? - glulookat

I had a requirement in Bullet physics with Opengl where I have modelview matrix but need to get the same matrix by calling gluLookAt. Thanks in advance.

From any 4x4 matrix we can get gluLookAt parameters which are CameraPos, CameraTarget, UpVector.
Here is the code to get CameraPos, CameraTarget, UpVector from ModelView matrix.
float modelViewMat[16];
glGetFloatv(GL_MODELVIEW_MATRIX, modelViewMat);
// Here instead of model view matrix we can pass any 4x4 matrix.
float params[9];
GetGluLookAtParameters(modelViewMat, params);
CameraPos.x = params[0];
CameraPos.y = params[1];
CameraPos.z = params[2];
CameraTarget.x = params[3];
CameraTarget.y = params[4];
CameraTarget.z = params[5];
UpVector.x = params[6];
UpVector.y = params[7];
UpVector.z = params[8];
void GetGluLookAtParameters(float* m, float* gluLookAtParams)
{
VECTOR3D sideVector(m[0], m[4], m[8]);
VECTOR3D upVector(m[1], m[5], m[9]);
VECTOR3D forwardVector(-m[2], -m[6], -m[10]);
sideVector.Normalize();
upVector.Normalize();
forwardVector.Normalize();
float rotMat[16];
memcpy(rotMat, m, 16*sizeof(float));
rotMat[12] = rotMat[13] = rotMat[14] = rotMat[3] = rotMat[7] = rotMat[11] = 0.0f;
rotMat[15] = 1.0f;
float rotInvert[16];
__gluInvertMatrixd(rotMat, rotInvert);
float transMat[16];
memset(transMat, 0, 16*sizeof(float));
transMat[0] = transMat[5] = transMat[10] = transMat[15] = 1.0f;
MultMat(rotInvert, m, transMat);
gluLookAtParams[0] = -transMat[12];
gluLookAtParams[1] = -transMat[13];
gluLookAtParams[2] = -transMat[14];
gluLookAtParams[3] = -transMat[12] + forwardVector.x;
gluLookAtParams[4] = -transMat[13] + forwardVector.y;
gluLookAtParams[5] = -transMat[14] + forwardVector.z;
gluLookAtParams[6] = upVector.x;
gluLookAtParams[7] = upVector.y;
gluLookAtParams[8] = upVector.z;
}
void MultMat(float* a, float* b, float* result)
{
result[0] = a[0]*b[0] + a[4]*b[1] + a[8]*b[2] + a[12]*b[3];
result[1] = a[1]*b[0] + a[5]*b[1] + a[9]*b[2] + a[13]*b[3];
result[2] = a[2]*b[0] + a[6]*b[1] + a[10]*b[2] + a[14]*b[3];
result[3] = a[3]*b[0] + a[7]*b[1] + a[11]*b[2] + a[15]*b[3];
result[4] = a[0]*b[4] + a[4]*b[5] + a[8]*b[6] + a[12]*b[7];
result[5] = a[1]*b[4] + a[5]*b[5] + a[9]*b[6] + a[13]*b[7];
result[6] = a[2]*b[4] + a[6]*b[5] + a[10]*b[6] + a[14]*b[7];
result[7] = a[3]*b[4] + a[7]*b[5] + a[11]*b[6] + a[15]*b[7];
result[8] = a[0]*b[8] + a[4]*b[9] + a[8]*b[10] + a[12]*b[11];
result[9] = a[1]*b[8] + a[5]*b[9] + a[9]*b[10] + a[13]*b[11];
result[10] = a[2]*b[8] + a[6]*b[9] + a[10]*b[10] + a[14]*b[11];
result[11] = a[3]*b[8] + a[7]*b[9] + a[11]*b[10] + a[15]*b[11];
result[12] = a[0]*b[12] + a[4]*b[13] + a[8]*b[14] + a[12]*b[15];
result[13] = a[1]*b[12] + a[5]*b[13] + a[9]*b[14] + a[13]*b[15];
result[14] = a[2]*b[12] + a[6]*b[13] + a[10]*b[14] + a[14]*b[15];
result[15] = a[3]*b[12] + a[7]*b[13] + a[11]*b[14] + a[15]*b[15];
}
int __gluInvertMatrixd(const float src[16], float inverse[16])
{
int i, j, k, swap;
float t;
GLfloat temp[4][4];
for (i=0; i<4; i++)
for (j=0; j<4; j++)
temp[i][j] = src[i*4+j];
for(int i=0;i<16;i++)
inverse[i] = 0;
inverse[0] = inverse[5] = inverse[10] = inverse[15] = 1.0f;
for(i=0; i<4; i++)
{
swap = i;
for (j = i + 1; j < 4; j++)
if (fabs(temp[j][i]) > fabs(temp[i][i]))
swap = j;
if (swap != i) {
//Swap rows.
for (k = 0; k < 4; k++) {
t = temp[i][k];
temp[i][k] = temp[swap][k];
temp[swap][k] = t;
t = inverse[i*4+k];
inverse[i*4+k] = inverse[swap*4+k];
inverse[swap*4+k] = t;
}
}
if (temp[i][i] == 0)
return 0;
t = temp[i][i];
for (k = 0; k < 4; k++) {
temp[i][k] /= t;
inverse[i*4+k] /= t;
}
for (j = 0; j < 4; j++) {
if (j != i) {
t = temp[j][i];
for (k = 0; k < 4; k++) {
temp[j][k] -= temp[i][k]*t;
inverse[j*4+k] -= inverse[i*4+k]*t;
}
}
}
}
return 1;
}

Related

Algorithm for computing inverse polynomials in NTRUEncrypt

I'm implementing in code an algorithm for computing inverse polynomials in the NTRU cryptosystem, and I'm using the paper "Almost Inverses and Fast NTRU Key Creation" by Joseph H. Silverman. I implemented the second pseudo-code as:
int inverse_mod_p(polynomial *r, polynomial *a)
{
int k;
int16_t b[NTRU_N + 1], c[NTRU_N + 1], f[NTRU_N + 1], g[NTRU_N + 1];
int i;
int16_t aux;
int zero_f;
int constant_f;
int deg_fg;
memset(b, 0, (NTRU_N + 1) * sizeof(int16_t));
b[0] = 1;
memset(c, 0, (NTRU_N + 1) * sizeof(int16_t));
memcpy(f, a->coeffs, NTRU_N * sizeof(int16_t));
f[NTRU_N] = 0;
memset(g, 0, (NTRU_N + 1) * sizeof(int16_t));
g[0] = -1;
g[NTRU_N] = 1;
while (1)
{
zero_f = 1;
for (i = 0; i < NTRU_N + 1; i++)
{
if (f[i] != 0)
{
zero_f = 0;
break;
}
}
if (zero_f)
return 1;
while (f[0] == 0)
{
for (i = 0; i < NTRU_N; i++)
{
f[i] = f[i + 1];
c[NTRU_N - i] = c[NTRU_N - i - 1];
}
f[NTRU_N] = 0;
c[0] = 0;
k++;
}
constant_f = 1;
for (i = 1; i < NTRU_N + 1; i++)
{
if (f[i] != 0)
{
constant_f = 0;
break;
}
}
if (constant_f)
break;
deg_fg = 0;
for (i = NTRU_N; i >= 0; i--)
{
if (f[i] == 0 && g[i] != 0)
{
deg_fg = 1;
break;
}
else if (f[i] != 0 && g[i] == 0)
{
break;
}
}
if (deg_fg)
{
for (i = 0; i < NTRU_N + 1; i++)
{
aux = f[i];
f[i] = g[i];
g[i] = aux;
aux = b[i];
b[i] = c[i];
c[i] = aux;
}
}
if (f[0] == g[0])
{
for (i = 0; i < NTRU_N + 1; i++)
{
f[i] = (f[i] - g[i] + 3) % 3;
b[i] = (b[i] - c[i] + 3) % 3;
}
}
else
{
for (i = 0; i < NTRU_N + 1; i++)
{
f[i] = (f[i] + g[i] + 3) % 3;
b[i] = (b[i] + c[i] + 3) % 3;
}
}
}
k = k % NTRU_N;
for (i = NTRU_N - 1; i >= 0; i--)
{
if (i - k < 0)
r->coeffs[i - k + NTRU_N] = b[i] * f[0];
else
r->coeffs[i - k] = b[i] * f[0];
}
for (i = 0; i < NTRU_N; i++)
r->coeffs[i] = (r->coeffs[i] + 3) % 3;
return 0;
}
But this seems to be wrong. I tested it using the example giveng in Wikipedia: https://en.wikipedia.org/wiki/NTRUEncrypt . The polynomial -1 + x + x^2 - x^4 + x^6 + x^9 - x^10 should have as inverse the polynomial 1 + 2x + 2x^3 + 2x^4 + x^5 + 2x^7 + x^8 - x^10 , but I got the following result:
Polinomial:
-1 1 1 0 -1 0 1 0 0 1 -1
Inverse polinomial:
0 2 2 1 0 2 1 2 0 1 2
Where is the error in the implementation?

THREE.JS + Delaunator.JS Keeping faces indexing

Probably this question is for Mapbox team and Delaunator.JS developers, but I hope that somebody could help me here.
I have a pseudo quadtree geometry (THREE.BufferGeometry) set by series of points having seven partitions with their individual draw call (geometry.addGroup() method) and material.
//pseudo quadtree
var points = [], indices = [], quad_uvs = [], groups = [], materials = [];
getSegmentSubpoints(0, new THREE.Vector3(-1024, 0, -1024), 1024, 1024, 4);
getSegmentSubpoints(1, new THREE.Vector3(0, 0, -1024), 1024, 1024, 4);
getSegmentSubpoints(2, new THREE.Vector3(-1024, 0, 0), 1024, 1024, 4);
getSegmentSubpoints(3, new THREE.Vector3(0, 0, 0), 512, 512, 8);
getSegmentSubpoints(4, new THREE.Vector3(512, 0, 0), 512, 512, 8);
getSegmentSubpoints(5, new THREE.Vector3(0, 0, 512), 512, 512, 8);
getSegmentSubpoints(6, new THREE.Vector3(512, 0, 512), 512, 512, 8);
var geometry = new THREE.BufferGeometry().setFromPoints(points);
geometry.setIndex(indices);
geometry.setAttribute( 'uv', new THREE.BufferAttribute(new Float32Array(quad_uvs), 2 ) );
geometry.computeVertexNormals();
var colors = [new THREE.Color(0xe6194b), new THREE.Color(0x3cb44b), new THREE.Color(0xffe119), new THREE.Color(0x4363d8), new THREE.Color(0xf58231), new THREE.Color(0x911eb4), new THREE.Color(0x46f0f0) ]
groups.forEach(function(g_, i_){ geometry.addGroup(g_.start, g_.end, g_.id); });
var plane = new THREE.Mesh(geometry, materials);
plane.rotation.set(Math.PI, 0, 0);
plane.position.set(-1152, 0, 0);
scene.add(plane);
function getSegmentSubpoints(id_, lt_, w_, h_, level_){
var subpoints = [];
var subindices = [];
var subquad = [];
var lastIndex = points.length;
var lastIndex2 = indices.length;
var step = {x: w_ / level_, z: h_ / level_ };
var stepT = {x: 1.0 / level_, z: 1.0 / level_ };
for(var z = 0; z <= level_; z++){
for(var x = 0; x <= level_; x++){
var dx = lt_.x + step.x * x;
var dz = lt_.z + step.z * z;
var dy = noise.simplex2(dx / 512.0, dz / 512.0) * 32.0;
subquad.push(...[stepT.x * x, stepT.z * z]);
subpoints.push(new THREE.Vector3(dx, dy, dz));
}
}
for(var i = 0; i < subpoints.length - level_ - 2; i++) {
if(i % (level_ + 1) != (level_)){
subindices.push(lastIndex + i, lastIndex + i + 1, lastIndex + i + level_ + 2, lastIndex + i + level_ + 2, lastIndex + i + level_ + 1, lastIndex + i);
}
}
points.push(...subpoints);
indices.push(...subindices);
quad_uvs.push(...subquad);
groups.push({id: id_, start: lastIndex2, end: subindices.length});
materials.push(new THREE.MeshBasicMaterial({ wireframe: true, map: new THREE.TextureLoader().load("textures/" + id_ + ".jpg")}));
}
Everything is fine, but for my project I have to process the current geometry through Delaunator.JS and it seems that the output has different triangles/faces indexing starting from the center.
There is dynamic colors for each segment to visualize indexing. Eventually, it has to be the same as previous one with seven materials and individual draw calls.
//delaunay
var geometry = new THREE.BufferGeometry().setFromPoints(points);
var indexDelaunay = Delaunator.from(points.map(v => { return [v.x, v.z]; }) );
var meshIndex = [];
for (let i = 0; i < indexDelaunay.triangles.length; i++){ meshIndex.push(indexDelaunay.triangles[i]); }
geometry.setIndex(meshIndex);
geometry.computeVertexNormals();
count = 0;
for(var i = 0; i < meshIndex.length; i += 6){
geometry.addGroup(i, 6, i / 6);
materials.push(new THREE.MeshBasicMaterial({ wireframe: true, color: new THREE.Color("hsl(" + (360 / 316 * count) + ",80%, 80%)")}))
count++;
}
var plane = new THREE.Mesh(geometry, materials);
plane.rotation.set(Math.PI, 0, 0)
plane.position.set(1152, 0, 0);
scene.add(plane);
So is there a way to re-index faces back so I could get the same result?
Yes, I could go through each Delaunator.triangles check its position/area for if it fits the certain partition, but it's not an elegant and fast solution. I bet it's possible to tweak Delaunator.JS code for right indexing on the fly.
The snippet is attached as well.
//https://hofk.de/main/discourse.threejs/2018/Triangulation/Triangulation.html
////by Mapbox https://github.com/mapbox/delaunator
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global.Delaunator = factory());
}(this, (function () { 'use strict';
var EPSILON = Math.pow(2, -52);
var Delaunator = function Delaunator(coords) {
var this$1 = this;
var n = coords.length >> 1;
if (n > 0 && typeof coords[0] !== 'number') { throw new Error('Expected coords to contain numbers.'); }
this.coords = coords;
var maxTriangles = 2 * n - 5;
var triangles = this.triangles = new Uint32Array(maxTriangles * 3);
var halfedges = this.halfedges = new Int32Array(maxTriangles * 3);
this._hashSize = Math.ceil(Math.sqrt(n));
var hullPrev = this.hullPrev = new Uint32Array(n);
var hullNext = this.hullNext = new Uint32Array(n);
var hullTri = this.hullTri = new Uint32Array(n);
var hullHash = new Int32Array(this._hashSize).fill(-1);
var ids = new Uint32Array(n);
var minX = Infinity;
var minY = Infinity;
var maxX = -Infinity;
var maxY = -Infinity;
for (var i = 0; i < n; i++) {
var x = coords[2 * i];
var y = coords[2 * i + 1];
if (x < minX) { minX = x; }
if (y < minY) { minY = y; }
if (x > maxX) { maxX = x; }
if (y > maxY) { maxY = y; }
ids[i] = i;
}
var cx = (minX + maxX) / 2;
var cy = (minY + maxY) / 2;
var minDist = Infinity;
var i0, i1, i2;
for (var i$1 = 0; i$1 < n; i$1++) {
var d = dist(cx, cy, coords[2 * i$1], coords[2 * i$1 + 1]);
if (d < minDist) {
i0 = i$1;
minDist = d;
}
}
var i0x = coords[2 * i0];
var i0y = coords[2 * i0 + 1];
minDist = Infinity;
for (var i$2 = 0; i$2 < n; i$2++) {
if (i$2 === i0) { continue; }
var d$1 = dist(i0x, i0y, coords[2 * i$2], coords[2 * i$2 + 1]);
if (d$1 < minDist && d$1 > 0) {
i1 = i$2;
minDist = d$1;
}
}
var i1x = coords[2 * i1];
var i1y = coords[2 * i1 + 1];
var minRadius = Infinity;
for (var i$3 = 0; i$3 < n; i$3++) {
if (i$3 === i0 || i$3 === i1) { continue; }
var r = circumradius(i0x, i0y, i1x, i1y, coords[2 * i$3], coords[2 * i$3 + 1]);
if (r < minRadius) {
i2 = i$3;
minRadius = r;
}
}
var i2x = coords[2 * i2];
var i2y = coords[2 * i2 + 1];
if (minRadius === Infinity) {
throw new Error('No Delaunay triangulation exists for this input.');
}
if (orient(i0x, i0y, i1x, i1y, i2x, i2y)) {
var i$4 = i1;
var x$1 = i1x;
var y$1 = i1y;
i1 = i2;
i1x = i2x;
i1y = i2y;
i2 = i$4;
i2x = x$1;
i2y = y$1;
}
var center = circumcenter(i0x, i0y, i1x, i1y, i2x, i2y);
this._cx = center.x;
this._cy = center.y;
var dists = new Float64Array(n);
for (var i$5 = 0; i$5 < n; i$5++) {
dists[i$5] = dist(coords[2 * i$5], coords[2 * i$5 + 1], center.x, center.y);
}
quicksort(ids, dists, 0, n - 1);
this.hullStart = i0;
var hullSize = 3;
hullNext[i0] = hullPrev[i2] = i1;
hullNext[i1] = hullPrev[i0] = i2;
hullNext[i2] = hullPrev[i1] = i0;
hullTri[i0] = 0;
hullTri[i1] = 1;
hullTri[i2] = 2;
hullHash[this._hashKey(i0x, i0y)] = i0;
hullHash[this._hashKey(i1x, i1y)] = i1;
hullHash[this._hashKey(i2x, i2y)] = i2;
this.trianglesLen = 0;
this._addTriangle(i0, i1, i2, -1, -1, -1);
for (var k = 0, xp = (void 0), yp = (void 0); k < ids.length; k++) {
var i$6 = ids[k];
var x$2 = coords[2 * i$6];
var y$2 = coords[2 * i$6 + 1];
if (k > 0 && Math.abs(x$2 - xp) <= EPSILON && Math.abs(y$2 - yp) <= EPSILON) { continue; }
xp = x$2;
yp = y$2;
if (i$6 === i0 || i$6 === i1 || i$6 === i2) { continue; }
var start = 0;
for (var j = 0, key = this._hashKey(x$2, y$2); j < this._hashSize; j++) {
start = hullHash[(key + j) % this$1._hashSize];
if (start !== -1 && start !== hullNext[start]) { break; }
}
start = hullPrev[start];
var e = start, q = (void 0);
while (q = hullNext[e], !orient(x$2, y$2, coords[2 * e], coords[2 * e + 1], coords[2 * q], coords[2 * q + 1])) {
e = q;
if (e === start) {
e = -1;
break;
}
}
if (e === -1) { continue; }
var t = this$1._addTriangle(e, i$6, hullNext[e], -1, -1, hullTri[e]);
hullTri[i$6] = this$1._legalize(t + 2);
hullTri[e] = t;
hullSize++;
var n$1 = hullNext[e];
while (q = hullNext[n$1], orient(x$2, y$2, coords[2 * n$1], coords[2 * n$1 + 1], coords[2 * q], coords[2 * q + 1])) {
t = this$1._addTriangle(n$1, i$6, q, hullTri[i$6], -1, hullTri[n$1]);
hullTri[i$6] = this$1._legalize(t + 2);
hullNext[n$1] = n$1;
hullSize--;
n$1 = q;
}
if (e === start) {
while (q = hullPrev[e], orient(x$2, y$2, coords[2 * q], coords[2 * q + 1], coords[2 * e], coords[2 * e + 1])) {
t = this$1._addTriangle(q, i$6, e, -1, hullTri[e], hullTri[q]);
this$1._legalize(t + 2);
hullTri[q] = t;
hullNext[e] = e;
hullSize--;
e = q;
}
}
this$1.hullStart = hullPrev[i$6] = e;
hullNext[e] = hullPrev[n$1] = i$6;
hullNext[i$6] = n$1;
hullHash[this$1._hashKey(x$2, y$2)] = i$6;
hullHash[this$1._hashKey(coords[2 * e], coords[2 * e + 1])] = e;
}
this.hull = new Uint32Array(hullSize);
for (var i$7 = 0, e$1 = this.hullStart; i$7 < hullSize; i$7++) {
this$1.hull[i$7] = e$1;
e$1 = hullNext[e$1];
}
this.hullPrev = this.hullNext = this.hullTri = null;
this.triangles = triangles.subarray(0, this.trianglesLen);
this.halfedges = halfedges.subarray(0, this.trianglesLen);
};
Delaunator.from = function from (points, getX, getY) {
if ( getX === void 0 ) getX = defaultGetX;
if ( getY === void 0 ) getY = defaultGetY;
var n = points.length;
var coords = new Float64Array(n * 2);
for (var i = 0; i < n; i++) {
var p = points[i];
coords[2 * i] = getX(p);
coords[2 * i + 1] = getY(p);
}
return new Delaunator(coords);
};
Delaunator.prototype._hashKey = function _hashKey (x, y) {
return Math.floor(pseudoAngle(x - this._cx, y - this._cy) * this._hashSize) % this._hashSize;
};
Delaunator.prototype._legalize = function _legalize (a) {
var this$1 = this;
var ref = this;
var triangles = ref.triangles;
var coords = ref.coords;
var halfedges = ref.halfedges;
var b = halfedges[a];
var a0 = a - a % 3;
var b0 = b - b % 3;
var al = a0 + (a + 1) % 3;
var ar = a0 + (a + 2) % 3;
var bl = b0 + (b + 2) % 3;
if (b === -1) { return ar; }
var p0 = triangles[ar];
var pr = triangles[a];
var pl = triangles[al];
var p1 = triangles[bl];
var illegal = inCircle(
coords[2 * p0], coords[2 * p0 + 1],
coords[2 * pr], coords[2 * pr + 1],
coords[2 * pl], coords[2 * pl + 1],
coords[2 * p1], coords[2 * p1 + 1]);
if (illegal) {
triangles[a] = p1;
triangles[b] = p0;
var hbl = halfedges[bl];
if (hbl === -1) {
var e = this.hullStart;
do {
if (this$1.hullTri[e] === bl) {
this$1.hullTri[e] = a;
break;
}
e = this$1.hullNext[e];
} while (e !== this.hullStart);
}
this._link(a, hbl);
this._link(b, halfedges[ar]);
this._link(ar, bl);
var br = b0 + (b + 1) % 3;
this._legalize(a);
return this._legalize(br);
}
return ar;
};
Delaunator.prototype._link = function _link (a, b) {
this.halfedges[a] = b;
if (b !== -1) { this.halfedges[b] = a; }
};
Delaunator.prototype._addTriangle = function _addTriangle (i0, i1, i2, a, b, c) {
var t = this.trianglesLen;
this.triangles[t] = i0;
this.triangles[t + 1] = i1;
this.triangles[t + 2] = i2;
this._link(t, a);
this._link(t + 1, b);
this._link(t + 2, c);
this.trianglesLen += 3;
return t;
};
function pseudoAngle(dx, dy) {
var p = dx / (Math.abs(dx) + Math.abs(dy));
return (dy > 0 ? 3 - p : 1 + p) / 4;
}
function dist(ax, ay, bx, by) {
var dx = ax - bx;
var dy = ay - by;
return dx * dx + dy * dy;
}
function orient(px, py, qx, qy, rx, ry) {
return (qy - py) * (rx - qx) - (qx - px) * (ry - qy) < 0;
}
function inCircle(ax, ay, bx, by, cx, cy, px, py) {
var dx = ax - px;
var dy = ay - py;
var ex = bx - px;
var ey = by - py;
var fx = cx - px;
var fy = cy - py;
var ap = dx * dx + dy * dy;
var bp = ex * ex + ey * ey;
var cp = fx * fx + fy * fy;
return dx * (ey * cp - bp * fy) -
dy * (ex * cp - bp * fx) +
ap * (ex * fy - ey * fx) < 0;
}
function circumradius(ax, ay, bx, by, cx, cy) {
var dx = bx - ax;
var dy = by - ay;
var ex = cx - ax;
var ey = cy - ay;
var bl = dx * dx + dy * dy;
var cl = ex * ex + ey * ey;
var d = 0.5 / (dx * ey - dy * ex);
var x = (ey * bl - dy * cl) * d;
var y = (dx * cl - ex * bl) * d;
return x * x + y * y;
}
function circumcenter(ax, ay, bx, by, cx, cy) {
var dx = bx - ax;
var dy = by - ay;
var ex = cx - ax;
var ey = cy - ay;
var bl = dx * dx + dy * dy;
var cl = ex * ex + ey * ey;
var d = 0.5 / (dx * ey - dy * ex);
var x = ax + (ey * bl - dy * cl) * d;
var y = ay + (dx * cl - ex * bl) * d;
return {x: x, y: y};
}
function quicksort(ids, dists, left, right) {
if (right - left <= 20) {
for (var i = left + 1; i <= right; i++) {
var temp = ids[i];
var tempDist = dists[temp];
var j = i - 1;
while (j >= left && dists[ids[j]] > tempDist) { ids[j + 1] = ids[j--]; }
ids[j + 1] = temp;
}
} else {
var median = (left + right) >> 1;
var i$1 = left + 1;
var j$1 = right;
swap(ids, median, i$1);
if (dists[ids[left]] > dists[ids[right]]) { swap(ids, left, right); }
if (dists[ids[i$1]] > dists[ids[right]]) { swap(ids, i$1, right); }
if (dists[ids[left]] > dists[ids[i$1]]) { swap(ids, left, i$1); }
var temp$1 = ids[i$1];
var tempDist$1 = dists[temp$1];
while (true) {
do { i$1++; } while (dists[ids[i$1]] < tempDist$1);
do { j$1--; } while (dists[ids[j$1]] > tempDist$1);
if (j$1 < i$1) { break; }
swap(ids, i$1, j$1);
}
ids[left + 1] = ids[j$1];
ids[j$1] = temp$1;
if (right - i$1 + 1 >= j$1 - left) {
quicksort(ids, dists, i$1, right);
quicksort(ids, dists, left, j$1 - 1);
} else {
quicksort(ids, dists, left, j$1 - 1);
quicksort(ids, dists, i$1, right);
}
}
}
function swap(arr, i, j) {
var tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
}
function defaultGetX(p) {
return p[0];
}
function defaultGetY(p) {
return p[1];
}
return Delaunator;
})));
var renderer, scene, camera, controls, loader, terrain, glsl, uniforms, root, tree;
renderer = new THREE.WebGLRenderer({
antialias: true
});
renderer.setPixelRatio(window.devicePixelRatio);
renderer.setSize(window.innerWidth, window.innerHeight);
renderer.setClearColor(0x000000);
document.body.appendChild(renderer.domElement);
scene = new THREE.Scene();
loader = new THREE.TextureLoader();
loader.crossOrigin = "";
camera = new THREE.PerspectiveCamera(35, window.innerWidth / window.innerHeight, 1, 51200);
camera.position.set(-3072, 2048, -3072);
controls = new THREE.OrbitControls(camera, renderer.domElement);
controls.enableDamping = true;
controls.dampingFactor = 0.05;
controls.screenSpacePanning = false;
controls.minDistance = 8;
controls.maxDistance = 10240;
controls.maxPolarAngle = Math.PI / 2;
//simple pseudo quadtree
var points = [], indices = [], quad_uvs = [], groups = [], materials = [];
getSegmentSubpoints(0, new THREE.Vector3(-1024, 0, -1024), 1024, 1024, 4);
getSegmentSubpoints(1, new THREE.Vector3(0, 0, -1024), 1024, 1024, 4);
getSegmentSubpoints(2, new THREE.Vector3(-1024, 0, 0), 1024, 1024, 4);
getSegmentSubpoints(3, new THREE.Vector3(0, 0, 0), 512, 512, 8);
getSegmentSubpoints(4, new THREE.Vector3(512, 0, 0), 512, 512, 8);
getSegmentSubpoints(5, new THREE.Vector3(0, 0, 512), 512, 512, 8);
getSegmentSubpoints(6, new THREE.Vector3(512, 0, 512), 512, 512, 8);
var geometry = new THREE.BufferGeometry().setFromPoints(points);
geometry.setIndex(indices);
geometry.setAttribute( 'uv', new THREE.BufferAttribute(new Float32Array(quad_uvs), 2 ) );
geometry.computeVertexNormals();
var materials = [];
var colors = [new THREE.Color(0xe6194b), new THREE.Color(0x3cb44b), new THREE.Color(0xffe119), new THREE.Color(0x4363d8), new THREE.Color(0xf58231), new THREE.Color(0x911eb4), new THREE.Color(0x46f0f0) ]
groups.forEach(function(g_, i_){ geometry.addGroup(g_.start, g_.end, g_.id); materials.push(new THREE.MeshBasicMaterial({wireframe: true, color: colors[i_]})) });
var plane = new THREE.Mesh(geometry, materials);
plane.rotation.set(Math.PI, 0, 0);
plane.position.set(-1152, 0, 0);
scene.add(plane);
//delaunay
materials = [];
var geometry = new THREE.BufferGeometry().setFromPoints(points);
var indexDelaunay = Delaunator.from(points.map(v => { return [v.x, v.z]; }) );
var meshIndex = [];
for (let i = 0; i < indexDelaunay.triangles.length; i++){ meshIndex.push(indexDelaunay.triangles[i]); }
geometry.setIndex(meshIndex);
geometry.computeVertexNormals();
count = 0;
for(var i = 0; i < meshIndex.length; i += 6){
geometry.addGroup(i, 6, i / 6);
materials.push(new THREE.MeshBasicMaterial({ wireframe: true, color: new THREE.Color("hsl(" + (360 / 316 * count) + ",80%, 80%)")}))
count++;
}
var plane = new THREE.Mesh(geometry, materials); //new THREE.MeshBasicMaterial({color: 0xFF00FF, side: THREE.DoubleSide, wireframe: true}) );
plane.rotation.set(Math.PI, 0, 0)
plane.position.set(1152, 0, 0);
scene.add(plane);
animate();
function getSegmentSubpoints(id_, lt_, w_, h_, level_){
var subpoints = [];
var subindices = [];
var subquad = [];
var lastIndex = points.length;
var lastIndex2 = indices.length;
var step = {x: w_ / level_, z: h_ / level_ };
var stepT = {x: 1.0 / level_, z: 1.0 / level_ };
for(var z = 0; z <= level_; z++){
for(var x = 0; x <= level_; x++){
var dx = lt_.x + step.x * x;
var dz = lt_.z + step.z * z;
var dy = 0;
subquad.push(...[stepT.x * x, stepT.z * z]);
subpoints.push(new THREE.Vector3(dx, dy, dz));
}
}
for(var i = 0; i < subpoints.length - level_ - 2; i++) {
if(i % (level_ + 1) != (level_)){
subindices.push(lastIndex + i, lastIndex + i + 1, lastIndex + i + level_ + 2, lastIndex + i + level_ + 2, lastIndex + i + level_ + 1, lastIndex + i);
}
}
points.push(...subpoints);
indices.push(...subindices);
quad_uvs.push(...subquad);
groups.push({id: id_, start: lastIndex2, end: subindices.length});
materials.push(new THREE.MeshBasicMaterial({ wireframe: true, map: new THREE.TextureLoader().load("textures/" + id_ + ".jpg")}));
}
function animate(){
controls.update();
renderer.render(scene, camera);
requestAnimationFrame(animate);
}
body { margin: 0; }
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>GLSL Intersection</title>
<meta name="viewport" content="initial-scale=1,maximum-scale=1,user-scalable=no" />
<script src="https://unpkg.com/three#0.116.0/build/three.min.js"></script>
<script src="https://unpkg.com/three#0.116.0/examples/js/controls/OrbitControls.js"></script>
</head>
<body>
</body>
</html>

Matrices multiply, Cannon algorithm implementation using MPI

First of all, of course I saw similar questions and solutions, but my implementation is a little bit different.
The main problem is that, the my code works only for one process, but it doesn't work for more processes.
I don't know what is the cause of this... Probably in communication between processes but I can't figure it out ;/
#include <mpi.h>
#include <stdio.h>
#include <math.h>
#include <iostream>
using namespace std;
int main(int argc, char **argv)
{
int x = 0;
double kk;
int proces;
int numprocs;
int right_neigh, left_neigh, up_neigh, down_neigh;
int tag = 99;
static const int n = 6; //size of matrices
int psa[n][n]; //nxn
int psb[n][n];
int pra[n][n];
int prb[n][n];
int c[n][n];
for (int i = 0; i < n; i++) { //let's make fist matrix
for (int j = 0; j < n; j++) {
psa[i][j] = (int)rand() % 100 + 1;
psb[i][j] = (int)rand() % 100 + 1;
c[i][j] = 0;
}
}
for (int i = 0; i < n; i++) { //an the 2nd one
for (int j = 0; j < n; j++) {
pra[i][j] = psa[i][j];
prb[i][j] = psb[i][j];
}
}
MPI_Status statRecv[2];
MPI_Request reqSend[2], reqRecv[2];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &proces);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int PP = numprocs;
double np = numprocs;
kk = sqrt(np);
int k = (int)kk;
if (proces < k) // below neighbour set
{
left_neigh = (proces + k - 1) % k;
right_neigh = (proces + k + 1) % k;
up_neigh = ((k - 1)*k) + proces;
}
if (proces == k)
{
left_neigh = ((proces + k - 1) % k) + k;
right_neigh = ((proces + k + 1) % k) + k;
up_neigh = proces - k;
}
if (proces > k)
{
x = proces / k;
left_neigh = ((proces + k - 1) % k) + x * k;
right_neigh = ((proces + k + 1) % k) + x * k;
up_neigh = proces - k;
}
if (proces == 0 || (proces / k) < (k - 1))
{
down_neigh = proces + k;
}
if ((proces / k) == (k - 1))
{
down_neigh = proces - ((k - 1)*k);
}
x = 0;
for(int kk = 0; kk < PP; kk++) //algorithm
{
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
for (int k = 0; k < n / PP; k++)
{
c[i][j] += psa[i][k] * psb[k][j];
}
}
}
MPI_Irecv(pra, n*n / PP / PP,MPI_FLOAT,left_neigh, tag,MPI_COMM_WORLD, reqRecv);
MPI_Irecv(prb, n*n / PP / PP,MPI_FLOAT,down_neigh,tag,MPI_COMM_WORLD,&reqRecv[1]);
MPI_Isend(psa, n*n / PP / PP,MPI_FLOAT,right_neigh,tag,MPI_COMM_WORLD, reqSend);
MPI_Isend(psb, n*n / PP / PP,MPI_FLOAT,up_neigh,tag,MPI_COMM_WORLD,&reqSend[1]);
MPI_Wait(reqRecv, statRecv);
}
cout << "A" << endl; //show result
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << pra[i][j] << " ";
}
cout << endl;
}
cout << "B" << endl;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << prb[i][j] << " ";
}
cout << endl;
}
cout << "C" << endl;
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
cout << c[i][j] << " ";
}
cout << endl;
}
MPI_Finalize();
return 0;
}
Ok I made it. Now everything is cool, my friend helped me out. But Admin please do not remove it, it can be helpful for someone.
#include <mpi.h>
#include <stdio.h>
#include <math.h>
#include <iostream> using namespace std; int main(int argc, char **argv) { int x = 0; double kk; int proces; int numprocs; int prawy_sasiad, lewy_sasiad, gorny_sasiad, dolny_sasiad; int tag = 99;
static const int n = 4; //rozmiar tablic
const int PP = 2; // pierwiastek z liczby procesow
int A[n][n] = {}, B[n][n] = {};
for (int i = 0; i < n; i++) {//inicjalizacja macierzy glownych
for (int j = 0; j < n; j++) {
A[i][j] = (int)rand() % 100 + 1;
B[i][j] = (int)rand() % 100 + 1;
}
}
/*
int val = 1;
for (int i = 0; i < n; i++) { //inicjalizacja macierzy glownych
for (int j = 0; j < n; j++) {
A[i][j] = val;
B[i][j] = val;
val++;
}
}
*/
MPI_Status statRecv2;
MPI_Request reqSend2, reqRecv2;
MPI_Status statRecv[2];
MPI_Request reqSend[2], reqRecv[2];
MPI_Init(0, 0);
MPI_Comm_rank(MPI_COMM_WORLD, &proces);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int pra[n / PP][n / PP] = {}, psa[n / PP][n / PP] = {};// podmacierze
int prb[n / PP][n / PP] = {}, psb[n / PP][n / PP] = {};
//int C[n / PP][n / PP] = {};//wynikowa
int C[n][n] = {};//wynikowa
//cout << proces << endl;
for (int i = 0; i < n / PP; i++)//podzielenie macierzy glownej na podmacierze, kazdy proces otrzymuje inna podmacierz
{
for (int j = 0; j < n / PP; j++)
{
psa[i][j] = A[proces / PP*(n / PP) + i][proces%PP*(n / PP) + j];
psb[i][j] = B[proces / PP*(n / PP) + i][proces%PP*(n / PP) + j];
//cout << A[proces / PP*(n / PP) + i][proces%PP*(n / PP) + j] << " ";
}
//cout << endl;
}
double np = numprocs;
kk = sqrt(np);
int k = (int)kk;
if (proces < k) // ustawienie sasiadow
{
lewy_sasiad = (proces + k - 1) % k;
prawy_sasiad = (proces + k + 1) % k;
gorny_sasiad = ((k - 1)*k) + proces;
}
if (proces == k)
{
lewy_sasiad = ((proces + k - 1) % k) + k;
prawy_sasiad = ((proces + k + 1) % k) + k;
gorny_sasiad = proces - k;
}
if (proces > k)
{
x = proces / k;
lewy_sasiad = ((proces + k - 1) % k) + x * k;
prawy_sasiad = ((proces + k + 1) % k) + x * k;
gorny_sasiad = proces - k;
}
if (proces == 0 || (proces / k) < (k - 1))
{
dolny_sasiad = proces + k;
}
if ((proces / k) == (k - 1))
{
dolny_sasiad = proces - ((k - 1)*k);
}
x = 0;
int p = 0;
do{ //przesuniecia
if (p < proces / PP)// w wierszu
{
MPI_Irecv(pra, n*n / PP / PP, MPI_FLOAT, prawy_sasiad, tag, MPI_COMM_WORLD, &reqRecv2);
MPI_Isend(psa, n*n / PP / PP, MPI_FLOAT, lewy_sasiad, tag, MPI_COMM_WORLD, &reqSend2);
MPI_Wait(&reqRecv2, &statRecv2);
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
psa[i][j] = pra[i][j];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
if (p < proces % PP)// i w kolumnie
{
MPI_Irecv(prb, n*n / PP / PP, MPI_FLOAT, dolny_sasiad, tag, MPI_COMM_WORLD, &reqRecv2);
MPI_Isend(psb, n*n / PP / PP, MPI_FLOAT, gorny_sasiad, tag, MPI_COMM_WORLD, &reqSend2);
MPI_Wait(&reqRecv2, &statRecv2);
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
psb[i][j] = prb[i][j];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
p++;
} while (p < n);
//MPI_Barrier(MPI_COMM_WORLD);
for (int kkk = 0; kkk < PP; kkk++) //algorytm
{
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
for (int k = 0; k < n / PP; k++)
{
C[i][j] += psa[i][k] * psb[k][j];
}
}
}
MPI_Irecv(pra, n*n / PP / PP, MPI_FLOAT, prawy_sasiad, tag, MPI_COMM_WORLD, reqRecv);
MPI_Irecv(prb, n*n / PP / PP, MPI_FLOAT, dolny_sasiad, tag, MPI_COMM_WORLD, &reqRecv[1]);
MPI_Isend(psa, n*n / PP / PP, MPI_FLOAT, lewy_sasiad, tag, MPI_COMM_WORLD, reqSend);
MPI_Isend(psb, n*n / PP / PP, MPI_FLOAT, gorny_sasiad, tag, MPI_COMM_WORLD, &reqSend[1]);
MPI_Wait(reqRecv, statRecv);
MPI_Barrier(MPI_COMM_WORLD);
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
psa[i][j] = pra[i][j];
}
}
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
psb[i][j] = prb[i][j];
}
}
}
cout << "Proces: " << proces << " ";
for (int i = 0; i < n / PP; i++)
{
for (int j = 0; j < n / PP; j++)
{
cout << C[i][j] << " ";
}
}
MPI_Finalize();
return 0;
}

CGRect with variable Buttons per Row

Following situation:
i want to build a cgrect. what i need is: up to 4 rows. up to 6 columns.
so for example how it have to look:
i need min.
+ + + +
(1 row, 4 buttons)
and max i need:
+ + + + + +
+ + + + +
+ + + + + +
+ + + + +
(4 rows, 22 buttons)
what i want is to pass the BUTTONS_FOR_ROW1-4 data from an other VC. for the min example this is button_for_row1 = 4, button_for_row2 = 0, button_for_row3 = 0, button_for_row4 = 0.
for the max example button_for_row1 = 6, button_for_row2 = 5, button_for_row3 = 6, button_for_row4 = 5
my code now is this:
-(void) generateCardViews {
int positionsLeftInRow = _BUTTONS_PER_ROW;
int j = 0; // j = ROWNUMBER (j = 0) = ROW1, (j = 1) = ROW2...
for (int i = 0; i < [self.gameModel.buttons count]; i++) {
NSInteger value = ((ButtonModel *)self.gameModel.buttons[i]).value;
CGFloat x = (i % _BUTTONS_PER_ROW) * 121 + (i % _BUTTONS_PER_ROW) * 40 + 285;
if (j == 1) {
x += 80; // set additional indent (horizontal displacement)
}
if (j == 2) {
x -= 160;
}
CGFloat y = j * 122 + j * 40 + 158;
CGRect frame = CGRectMake(x, y, 125, 125);
ButtonView *cv = [[ButtonView alloc] initWithFrame:frame andPosition:i andValue:value];
if (!((ButtonModel *)self.gameModel.buttons[i]).outOfPlay) {
[self.boardView addSubview:cv];
if ([self.gameModel.turnedButtons containsObject: self.gameModel.buttons[i]]) {
[self.turnedButtonViews addObject: cv];
[cv flip];
}
}
if (--positionsLeftInRow == 0) {
j++;
positionsLeftInRow = _BUTTONS_PER_ROW;
if (j == 1) {
positionsLeftInRow = _BUTTONS_PER_ROW-1;
if (j == 2) {
positionsLeftInRow = _BUTTONS_PER_ROW-2;
}}
}
}
}
As you can see my code now have just BUTTONS_PER_ROW and not BUTTONS_FOR_ROW1 - 4.
the indent is for pushing a row left or right.
But i think this will work much easier, cause with my code i get some weird things when i make 22 Buttons.
thanks for help!
Your basic logic can be simplified. I would use a while loop here. For the case of a maximum number of buttons per row (the simpler case), this is an outline of what I think is a clear way to do what you want:
y = .... (initial value of y)
NSUInteger numberOfButtons = [self.gameModel.buttons count];
// Layout out the buttons, with no more than _BUTTONS_PER_ROW in each row
NSUInteger buttonsLeft = numberOfButtons;
NSUInteger buttonsInRow = 0;
while(buttonsLeft>0)
{
if(buttonsInRow>_BUTTONS_PER_ROW)
{
// Increment y, reset x
y += ....
x = .... (initial position for x)
buttonsInRow = 0;
}
// Create a button
CGRect frame = CGRectMake(x, y, 125, 125);
ButtonView* buttonView = [[ButtonView .....
x += 80;
++buttonsInRow;
--buttonsLeft;
}
For the more general case, add a variable to keep track of the row number, and use and array with the maximum number of buttons per row that is loaded before entering the while loop.

OpenCL kernel doesn't finish executing

I am writing a simple monte carlo code for simulation of electron scattering. I ran the Kernel for 10 million electron and it runs fine, but when I increase the number of electrons to a higher number, say 50 million, the code just wouldn't finish and the computer freezes. I wanted to know if this is a hardware issue or if there is a possible bug in the code. I am running the code on a iMac with ATI Radeon HD 5870.
int rand_r (unsigned int seed)
{
unsigned int next = seed;
int result;
next *= 1103515245;
next += 12345;
result = (unsigned int) (next / 65536) % 2048;
next *= 1103515245;
next += 12345;
result <<= 10;
result ^= (unsigned int) (next / 65536) % 1024;
next *= 1103515245;
next += 12345;
result <<= 10;
result ^= (unsigned int) (next / 65536) % 1024;
seed = next;
return result;
}
__kernel void MC(const float E, __global float* bse, const int count) {
int tx, ty;
tx = get_global_id(0);
ty = get_global_id(1);
float RAND_MAX = 2147483647.0f;
int rand_seed;
int seed = count*ty + tx;
float rand;
float PI;
PI = 3.14159f;
float z;
z = 28.0f;
float rho;
rho = 8.908f;
float A;
A = 58.69f;
int num;
num = 10000000/(count*count);
int counter, counter1, counter2;
counter = 0;
float4 c_new, r_new;
float E_new, alpha, de_ds, phi, psi, mfp,sig_eNA,step, dsq, dsqi, absc0z;
float J;
J = (9.76f*z + 58.5f*powr(z,-0.19f))*1E-3f;
float4 r0 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
float2 tilt = (float2)((70.0f/180.0f)*PI , 0.0f);
float4 c0 = (float4)(cos(tilt.y)*sin(tilt.x), sin(tilt.y)*sin(tilt.x), cos(tilt.x), 0.0f);
for (int i = 0; i < num; ++i){
rand_seed = rand_r(seed);
seed = rand_seed;
rand = rand_seed/RAND_MAX; //some random no. generator in gpu
r0 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
c0 = (float4)(cos(tilt.y)*sin(tilt.x), sin(tilt.y)*sin(tilt.x), cos(tilt.x), 0.0f);
E_new = E;
c_new = c0;
alpha = (3.4E-3f)*powr(z,0.67f)/E_new;
sig_eNA = (5.21f * 602.3f)*((z*z)/(E_new*E_new))*((4.0f*PI)/(alpha*(1+alpha)))*((E_new + 511.0f)*(E_new + 511.0f)/((E_new + 1024.0f)*(E_new + 1024.0f)));
mfp = A/(rho*sig_eNA);
step = -mfp * log(rand);
r_new = (float4)(r0.x + step*c_new.x, r0.y + step*c_new.y, r0.z + step*c_new.z, 0.0f);
r0 = r_new;
counter1 = 0;
counter2 = 0;
while (counter1 < 1000){
alpha = (3.4E-3f)*powr(z,0.67f)/E_new;
sig_eNA = (5.21f * 602.3f)*((z*z)/(E_new*E_new))*((4*PI)/(alpha*(1+alpha)))*((E_new + 511.0f)*(E_new + 511.0f)/((E_new + 1024.0f)*(E_new + 1024.0f)));
mfp = A/(rho*sig_eNA);
rand_seed = rand_r(seed);
seed = rand_seed;
rand = rand_seed/RAND_MAX; //some random no. generator in gpu
step = -mfp * log(rand);
de_ds = -78500.0f*(z/(A*E_new)) * log((1.66f*(E_new + 0.85f*J))/J);
rand_seed = rand_r(seed);
seed = rand_seed;
rand = rand_seed/RAND_MAX; //new random no.
phi = acos(1 - ((2*alpha*rand)/(1 + alpha - rand)));
rand_seed = rand_r(seed);
seed = rand_seed;
rand = rand_seed/RAND_MAX; //third random no.
psi = 2*PI*rand;
if ((c0.z >= 0.999f) || (c0.z <= -0.999f) ){
absc0z = abs(c0.z);
c_new = (float4)(sin(phi) * cos(psi), sin(phi) * sin(psi), (c0.z/absc0z)*cos(phi), 0.0f);
}
else {
dsq = sqrt(1-c0.z*c0.z);
dsqi = 1/dsq;
c_new = (float4)(sin(phi)*(c0.x*c0.z*cos(psi) - c0.y*sin(psi))*dsqi + c0.x*cos(phi), sin(phi) * (c0.y * c0.z * cos(psi) + c0.x * sin(psi)) * dsqi + c0.y * cos(phi), -sin(phi) * cos(psi) * dsq + c0.z * cos(phi), 0.0f);
}
r_new = (float4)(r0.x + step*c_new.x, r0.y + step*c_new.y, r0.z + step*c_new.z, 0.0f);
r0 = r_new;
c0 = c_new;
E_new += step*rho*de_ds;
if (r0.z <= 0 && counter2 == 0){
counter++ ;
counter2 = 1;
}
counter1++ ;
}
}
bse[count*ty + tx] = counter;
}