I need a SHA-256 kernel file. I am using Cloo as my OpenCL library, and the kernel will be included in a WPF project.
I am calculating a hash value many times.
The program needs about 30 minutes to do that, but my search results claimed that OpenCL would reduce that time to under 3 minutes.
thanks in advance
[Edit]
OK, I have now managed to do it using this:
https://searchcode.com/file/45893396/src/opencl/sha256_kernel.cl/
It works fine with a string,
yet when I send my byte-array header to be hashed it returns a very different value than expected.
[Edit2]
It cannot handle large arrays: any array longer than 32 bytes returns messy results.
I found the following code and modified it to calculate a double hash,
in case anyone needs it:
/* Fallback names for the fixed-width integer types. Each #ifndef only checks
   whether a MACRO of that name already exists; it cannot see typedefs. */
#ifndef uint8_t
#define uint8_t unsigned char
#endif
#ifndef uint32_t
#define uint32_t unsigned int
#endif
#ifndef uint64_t
#define uint64_t unsigned long int
#endif
/* Rotate x left / right by n bits. The complementary shift uses 32 - n, so x
   is expected to be a 32-bit unsigned value and n to lie strictly between 0
   and 32. Both arguments are evaluated twice. */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
/* Running SHA-256 hashing context shared by the Init/Update/Final routines. */
typedef struct
{
/* Eight 32-bit chaining words; seeded by Sha256_Init and updated once per
   64-byte block by Sha256_Transform. */
uint32_t state[8];
/* Number of message bytes absorbed so far (incremented once per byte by the
   update routines); its low six bits give the fill level of `buffer`. */
uint64_t count;
/* Bytes of the current block that have not been compressed yet. */
uint8_t buffer[64];
} CSha256;
/*
 * Put a context into its starting condition: load the eight SHA-256 initial
 * chaining words into p->state and reset the byte counter to zero.
 * The block buffer is deliberately left as it is.
 */
inline void Sha256_Init(CSha256 *p)
{
    const uint32_t initial[8] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };
    for (unsigned w = 0; w < 8; w++)
    {
        p->state[w] = initial[w];
    }
    p->count = 0;
}
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
#define blk0(i) (W[i] = data[i])
#define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
#define Ch2(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#define sha_a(i) T[(0-(i))&7]
#define sha_b(i) T[(1-(i))&7]
#define sha_c(i) T[(2-(i))&7]
#define sha_d(i) T[(3-(i))&7]
#define sha_e(i) T[(4-(i))&7]
#define sha_f(i) T[(5-(i))&7]
#define sha_g(i) T[(6-(i))&7]
#define sha_h(i) T[(7-(i))&7]
#ifdef _SHA256_UNROLL2
#define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch2(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
d += h; h += S0(a) + Maj(a, b, c)
#define RX_8(i) \
R(a,b,c,d,e,f,g,h, i); \
R(h,a,b,c,d,e,f,g, i+1); \
R(g,h,a,b,c,d,e,f, i+2); \
R(f,g,h,a,b,c,d,e, i+3); \
R(e,f,g,h,a,b,c,d, i+4); \
R(d,e,f,g,h,a,b,c, i+5); \
R(c,d,e,f,g,h,a,b, i+6); \
R(b,c,d,e,f,g,h,a, i+7)
#else
#define R(i) sha_h(i) += S1(sha_e(i)) + Ch2(sha_e(i),sha_f(i),sha_g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
sha_d(i) += sha_h(i); sha_h(i) += S0(sha_a(i)) + Maj(sha_a(i), sha_b(i), sha_c(i))
#ifdef _SHA256_UNROLL
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
#endif
#endif
/* The 64 SHA-256 round constants; the round macro R reads them as K[i+j]. */
static const uint32_t K[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/*
 * SHA-256 compression step: folds one block of sixteen 32-bit words (`data`)
 * into the eight chaining words in `state`, which are updated in place.
 *
 * The rounds are generated by the R / RX_8 macros; they use the locals W, j
 * and (depending on the build flags) either T[] or the scalars a..h, plus the
 * constant table K.
 */
inline static void Sha256_Transform(uint32_t *state, const uint32_t *data)
{
/* 16-word ring holding the message schedule (written by blk0 / blk2). */
uint32_t W[16];
/* Offset of the current group of 16 rounds; also reused as a plain index
   for the copy-in / add-back loops below. */
unsigned j;
#ifdef _SHA256_UNROLL2
/* Working variables as eight scalars. */
uint32_t a,b,c,d,e,f,g,h;
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
f = state[5];
g = state[6];
h = state[7];
#else
/* Working variables as an array, accessed through sha_a..sha_h. */
uint32_t T[8];
for (j = 0; j < 8; j++)
T[j] = state[j];
#endif
/* 64 rounds, in four groups of 16; j is 0, 16, 32, 48. Only the first group
   (j == 0) loads words from `data`, the others extend the schedule. */
for (j = 0; j < 64; j += 16)
{
#if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
RX_8(0); RX_8(8);
#else
unsigned i;
/* Braces are required: R expands to more than one statement. */
for (i = 0; i < 16; i++) { R(i); }
#endif
}
/* Add the working variables back onto the incoming chaining value. */
#ifdef _SHA256_UNROLL2
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
#else
for (j = 0; j < 8; j++)
state[j] += T[j];
#endif
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
/* memset(T, 0, sizeof(T)); */
}
/* The sigma helpers are only needed by the round macros above; drop them so
   the short names do not leak into the rest of the file. */
#undef S0
#undef S1
#undef s0
#undef s1
/*
 * Compress the 64 bytes currently held in p->buffer: assemble them into
 * sixteen 32-bit words, most significant byte first, and feed those words to
 * Sha256_Transform together with the context's chaining state.
 */
inline static void Sha256_WriteByteBlock(CSha256 *p)
{
    uint32_t words[16];
    for (unsigned w = 0; w < 16; w++)
    {
        const unsigned base = w * 4;
        uint32_t v = p->buffer[base];
        v = (v << 8) | p->buffer[base + 1];
        v = (v << 8) | p->buffer[base + 2];
        v = (v << 8) | p->buffer[base + 3];
        words[w] = v;
    }
    Sha256_Transform(p->state, words);
}
/*
 * Absorb `size` bytes from a __global buffer into the hash context.
 * Bytes are appended to p->buffer one at a time; whenever the buffer reaches
 * 64 bytes it is compressed and filling restarts at offset 0.
 *
 * p    - context previously set up with Sha256_Init
 * data - input bytes in __global memory
 * size - number of bytes to read from data
 */
inline void Sha256_Update(CSha256 *p, __global const uint8_t *data, size_t size)
{
    /* Fill level of the block buffer = bytes absorbed so far, modulo 64. */
    uint32_t fill = (uint32_t)p->count & 0x3F;
    for (size_t k = 0; k < size; k++)
    {
        p->buffer[fill] = data[k];
        fill++;
        p->count++;
        if (fill == 64)
        {
            fill = 0;
            Sha256_WriteByteBlock(p);
        }
    }
}
/*
 * Finish the hash and write the 32-byte digest to a __global buffer.
 * Appends the 0x80 marker, zero-pads up to byte 56 of a block (compressing an
 * intermediate block when the marker left fewer than 8 free bytes), appends
 * the message length in bits as a 64-bit big-endian number, compresses the
 * last block and emits the state words most significant byte first.
 * The context is re-initialised before returning.
 */
inline void Sha256_Final(CSha256 *p, __global uint8_t *digest)
{
    const uint64_t bitLength = (p->count << 3);
    uint32_t fill = (uint32_t)p->count & 0x3F;

    p->buffer[fill++] = 0x80;
    while (fill != (64 - 8))
    {
        /* fill can be 64 here; wrapping to 0 means the block is complete. */
        fill &= 0x3F;
        if (fill == 0)
        {
            Sha256_WriteByteBlock(p);
        }
        p->buffer[fill++] = 0;
    }

    /* 64-bit length, highest byte first. */
    for (int shift = 56; shift >= 0; shift -= 8)
    {
        p->buffer[fill++] = (uint8_t)(bitLength >> shift);
    }
    Sha256_WriteByteBlock(p);

    for (unsigned w = 0; w < 8; w++)
    {
        const uint32_t word = p->state[w];
        digest[4 * w]     = (uint8_t)(word >> 24);
        digest[4 * w + 1] = (uint8_t)(word >> 16);
        digest[4 * w + 2] = (uint8_t)(word >> 8);
        digest[4 * w + 3] = (uint8_t)(word);
    }
    Sha256_Init(p);
}
/*
 * Same as Sha256_Update, but for input that lives in the default
 * (non-__global) address space: append `size` bytes from `data` to the
 * context, compressing each time 64 bytes have accumulated.
 */
inline void Sha256_Update1(CSha256 *p, const uint8_t *data, uint32_t size)
{
    uint32_t fill = (uint32_t)p->count & 0x3F;
    const uint8_t *end = data + size;
    for (const uint8_t *src = data; src != end; src++)
    {
        p->buffer[fill] = *src;
        fill++;
        p->count++;
        if (fill == 64)
        {
            fill = 0;
            Sha256_WriteByteBlock(p);
        }
    }
}
/*
 * Same as Sha256_Final, but the 32-byte digest goes to a buffer in the
 * default (non-__global) address space. Pads the message (0x80, zeros, 64-bit
 * big-endian bit length), compresses the final block, stores the state words
 * big-endian into `digest`, then re-initialises the context.
 */
inline void Sha256_Final1(CSha256 *p, uint8_t *digest)
{
    const uint64_t bitLength = (p->count << 3);
    uint32_t fill = (uint32_t)p->count & 0x3F;

    p->buffer[fill++] = 0x80;
    while (fill != (64 - 8))
    {
        /* A fill of 64 wraps to 0: that block is full and gets compressed. */
        fill &= 0x3F;
        if (fill == 0)
        {
            Sha256_WriteByteBlock(p);
        }
        p->buffer[fill++] = 0;
    }

    for (int shift = 56; shift >= 0; shift -= 8)
    {
        p->buffer[fill++] = (uint8_t)(bitLength >> shift);
    }
    Sha256_WriteByteBlock(p);

    uint8_t *out = digest;
    for (unsigned w = 0; w < 8; w++)
    {
        for (int shift = 24; shift >= 0; shift -= 8)
        {
            *out++ = (uint8_t)(p->state[w] >> shift);
        }
    }
    Sha256_Init(p);
}
/*
 * Nonce search over an 80-byte header using double SHA-256.
 *
 * header - 80 input bytes; bytes 76..79 are overwritten (in a private copy)
 *          with the candidate nonce, least significant byte first.
 * toRet  - in:  bytes 0..3 = first nonce to try, bytes 4..7 = last nonce,
 *               both little-endian;
 *          out: bytes 8..11 = the accepted nonce (little-endian), written
 *               only when a candidate is accepted.
 *
 * A candidate is accepted when more than 8 of the digest bytes 22..31 are
 * zero. The search ends when a non-zero nonce has been accepted, when the
 * nonce passes the upper bound, or when the nonce is not above the starting
 * value (first candidate accepted, or the counter wrapped around).
 *
 * The kernel never queries a work-item id, so every work-item that runs it
 * performs the same search.
 */
__kernel void Sha256_1(__global uint8_t *header,__global uint8_t *toRet)
{
    uint8_t block[80];
    uint8_t hash[32] = {0};
    const uint firstNonce = (uint)toRet[0] | ((uint)toRet[1] << 8) |
                            ((uint)toRet[2] << 16) | ((uint)toRet[3] << 24);
    const uint lastNonce = (uint)toRet[4] | ((uint)toRet[5] << 8) |
                           ((uint)toRet[6] << 16) | ((uint)toRet[7] << 24);
    uint nonce = firstNonce;
    uint32_t accepted = 0;

    for (int i = 0; i < 80; i++)
    {
        block[i] = header[i];
    }
    for (int b = 0; b < 4; b++)
    {
        block[76 + b] = (uint8_t)(nonce >> (8 * b));
    }

    do
    {
        /* First pass over the header, second pass over the first digest.
           Sha256_Final1 leaves the context re-initialised, and the explicit
           Init keeps each pass self-contained. */
        CSha256 ctx;
        Sha256_Init(&ctx);
        Sha256_Update1(&ctx, block, 80);
        Sha256_Final1(&ctx, hash);
        Sha256_Init(&ctx);
        Sha256_Update1(&ctx, hash, 32);
        Sha256_Final1(&ctx, hash);

        uint8_t zeroBytes = 0;
        for (int i = 22; i <= 31; i++)
        {
            if (hash[i] == 0)
            {
                zeroBytes++;
            }
        }

        if (zeroBytes > 8)
        {
            accepted = nonce;
            for (int b = 0; b < 4; b++)
            {
                toRet[8 + b] = (uint8_t)(nonce >> (8 * b));
            }
        }
        else
        {
            nonce++;
            for (int b = 0; b < 4; b++)
            {
                block[76 + b] = (uint8_t)(nonce >> (8 * b));
            }
        }

        if (nonce > lastNonce || nonce <= firstNonce)
        {
            break;
        }
    } while (accepted < 1);
}
I want to calculate the apparent area of a polygon from a view point. Say, you are looking at a 2 x 2 meters square from across, the apparent area for you would be 4 m2.
Now imagine the square is rotated somehow; then the apparent area would be smaller. To compute this, I figured I could use the following logic:
V3_c (center of mass of the polygon)
V3_v (viewer's position)
Construct a plane that goes through V3_v with the normal of (V3_c - V3_v).normalize()
Project the polygon onto this plane and calculate the area
How can I do this in CGAL?
UPDATE:
Following @mgimeno's suggestions, I've used the following (almost pseudo) code.
#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Polygon_2_algorithms.h>
#include <CGAL/Polygon_with_holes_2.h>
#include <CGAL/centroid.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
#include "print_utils.h"
// Kernel with exact predicates but inexact (floating-point) constructions.
typedef CGAL::Exact_predicates_inexact_constructions_kernel Kernel;
// Not used by the code below.
typedef CGAL::Polygon_with_holes_2<Kernel> Polygon_with_holes_2;
// Geometric types taken from the kernel.
typedef Kernel::Point_2 Point_2;
typedef Kernel::Point_3 Point_3;
typedef Kernel::Plane_3 Plane_3;
typedef Kernel::Vector_3 Vector_3;
// The kernel's number type; used for the computed area.
typedef Kernel::FT ValueType;
// Makes cout / endl usable without the std:: prefix in main.
using namespace std;
int main(int argc, char* argv[])
{
Point_3 viewer(0, 0, 0);
cout << "Viewer: " << viewer << endl;
Point_3 a(-5, -5, 5);
Point_3 b(-5, -5, -5);
Point_3 c(5, -5, -5);
Point_3 d(5, -5, 5);
cout << "Surface: " << a << ", " << b << ", " << c << ", " << d << endl;
std::vector<Point_3> vertices;
vertices.push_back(a);
vertices.push_back(b);
vertices.push_back(c);
vertices.push_back(d);
Point_3 center = CGAL::centroid(vertices.begin(), vertices.end(), CGAL::Dimension_tag<0>());
cout << "Center of surface: " << center << endl;
Vector_3 normal = center - viewer;
Plane_3 plane(viewer, normal);
cout << "Plane passing thorough viewer orthogonal to surface: " << plane << endl;
Point_3 pa = plane.projection(a);
Point_3 pb = plane.projection(b);
Point_3 pc = plane.projection(c);
Point_3 pd = plane.projection(d);
cout << "Projected surface onto the plane: " << pa << ", " << pb << ", " << pc << ", " << pd << endl;
Point_2 pa2 = plane.to_2d(pa);
Point_2 pb2 = plane.to_2d(pb);
Point_2 pc2 = plane.to_2d(pc);
Point_2 pd2 = plane.to_2d(pd);
cout << "to_2d of the projected plane: " << pa2 << ", " << pb2 << ", " << pc2 << ", " << pd2 << endl;
std::vector<Point_2> vertices2;
vertices2.push_back(pa2);
vertices2.push_back(pb2);
vertices2.push_back(pc2);
vertices2.push_back(pd2);
ValueType result;
CGAL::area_2(vertices2.begin(), vertices2.end(), result);
cout << "Area of to_2d'ed vertices: " << result << endl;
return EXIT_SUCCESS;
}
The output is:
Viewer: 0 0 0
Surface: -5 -5 5, -5 -5 -5, 5 -5 -5, 5 -5 5
Center of surface: 0 -5 0
Plane passing thorough viewer orthogonal to surface: 0 -5 0 0
Projected surface onto the plane: -5 0 5, -5 0 -5, 5 0 -5, 5 0 5
to_2d of the projected plane: -5 1, -5 -1, 5 -1, 5 1
Area of to_2d'ed vertices: 20
I'm not sure how to_2d works but certainly not the way I hope it would. The computed area is 20 instead of the actual 100.
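BTW, I've also begun to realize that this goal could be achieved by simply computing the angle a between the viewing direction (V3_c - V3_v) and the normal of the polygon: |cos a| * original_area should give the apparent area (equivalently, the sine of the angle between the viewing direction and the polygon's plane, times the original area).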
To compute the centroid of your polygon you can use CGAL::centroid().
Then to construct your plane you can use the constructor of Plane_3 that takes a point and a normal.
After that you can project each point of your polygon using Plane_3::projection(), and then I'd propose using Plane_3::to_2d() on those new points to get Point_2 objects, so that you can use area_2().
In my code, I organize objects into a regular Cartesian grid (such as 10x10). Often, given a point, I need to test whether the point lies within the grid and, if so, which bins contain it. I already have my own implementation, but I would rather not have to deal with precision issues.
So, does CGAL have a 2D regular Cartesian grid?
You can use CGAL::points_on_square_grid_2 to generate the grid points. CGAL kernels provide Kernel::CompareXY_2 functors, which you can use to figure out the exact location of your query point on the grid. For example you can sort your grid points and then use std::lower_bound followed by CGAL::orientation or CGAL::collinear on the appropriate elements of your range. You could also build an arrangement, but this would be an overkill.
Here is a sample code.
#include <CGAL/Exact_predicates_exact_constructions_kernel.h>
#include <CGAL/point_generators_2.h>
#include <CGAL/random_selection.h>
#include <CGAL/Polygon_2_algorithms.h>
using namespace CGAL;
// Kernel with exact predicates and exact constructions.
using K= Exact_predicates_exact_constructions_kernel;
using Point =K::Point_2;
// Builds Point objects from two doubles; passed to the grid point generator.
using Creator = Creator_uniform_2<double, Point>;
// The grid is stored as a flat list of its points.
using Grid = std::vector<Point>;
// Number of grid points along one side (the grid has gridSide * gridSide points).
const int gridSide = 3;
// Reports on std::cout where p lies relative to the grid. The grid is taken
// by value, i.e. copied on every call.
void locate_point (Point p, Grid grid);
int main ()
{
Grid points;
points_on_square_grid_2(gridSide * gridSide, gridSide * gridSide, std::back_inserter(points), Creator());
std::sort(points.begin(), points.end(), K::Less_xy_2());
std::cout << "Grid points:\n";
for (auto& p:points)
std::cout << p << '\n';
std::cout << "\ncorner points:\n";
Grid cornerPoints{points[0], points[gridSide - 1], points[gridSide * gridSide - 1],
points[gridSide * (gridSide - 1)]};
for (auto& p:cornerPoints)
std::cout << p << '\n';
std::cout << '\n';
Point p1{-8, -8};
Point p2{-10, 3};
Point p3{-9, -8};
Point p4{0, 4};
Point p5{1, 5};
locate_point(p1, points);
locate_point(p2, points);
locate_point(p3, points);
locate_point(p4, points);
locate_point(p5, points);
}
// Reports on std::cout where point p lies relative to the grid.
//
// `grid` must hold the gridSide * gridSide grid points sorted with
// K::Less_xy_2 (column by column, bottom to top), as main() prepares them.
//
// Possible reports:
//   - not in grid / on grid boundary (decided against the four corner points);
//   - in grid, followed by either the grid side (vertical or horizontal) the
//     point lies on, or the upper right point of the bin that contains it.
void locate_point (Point p, Grid grid)
{
    // Also covers the empty grid; anything smaller than a full grid would be
    // indexed out of range by the corner lookups below.
    const Grid::size_type expectedSize = static_cast<Grid::size_type>(gridSide) * gridSide;
    if (grid.size() < expectedSize)
    {
        std::cout << "Point " << p << " not in grid\n";
        return;
    }

    // check if point is in grid
    Grid cornerPoints{grid[0], grid[gridSide - 1], grid[gridSide * gridSide - 1], grid[gridSide * (gridSide - 1)]};
    auto point_is = CGAL::bounded_side_2(cornerPoints.begin(), cornerPoints.end(), p);
    switch (point_is)
    {
        case CGAL::ON_UNBOUNDED_SIDE:
            std::cout << "Point " << p << " not in grid\n";
            return;
        case CGAL::ON_BOUNDARY:
            std::cout << "Point " << p << " on grid boundary\n";
            return;
        case CGAL::ON_BOUNDED_SIDE:
            std::cout << "Point " << p << " is in grid\n";
    }

    // First grid point not smaller than p in xy order, then the first point
    // from there on that lies strictly above p: the upper right corner of
    // p's bin.
    auto f = std::lower_bound(grid.begin(), grid.end(), p, K::Less_xy_2());
    auto g = std::find_if(f, grid.end(), [&p] (const Point& gridpoint)
    { return K::Less_y_2()(p, gridpoint); });

    // For a strictly interior point of a well-formed grid g always has a
    // predecessor; guard anyway so a malformed grid cannot cause an invalid
    // dereference.
    if (g == grid.end() || g == grid.begin())
    {
        std::cout << "Point " << p << " could not be located in grid\n";
        return;
    }

    // Vertical grid line: p lies on the segment between g and the point
    // directly below it.
    const auto below = g - 1;
    if (CGAL::collinear(p, *g, *below))
    {
        std::cout << "Point " << p << " on grid side between points " << *below << " and " << *g << '\n';
        return;
    }

    // Horizontal grid line: p lies on the segment between the point below g
    // and that point's left neighbour, which is one column (gridSide entries)
    // earlier in the sorted order.
    if (below - grid.begin() >= gridSide)
    {
        const auto left = below - gridSide;
        if (CGAL::collinear(p, *below, *left))
        {
            std::cout << "Point " << p << " on grid side between points " << *left << " and " << *below << '\n';
            return;
        }
    }

    std::cout << "Point " << p << " in bin whose upper right point is " << *g << '\n';
    return;
}
Output:
Grid points:
-9 -9
-9 0
-9 9
0 -9
0 0
0 9
9 -9
9 0
9 9
corner points:
-9 -9
-9 9
9 9
9 -9
Point -8 -8 is in grid
Point -8 -8 in bin whose upper right point is 0 0
Point -10 3 not in grid
Point -9 -8 on grid boundary
Point 0 4 is in grid
Point 0 4 on grid side between points 0 0 and 0 9
Point 1 5 is in grid
Point 1 5 in bin whose upper right point is 9 9
Is there any way to convert a signed integer into an array of bytes in NXC? I can't use explicit type casting or pointers either, due to language limitations.
I've tried:
// Store the two bytes of val, high byte first (i = 1 -> shift 8, i = 2 -> shift 0).
for(unsigned long i = 1; i <= 2; i++)
{
// Shift the wanted byte down to bits 0..7 first and mask afterwards, so any
// sign bits dragged in by the arithmetic right shift are discarded.
MM_mem[id.idx] = (val >> ((2 - i) * 8)) & 0xFF;
id.idx++;
}
But it fails.
EDIT: This works... It just wasn't downloading. I've wasted about an hour trying to figure it out. >_>
EDIT: In NXC, >> is an arithmetic shift. int is a signed 16-bit integer type. A byte is the same thing as unsigned char.
NXC is 'Not eXactly C', a relative of C, but distinctly different from C.
How about
/* Split x into four bytes, most significant byte first: each line masks out
   one byte of x and shifts it down to bits 0..7.
   NOTE(review): assumes x is at least 32 bits wide - confirm for the target. */
unsigned char b[4];
b[0] = (x & 0xFF000000) >> 24;
b[1] = (x & 0x00FF0000) >> 16;
b[2] = (x & 0x0000FF00) >> 8;
b[3] = x & 0xFF;
The best way to do this in NXC with the opcodes available in the underlying VM is to use FlattenVar to convert any type into a string (aka byte array with a null added at the end). It results in a single VM opcode operation where any of the above options which use shifts and logical ANDs and array operations will require dozens of lines of assembly language.
// Demo: turn an int into its raw bytes with FlattenVar and show the original
// number followed by each byte on the display.
task main()
{
int x = Random(); // 16 bit random number - could be negative
string data;
data = FlattenVar(x); // convert type to byte array with trailing null
NumOut(0, LCD_LINE1, x);
// ArrayLen(data)-1 leaves out the trailing null added by FlattenVar.
for (int i=0; i < ArrayLen(data)-1; i++)
{
// Each byte is drawn 8 units further from LCD_LINE2 - presumably one text
// row per byte; confirm against the display constants.
#ifdef __ENHANCED_FIRMWARE
TextOut(0, LCD_LINE2-8*i, FormatNum("0x%2.2x", data[i]));
#else
NumOut(0, LCD_LINE2-8*i, data[i]);
#endif
}
// Presumably keeps the output visible for four seconds before the task ends.
Wait(SEC_4);
}
The best way to get help with LEGO MINDSTORMS and the NXT and Not eXactly C is via the mindboards forums at http://forums.mindboards.net/
Question originally tagged c; this answer may not be applicable to Not eXactly C.
What is the problem with this:
/* Illustrative fragment: `value` stands for the integer to split and must be
   given a value before the lines below run. */
int value;
/* Indices 0..3 are written below, so this relies on sizeof(int) being at
   least 4. */
char bytes[sizeof(int)];
/* Least significant byte goes to bytes[0]. */
bytes[0] = (value >> 0) & 0xFF;
bytes[1] = (value >> 8) & 0xFF;
bytes[2] = (value >> 16) & 0xFF;
bytes[3] = (value >> 24) & 0xFF;
You can regard it as an unrolled loop. The shift by zero could be omitted; the optimizer would certainly do so. Even though the result of right-shifting a negative value is implementation-defined in C, there is no problem here, because the mask keeps only the low eight bits of each shifted value, and for shifts of up to 24 bits on a 32-bit int those bits always come from the original value.
This code gives the bytes in a little-endian order - the least-significant byte is in bytes[0]. Clearly, big-endian order is achieved by:
/* Illustrative fragment: `value` stands for the integer to split and must be
   given a value before the lines below run. */
int value;
/* Indices 0..3 are written below, so this relies on sizeof(int) being at
   least 4. */
char bytes[sizeof(int)];
/* Same extraction as the little-endian version, stored in reverse: the most
   significant byte ends up in bytes[0]. */
bytes[3] = (value >> 0) & 0xFF;
bytes[2] = (value >> 8) & 0xFF;
bytes[1] = (value >> 16) & 0xFF;
bytes[0] = (value >> 24) & 0xFF;