OpenCL - Kernel crashes on the second run

I am trying to run code which works the first time it runs but crashes the second time. The function which causes the crash is part of the class Octree_GPU:
int Octree_GPU::runCreateNodeKernel(int length)
{
cl_uint nodeLength;
if(nodeNumsArray[length-1] == 0)
nodeLength = nodeAddArray[length-1];
else
nodeLength = nodeAddArray[length-1]+8;
nodeArray = (cl_uint*)malloc(sizeof(cl_uint)*nodeLength);
nodePointsArray = (cl_int*)malloc(sizeof(cl_uint)*nodeLength);
startIndexArray = (cl_int*)malloc(sizeof(cl_int)*nodeLength);
d_nodeAdd = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*length, NULL, &err);
d_nodeArray = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(cl_uint)*temp_length, NULL, &err);
d_numPoints = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*length, NULL, &err);
d_pointIndex = clCreateBuffer(context, CL_MEM_READ_WRITE,sizeof(cl_uint)*length,NULL, &err);
d_nodePointsArray = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int)*temp_length, NULL, &err);
d_nodeIndexArray = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(cl_int)*temp_length, NULL, &err);
err |= clEnqueueWriteBuffer(commands, d_nodeAdd, CL_TRUE, 0, sizeof(cl_uint)*length, nodeAddArray, 0, NULL,NULL);
err |= clEnqueueWriteBuffer(commands, d_numPoints,CL_TRUE, 0, sizeof(cl_uint)*length,numPointsArray,0,NULL,NULL);
err |= clEnqueueWriteBuffer(commands, d_pointIndex, CL_TRUE, 0, sizeof(cl_uint)*length,pointStartIndexArray,0, NULL, NULL);
clFinish(commands);
err = clSetKernelArg(createNodeKernel, 0, sizeof(cl_mem), &d_odata);
err |= clSetKernelArg(createNodeKernel, 1, sizeof(cl_mem), &d_nodeNums);
err |= clSetKernelArg(createNodeKernel, 2, sizeof(cl_mem), &d_nodeAdd);
err |= clSetKernelArg(createNodeKernel, 3, sizeof(cl_mem), &d_numPoints);
err |= clSetKernelArg(createNodeKernel, 4, sizeof(cl_mem), &d_pointIndex);
err |= clSetKernelArg(createNodeKernel, 5, sizeof(cl_mem), &d_nodeArray);
err |= clSetKernelArg(createNodeKernel, 6, sizeof(cl_mem), &d_nodePointsArray);
err |= clSetKernelArg(createNodeKernel, 7, sizeof(cl_mem), &d_nodeIndexArray);
clFinish(commands);
if(err != CL_SUCCESS) {
printf("Cannot set Kernel Arg \n");
exit(1);
}
size_t global_size[1] = {limit-1};
err = clEnqueueNDRangeKernel(commands, createNodeKernel, 1, NULL, global_size, NULL, 0, NULL, NULL);
if(err != CL_SUCCESS) {
printf(" Kernel does not work \n");
exit(1);
}
clFinish(commands);
err = clEnqueueReadBuffer(commands, d_nodeArray, CL_TRUE, 0, sizeof(cl_uint)*temp_length, nodeArray, 0, NULL, NULL);
err|= clEnqueueReadBuffer(commands, d_nodePointsArray, CL_TRUE, 0, sizeof(cl_int)*nodeLength, nodePointsArray, 0, NULL, NULL);
err|= clEnqueueReadBuffer(commands, d_nodeIndexArray, CL_TRUE, 0, sizeof(cl_int)*nodeLength, startIndexArray, 0, NULL, NULL);
clFinish(commands);
clReleaseMemObject(d_nodeAdd);
clReleaseMemObject(d_numPoints);
clReleaseMemObject(d_nodeArray);
clReleaseMemObject(d_nodePointsArray);
clFinish(commands);
return 0;
}
Please note that d_odata and d_nodeNums have been created in previous functions. The corresponding kernel code is given below:
__kernel void createNode(__global int* uniqueCode, __global int* nodeNums, __global int* nodeAdd, __global int* numPoints, __global int* pointIndex, __global int* nodeArray, __global int* nodePoints, __global int* nodeIndex)
{
    int ig = get_global_id(0);
    int add;
    int num = uniqueCode[ig];
    int pt = numPoints[ig];
    int ind = pointIndex[ig];
    int temp,j;
    if(nodeNums[ig] == 8)
    {
        for(int i=0;i<8;i++)
        {
            temp = ((int)num/10)*10+i;
            add = nodeAdd[ig] + i;
            nodeArray[add] = temp;
            nodePoints[add] = select(0, pt, temp==num);
            nodeIndex[add] = select(-1, ind, temp==num);
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }
    else
    {
        j = num % 10;
        nodeAdd[ig] = nodeAdd[ig-1];
        add = nodeAdd[ig]+j;
        nodePoints[add] = pt;
        nodeIndex[add] = ind;
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
I have tried to find out why but have not succeeded. I might be overlooking something really simple. Thank you for your help.

I'm not 100% sure this is causing the crash, but where you've written
if(nodeNums[ig] == 8)
{
    for(int i=0;i<8;i++)
    {
        barrier(CLK_LOCAL_MEM_FENCE);
    }
}
else
{
    barrier(CLK_LOCAL_MEM_FENCE);
}
This means that different threads in a work group will execute different numbers of barriers, which may cause a hang/crash. A barrier (with CLK_LOCAL_MEM_FENCE) is for synchronising accesses to local memory, and all work items in a group must reach the same barrier before any of them can continue.
On a non-crash note, it looks like you're using CLK_LOCAL_MEM_FENCE (which ensures that local memory accesses are visible across threads) when you mean CLK_GLOBAL_MEM_FENCE (which ensures that global memory accesses are visible across threads).
Also
nodeAdd[ig] = nodeAdd[ig-1];
is not correct for ig == 0. This may not be causing the actual crash (I've found that OpenCL can unfortunately be quite forgiving), but it's worth fixing.
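Putting those points together, here is a sketch (untested, so treat it as an illustration rather than a verified fix) of the kernel with the barriers removed, since each work-item only writes global slots derived from its own nodeAdd entry, and with the ig == 0 read guarded:
__kernel void createNode(__global int* uniqueCode, __global int* nodeNums, __global int* nodeAdd, __global int* numPoints, __global int* pointIndex, __global int* nodeArray, __global int* nodePoints, __global int* nodeIndex)
{
    int ig = get_global_id(0);
    int num = uniqueCode[ig];
    int pt = numPoints[ig];
    int ind = pointIndex[ig];
    if(nodeNums[ig] == 8)
    {
        for(int i = 0; i < 8; i++)
        {
            int temp = ((int)num/10)*10 + i;
            int add = nodeAdd[ig] + i;
            nodeArray[add] = temp;
            nodePoints[add] = select(0, pt, temp == num);
            nodeIndex[add] = select(-1, ind, temp == num);
            /* no barrier: this work-item only touches its own output slots */
        }
    }
    else
    {
        /* reading nodeAdd[ig-1] while a neighbouring work-item may still be writing it is a race;
           ideally this prefix value is computed on the host or in an earlier kernel */
        if(ig > 0)
            nodeAdd[ig] = nodeAdd[ig-1];
        int add = nodeAdd[ig] + (num % 10);
        nodePoints[add] = pt;
        nodeIndex[add] = ind;
    }
}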

BCryptDeriveKeyPBKDF2 replacement for Windows Embedded Compact 2013

I have to compile existing C code using CNG (Cryptography API: Next Generation) functions for Windows Embedded Compact 2013. This code is using BCryptDeriveKeyPBKDF2, which is not available under Windows Embedded Compact 2013.
That means I need a replacement for the function below to implement the PBKDF2 key derivation algorithm as defined in RFC 2898 section 5.2, but without using BCryptDeriveKeyPBKDF2.
I found some C code which is using CryptoAPI functions here, but I don't want to use a second, deprecated API if possible.
BOOL pbkdf2(
PUCHAR pbPassword, ULONG cbPassword,
PUCHAR pbSalt, ULONG cbSalt,
ULONGLONG cIterations,
PUCHAR pbDerivedKey, ULONG cbDerivedKey)
{
NTSTATUS status;
BCRYPT_ALG_HANDLE hAlgorithm;
status = BCryptOpenAlgorithmProvider(&hAlgorithm, BCRYPT_SHA1_ALGORITHM, NULL, BCRYPT_ALG_HANDLE_HMAC_FLAG);
if (BCRYPT_SUCCESS(status))
{
status = BCryptDeriveKeyPBKDF2(hAlgorithm, pbPassword, cbPassword, pbSalt, cbSalt, cIterations, pbDerivedKey, cbDerivedKey, 0);
BCryptCloseAlgorithmProvider(hAlgorithm, 0);
}
return BCRYPT_SUCCESS(status);
}
You could use CNG primitives such as BCryptCreateHash to implement the algorithm. The most important thing is to use the flag BCRYPT_ALG_HANDLE_HMAC_FLAG in BCryptOpenAlgorithmProvider:
void pbkdf2()
{
BCRYPT_ALG_HANDLE hAlg = NULL;
BCRYPT_HASH_HANDLE hHash = NULL;
std::vector<BYTE> pass = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
std::vector<BYTE> salt = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
std::vector<BYTE> derived_key(32);
std::vector<BYTE> dig(32);
BYTE t[] = { 0x00, 0x00, 0x00, 0x01 };   // INT(1): four-octet big-endian encoding of block index 1
DWORD itcount = 10000;
SECURITY_STATUS status = BCryptOpenAlgorithmProvider(&hAlg, BCRYPT_SHA256_ALGORITHM,
nullptr, BCRYPT_ALG_HANDLE_HMAC_FLAG);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptCreateHash(hAlg, &hHash, nullptr, 0, pass.data(), pass.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptHashData(hHash, salt.data(), salt.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptHashData(hHash, t, 4, 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptFinishHash(hHash, dig.data(), dig.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
derived_key = dig;
BCryptDestroyHash(hHash);
hHash = NULL;   // so the handle is not destroyed a second time at Exit
for (DWORD i = 1; i < itcount; ++i)
{
status = BCryptCreateHash(hAlg, &hHash, nullptr, 0, pass.data(), pass.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptHashData(hHash, dig.data(), dig.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
status = BCryptFinishHash(hHash, dig.data(), dig.size(), 0);
if (status != ERROR_SUCCESS) {
goto Exit;
}
BCryptDestroyHash(hHash);
hHash = NULL;   // re-created at the top of the next iteration
for (DWORD j = 0; j < dig.size(); ++j) {
derived_key[j] ^= dig[j];
}
}
Exit:
if (hHash) {
BCryptDestroyHash(hHash);
}
if (hAlg) {
BCryptCloseAlgorithmProvider(hAlg, 0);
}
return;
}
EDIT: to clarify the meaning of t[].
According to RFC (5.2):
For each block of the derived key apply the function F defined
below to the password P, the salt S, the iteration count c, and
the block index to compute the block:
T_1 = F (P, S, c, 1) ,
T_2 = F (P, S, c, 2) ,
...
T_l = F (P, S, c, l) ,
where the function F is defined as the exclusive-or sum of the
first c iterates of the underlying pseudorandom function PRF
applied to the password P and the concatenation of the salt S
and the block index i: F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c
where
U_1 = PRF (P, S || INT (i)) ,
U_2 = PRF (P, U_1) ,
...
U_c = PRF (P, U_{c-1}) .
Here, INT (i) is a four-octet encoding of the integer i, most
significant octet first.
So, in my code t[] is a four-octet encoding of the integer 1 (the index of the first block), most significant octet first.
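In other words, for later blocks you would replace t[] with the big-endian encoding of that block's index. A quick sketch (i here is just an illustrative block counter, not a variable from the code above):
BYTE t[4];
t[0] = (BYTE)((i >> 24) & 0xFF);
t[1] = (BYTE)((i >> 16) & 0xFF);
t[2] = (BYTE)((i >> 8) & 0xFF);
t[3] = (BYTE)(i & 0xFF);   /* for i == 1 this gives { 0x00, 0x00, 0x00, 0x01 }, as above */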
I took this code, converted the deprecated wincrypt calls to the new CNG API, refactored it, and removed the stuff I don't need.
Although I don't understand what the code is doing, it seems to produce the same result as the function in my question which uses BCryptDeriveKeyPBKDF2.
#define NOCRYPT
#include <windows.h>
#include <bcrypt.h>
#include <math.h>
#include <assert.h>
#define DIGEST_SIZE 20
#define BLOCK_SIZE 64
typedef struct
{
BCRYPT_ALG_HANDLE hAlgorithm;
BCRYPT_HASH_HANDLE hInnerHash;
BCRYPT_HASH_HANDLE hOuterHash;
} PRF_CTX;
static void hmacFree(PRF_CTX* pContext)
{
if (pContext->hOuterHash) BCryptDestroyHash(pContext->hOuterHash);
if (pContext->hInnerHash) BCryptDestroyHash(pContext->hInnerHash);
if (pContext->hAlgorithm) BCryptCloseAlgorithmProvider(pContext->hAlgorithm, 0);
}
static BOOL hmacPrecomputeDigest(BCRYPT_HASH_HANDLE hHash, PUCHAR pbPassword, DWORD cbPassword, BYTE mask)
{
BYTE buffer[BLOCK_SIZE];
DWORD i;
assert(cbPassword <= BLOCK_SIZE);
memset (buffer, mask, sizeof(buffer));
for (i = 0; i < cbPassword; ++i)
{
buffer[i] = (char) (pbPassword[i] ^ mask);
}
return BCRYPT_SUCCESS(BCryptHashData(hHash, buffer, sizeof(buffer), 0));
}
static BOOL hmacInit(PRF_CTX* pContext, PUCHAR pbPassword, DWORD cbPassword)
{
BCRYPT_HASH_HANDLE hHash = NULL;
BOOL bStatus = FALSE;
BYTE key[DIGEST_SIZE];
if (!BCRYPT_SUCCESS(BCryptOpenAlgorithmProvider(&pContext->hAlgorithm, BCRYPT_SHA1_ALGORITHM, NULL, 0)) ||
!BCRYPT_SUCCESS(BCryptCreateHash(pContext->hAlgorithm, &pContext->hInnerHash, NULL, 0, NULL, 0, 0)) ||
!BCRYPT_SUCCESS(BCryptCreateHash(pContext->hAlgorithm, &pContext->hOuterHash, NULL, 0, NULL, 0, 0)))
{
goto hmacInit_end;
}
if (cbPassword > BLOCK_SIZE)
{
ULONG cbResult;
if (!BCRYPT_SUCCESS(BCryptCreateHash(pContext->hAlgorithm, &hHash, NULL, 0, NULL, 0, 0)) ||
!BCRYPT_SUCCESS(BCryptHashData(hHash, pbPassword, cbPassword, 0)) ||
!BCRYPT_SUCCESS(BCryptGetProperty(hHash, BCRYPT_HASH_LENGTH, (PUCHAR)&cbPassword, sizeof(cbPassword), &cbResult, 0)) ||
!BCRYPT_SUCCESS(BCryptFinishHash(hHash, key, cbPassword, 0)))
{
goto hmacInit_end;
}
pbPassword = key;
}
bStatus =
hmacPrecomputeDigest(pContext->hInnerHash, pbPassword, cbPassword, 0x36) &&
hmacPrecomputeDigest(pContext->hOuterHash, pbPassword, cbPassword, 0x5C);
hmacInit_end:
if (hHash) BCryptDestroyHash(hHash);
if (bStatus == FALSE) hmacFree(pContext);
return bStatus;
}
static BOOL hmacCalculateInternal(BCRYPT_HASH_HANDLE hHashTemplate, PUCHAR pbData, DWORD cbData, PUCHAR pbOutput, DWORD cbOutput)
{
BOOL success = FALSE;
BCRYPT_HASH_HANDLE hHash = NULL;
if (BCRYPT_SUCCESS(BCryptDuplicateHash(hHashTemplate, &hHash, NULL, 0, 0)))
{
success =
BCRYPT_SUCCESS(BCryptHashData(hHash, pbData, cbData, 0)) &&
BCRYPT_SUCCESS(BCryptFinishHash(hHash, pbOutput, cbOutput, 0));
BCryptDestroyHash(hHash);
}
return success;
}
static BOOL hmacCalculate(PRF_CTX* pContext, PUCHAR pbData, DWORD cbData, PUCHAR pbDigest)
{
return
hmacCalculateInternal(pContext->hInnerHash, pbData, cbData, pbDigest, DIGEST_SIZE) &&
hmacCalculateInternal(pContext->hOuterHash, pbDigest, DIGEST_SIZE, pbDigest, DIGEST_SIZE);
}
static void xor(LPBYTE ptr1, LPBYTE ptr2, DWORD dwLen)
{
while (dwLen--)
*ptr1++ ^= *ptr2++;
}
BOOL pbkdf2(
PUCHAR pbPassword, ULONG cbPassword,
PUCHAR pbSalt, ULONG cbSalt,
DWORD cIterations,
PUCHAR pbDerivedKey, ULONG cbDerivedKey)
{
BOOL bStatus = FALSE;
DWORD l, r, dwULen, i, j;
BYTE Ti[DIGEST_SIZE];
BYTE V[DIGEST_SIZE];
LPBYTE U = malloc(max((cbSalt + 4), DIGEST_SIZE));
PRF_CTX prfCtx = { 0 };
assert(pbPassword != NULL && cbPassword != 0 && pbSalt != NULL && cbSalt != 0);
assert(cIterations > 0 && pbDerivedKey != NULL && cbDerivedKey > 0);
if (!hmacInit(&prfCtx, pbPassword, cbPassword))
{
goto PBKDF2_end;
}
l = (DWORD) ceil((double) cbDerivedKey / (double) DIGEST_SIZE);
r = cbDerivedKey - (l - 1) * DIGEST_SIZE;
for (i = 1; i <= l; i++)
{
ZeroMemory(Ti, DIGEST_SIZE);
for (j = 0; j < cIterations; j++)
{
if (j == 0)
{
// construct first input for PRF
memcpy(U, pbSalt, cbSalt);
U[cbSalt] = (BYTE) ((i & 0xFF000000) >> 24);
U[cbSalt + 1] = (BYTE) ((i & 0x00FF0000) >> 16);
U[cbSalt + 2] = (BYTE) ((i & 0x0000FF00) >> 8);
U[cbSalt + 3] = (BYTE) ((i & 0x000000FF));
dwULen = cbSalt + 4;
}
else
{
memcpy(U, V, DIGEST_SIZE);
dwULen = DIGEST_SIZE;
}
if (!hmacCalculate(&prfCtx, U, dwULen, V))
{
goto PBKDF2_end;
}
xor(Ti, V, DIGEST_SIZE);
}
if (i != l)
{
memcpy(&pbDerivedKey[(i-1) * DIGEST_SIZE], Ti, DIGEST_SIZE);
}
else
{
// Take only the first r bytes
memcpy(&pbDerivedKey[(i-1) * DIGEST_SIZE], Ti, r);
}
}
bStatus = TRUE;
PBKDF2_end:
hmacFree(&prfCtx);
free(U);
return bStatus;
}
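For reference, a small test harness for the function above; the expected output is the published PBKDF2-HMAC-SHA1 test vector for password "password", salt "salt", 1 iteration and a 20-byte key:
#include <stdio.h>

int main(void)
{
    BYTE key[20];
    if (pbkdf2((PUCHAR)"password", 8, (PUCHAR)"salt", 4, 1, key, sizeof(key)))
    {
        DWORD i;
        for (i = 0; i < sizeof(key); ++i)
            printf("%02x", key[i]);
        printf("\n");   /* should print 0c60c80f961f0e71f3a9b524af6012062fe037a6 */
    }
    else
    {
        printf("pbkdf2 failed\n");
    }
    return 0;
}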

Why does update not update any records?

I am using the OCI functions to update a database table in Oracle, but despite the commands all returning success, nothing in the database table changes. It does change if I hard-code the WHERE clause values, so I think I might be doing something wrong in the binding code.
If I create the database table as follows:
create table updatebuddTI(i char(10), j int);
insert into updatebuddTI values ('test1',1);
insert into updatebuddTI values ('test2',2);
insert into updatebuddTI values ('test3',3);
and then use the code:
#include "stdafx.h"
#include <string>
#include <oci.h>
#include <stdlib.h>
#define OCI_NOT_NULL 0
#define OCI_VALUE_NULL -1
#define OCI_VALUE_TRUNCATED -2
#define ORACLE_MAX_SESSIONS 30
int _tmain(int argc, _TCHAR* argv[]) {
// OCI handles
OCIEnv *envhp;
OCIError *errhp;
OCIServer *srvhp;
OCISvcCtx *svchp;
OCISession *authp;
OCIStmt *stmtp;
OCIDefine *defnpp;
// Connection information
text* user = (text*)"test";
text* pwd = (text*)"password";
text* sid = (text*)"oracle-server";
char *query = "UPDATE updatebuddTI SET I = 'test3' WHERE J = :1";// :2";
int dataReceivedI[10];
// Fetched data indicators, lengths and codes
ub2 dataReceived_len[10];
ub2 dataReceived_code[10];
sb2 dataReceived_indI[3];
ub2 dataReceived_lenI[3];
ub2 dataReceived_codeI[3];
oratext message[512];
sb4 errcode;
// Allocate environment
int rc = OCIEnvCreate(&envhp, OCI_DEFAULT, NULL, NULL, NULL, NULL, 0, NULL);
ub2 code = OCINlsCharSetNameToId(envhp, (const oratext *)"WE8MSWIN1252");
OCIEnv *envHandle(0);
if(code) {
rc = OCIEnvNlsCreate(&envHandle,
OCI_OBJECT | OCI_THREADED,
NULL,
NULL,
NULL,
NULL,
0,
NULL,
code,
code);
} else {
printf("problem with OCIEnvNlsCreate!\n");
}
// Allocate error handle
rc = OCIHandleAlloc(envhp, (void**)&errhp, OCI_HTYPE_ERROR, 0, NULL);
// Allocate server and service context handles
rc = OCIHandleAlloc(envhp, (void**)&srvhp, OCI_HTYPE_SERVER, 0, NULL);
rc = OCIHandleAlloc(envhp, (void**)&svchp, OCI_HTYPE_SVCCTX, 0, NULL);
// Attach to the server
//rc = OCIServerAttach(srvhp, errhp, sid, strlen((char*)sid), 0);
// Set server in the service context
rc = OCIAttrSet(svchp, OCI_HTYPE_SVCCTX, (dvoid*)srvhp, 0, OCI_ATTR_SERVER, errhp);
// Allocate session handle
rc = OCIHandleAlloc(envhp, (void**)&authp, OCI_HTYPE_SESSION, 0, NULL);
// Set user name and password
rc = OCIAttrSet(authp, OCI_HTYPE_SESSION, (void*)user, strlen((char*)user),
OCI_ATTR_USERNAME, errhp);
rc = OCIAttrSet(authp, OCI_HTYPE_SESSION, (void*)pwd, strlen((char *)pwd),
OCI_ATTR_PASSWORD, errhp);
std::string path("oracle-server");
rc = OCIServerAttach(srvhp, errhp, (text *)path.c_str(), (sb4)path.length(), 0);
// Connect
rc = OCISessionBegin(svchp, errhp, authp, OCI_CRED_RDBMS, OCI_DEFAULT);
// Set session in the service context
rc = OCIAttrSet(svchp, OCI_HTYPE_SVCCTX, authp, 0, OCI_ATTR_SESSION, errhp);
// Allocate statement handle
rc = OCIHandleAlloc(envhp, (void**)&stmtp, OCI_HTYPE_STMT, 0, NULL);
// Prepare the query
rc = OCIStmtPrepare(stmtp, errhp, (text*)query, strlen(query), OCI_NTV_SYNTAX, OCI_DEFAULT);
char text[10];
int option=0;
// Define the select list items
rc = OCIDefineByPos(stmtp, &defnpp, errhp, 1, (void*)text, 5, SQLT_CHR, (void*)dataReceivedI,
dataReceived_len, dataReceived_code, OCI_DEFAULT);
if (rc != 0) {
OCIErrorGet(errhp, (ub4)1, NULL, &errcode, message, sizeof(message), (ub4)OCI_HTYPE_ERROR);
printf("%s", message);
}
rc = OCIDefineByPos(stmtp, &defnpp, errhp, 2, (void*)option, sizeof(int), SQLT_NUM, (void*)dataReceived_indI,
dataReceived_lenI, dataReceived_codeI, OCI_DEFAULT);
if (rc != 0) {
OCIErrorGet(errhp, (ub4)1, NULL, &errcode, message, sizeof(message), (ub4)OCI_HTYPE_ERROR);
printf("%s", message);
}
OCIBind* bindHandle2;
rc = OCIBindByPos(stmtp, &bindHandle2, errhp, 1,
(dvoid *)&option, (sword) sizeof(int), SQLT_NUM,
(dvoid *)0, (ub2 *)0, (ub2 *)0, (ub4)0, (ub4 *)0, OCI_DEFAULT);
if (rc != 0) {
OCIErrorGet(errhp, (ub4)1, NULL, &errcode, message, sizeof(message), (ub4)OCI_HTYPE_ERROR);
printf("%s", message);
}
strcpy_s(text, "test3");
option = 2;
rc = OCIStmtExecute(svchp, stmtp, errhp, 1, 0, 0,0, OCI_DEFAULT);
if (rc != 0) {
OCIErrorGet(errhp, (ub4)1, NULL, &errcode, message, sizeof(message), (ub4)OCI_HTYPE_ERROR);
printf("%s", message);
}
rc = OCIHandleFree(stmtp, OCI_HTYPE_STMT);
// Disconnect
rc = OCISessionEnd(svchp, errhp, authp, OCI_DEFAULT);
rc = OCIServerDetach(srvhp, errhp, OCI_DEFAULT);
rc = OCIHandleFree(envhp, OCI_HTYPE_ENV);
}
Can anyone see what I have done wrong?

DPDK - rte_mbuf payload/data initialization or updation/modification

I am trying to understand the forwarding mechanism of the basicfwd example of DPDK. Can anyone help me with initializing and editing the payload of the rte_mbuf? Here is the class.
I plan on using tcpdump and rte_pktmbuf_dump to view the packet contents.
Here is the rte_mbuf I wish to add my own payload to:
struct rte_mbuf *bufs[BURST_SIZE];
This is the rte_mbuf being received:
const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE);
This is the rte_mbuf being transmitted:
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
bufs, nb_rx);
I have modified the sample application basicfwd.c in the DPDK examples to print the forwarded packets to a file:
/* Get burst of RX packets, from first port of pair. */
const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE);
FILE *fp;
fp = fopen("dump.txt", "a");
fprintf(fp, "\n-----------------------\n fprintf... %d<->%d\n", count, port);
rte_pktmbuf_dump(fp, bufs[0], 1000);
fclose(fp);
if (unlikely(nb_rx == 0))
continue;
/* Send burst of TX packets, to second port of pair. */
const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,bufs, nb_rx);
These are the packets I see in the output file:
I would like to be able to modify the contents for better understanding. I have tried rte_pktmbuf_init and bufs->userdata = *(unsigned short*)0xAAAAAAAA, but it does not work for me.
I solved the issue by creating my own packets in the memory pool:
1. Create a memory pool for the program.
2. Allocate a packet in the memory pool.
3. Populate the packet with data, source address, and destination address, and initialize the packet fields.
The complete lcore_main function:
/*
* The main thread that does the work, reading from
* an input port and writing to an output port.
*/
struct message {
char data[DATA_SIZE];
};
static void
lcore_main(void)
{
const uint8_t nb_ports = rte_eth_dev_count();
uint8_t port;
for (port = 0; port < nb_ports; port++)
if (rte_eth_dev_socket_id(port) > 0 &&
rte_eth_dev_socket_id(port) !=
(int)rte_socket_id())
printf("WARNING, port %u is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n", port);
struct rte_mbuf *pkt;
struct ether_hdr *eth_hdr;
struct message obj;
struct message *msg;
int nb_rx = 0, nb_tx = 0, cnt = 0, pkt_size = 0;
int count = 0;
int k = 0;
for (count = 0; count < DATA_SIZE; count++){
obj.data[count] = (char)(97 + (k++));
if (k == 26)
k = 0;
}
time_t endtime = time(NULL) + 10;
port = 0;
while (time(NULL) < endtime) {
cnt = rte_eth_rx_burst(port, 0, &pkt, 1);
nb_rx += cnt;
if (cnt > 0)
{
eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
rte_eth_macaddr_get(port, &eth_hdr->s_addr);
pkt_size = sizeof(struct message) + sizeof(struct ether_hdr);
msg = (struct message *)(rte_pktmbuf_mtod(pkt, char *) + sizeof(struct ether_hdr));
rte_pktmbuf_free(pkt);
}
msg = &obj;
pkt = rte_pktmbuf_alloc(mbuf_pool);
pkt_size = sizeof(struct message) + sizeof(struct ether_hdr);
pkt->data_len = pkt_size;
pkt->pkt_len = pkt_size;
eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
rte_eth_macaddr_get(port, &eth_hdr->d_addr);
rte_eth_macaddr_get(port ^ 1, &eth_hdr->s_addr);
eth_hdr->ether_type = htons(PTP_PROTOCOL);
char* data;
data = rte_pktmbuf_append(pkt, sizeof(struct message));
if (data != NULL)
rte_memcpy(data, msg, sizeof(struct message));
nb_tx += rte_eth_tx_burst(port ^ 1, 0, &pkt, 1);
}
printf("----\nData size: %d\nPacket size: %d\nRX : %d, TX : %d\n\n", DATA_SIZE, pkt_size, nb_rx, nb_tx);
}
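To check what is actually being sent, the same dump call used earlier in the question can also be applied to the hand-built packet right before the transmit call inside the loop above (a sketch; stdout can be swapped for the dump.txt FILE* as before, and DATA_SIZE, PTP_PROTOCOL and mbuf_pool are assumed to be defined elsewhere in the file):
/* hexdump the hand-built packet, headers and payload, before transmitting it */
rte_pktmbuf_dump(stdout, pkt, pkt->pkt_len);
nb_tx += rte_eth_tx_burst(port ^ 1, 0, &pkt, 1);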

OpenCL Local memory and Xcode

I'm trying to learn OpenCL on a Mac, which appears to have some differences in implementation from the OpenCL book I'm reading. I want to be able to dynamically allocate local memory on the GPU. What I'm reading says I need to use the clSetKernelArg function, but that doesn't work within Xcode 6.4. Here's the code as it stands (never mind that it's a pointless program; I'm just trying to learn the syntax for shared memory). In Xcode, the kernel is written as a stand-alone .cl file, similar to CUDA, so that's a separate file.
add.cl:
kernel void add(int a, int b, global int* c, local int* d)
{
d[0] = a;
d[1] = b;
*c = d[0] + d[1];
}
main.c:
#include <stdio.h>
#include <OpenCL/opencl.h>
#include "add.cl.h"
int main(int argc, const char * argv[]) {
int a = 3;
int b = 5;
int c;
int* cptr = &c;
dispatch_queue_t queue = gcl_create_dispatch_queue(CL_DEVICE_TYPE_GPU, NULL);
void* dev_c = gcl_malloc(sizeof(cl_int), NULL, CL_MEM_WRITE_ONLY);
// attempt to create local memory buffer
void* dev_d = gcl_malloc(2*sizeof(cl_int), NULL, CL_MEM_READ_WRITE);
// clSetKernelArg(add_kernel, 3, 2*sizeof(cl_int), NULL);
dispatch_sync(queue, ^{
cl_ndrange range = { 1, {0, 0, 0}, {1, 0, 0}, {1, 0, 0} };
// This gives a warning:
// Warning: Incompatible pointer to integer conversion passing 'cl_int *'
// (aka 'int *') to parameter of type 'size_t' (aka 'unsigned long')
add_kernel(&range, a, b, (cl_int*)dev_c, (cl_int*)dev_d);
gcl_memcpy((void*)cptr, dev_c, sizeof(cl_int));
});
printf("%d + %d = %d\n", a, b, c);
gcl_free(dev_c);
dispatch_release(queue);
return 0;
}
I've tried putting clSetKernelArg where indicated and it doesn't like the first argument:
Error: Passing 'void (^)(const cl_ndrange *, cl_int, cl_int, cl_int *, size_t)' to parameter of incompatible type 'cl_kernel' (aka 'struct _cl_kernel *')
I've looked and looked but can't find any examples illustrating this point within the Xcode environment. Can you point me in the right direction?
Managed to solve this by ditching Apple's extensions and using standard OpenCL 1.2 calls. That means replacing gcl_malloc with clCreateBuffer, replacing dispatch_sync with clEnqueueNDRangeKernel, and most importantly, using clSetKernelArg with NULL in the last argument for local variables. Works like a charm.
Here's the new version:
char kernel_add[1024] =
"kernel void add(int a, int b, global int* c, local int* d) \
{\
d[0] = a;\
d[1] = b;\
*c = d[0] + d[1];\
}";
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <OpenCL/opencl.h>
int main(int argc, const char * argv[]) {
int a = 3;
int b = 5;
int c;
cl_device_id device_id;
int err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
cl_context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
cl_command_queue queue = clCreateCommandQueue(context, device_id, 0, &err);
const char* srccode = kernel_add;
cl_program program = clCreateProgramWithSource(context, 1, &srccode, NULL, &err);
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "add", &err);
cl_mem dev_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL);
err = clSetKernelArg(kernel, 0, sizeof(int), &a);
err |= clSetKernelArg(kernel, 1, sizeof(int), &b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &dev_c);
err |= clSetKernelArg(kernel, 3, 2 * sizeof(cl_int), NULL);   // local scratch for d[0] and d[1]
size_t one = 1;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &one, NULL, 0, NULL, NULL);
clFinish(queue);
err = clEnqueueReadBuffer(queue, dev_c, CL_TRUE, 0, sizeof(int), &c, 0, NULL, NULL);
clReleaseMemObject(dev_c);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}
In regular OpenCL, for a kernel parameter declared as a local pointer, you don't allocate a host buffer and pass it in (like you're doing with dev_d). Instead you do a clSetKernelArg with the size of the desired local storage but a NULL pointer (like this: clSetKernelArg(kernel, 2, sizeof(cl_int) * local_work_size[0], NULL)). You'll have to translate that into the Xcode way if you insist on being platform-specific.
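For example, pairing that clSetKernelArg call with the launch looks roughly like this (the argument index 2, the cl_int element type, and the work-group size of 64 are illustrative assumptions):
size_t local_work_size[1] = { 64 };
size_t global_work_size[1] = { 1024 };   /* must be a multiple of the local work size */
/* reserve one cl_int of local memory per work-item for the kernel's local pointer argument; no host buffer is involved */
err = clSetKernelArg(kernel, 2, local_work_size[0] * sizeof(cl_int), NULL);
err |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);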

How to turn off bluetooth device and sound device in Cocoa?

I know that AirPort can be turned off using the CoreWLAN framework.
So I think there are probably similar functions or frameworks for the Bluetooth device and the sound device.
How can I turn off those devices?
I assume that by "cannot have power so that it cannot speak" you mean you simply want to mute the speaker. I found some neat sample code here, using CoreAudio to mute the system's default speaker: http://cocoadev.com/index.pl?SoundVolume
I took the liberty of converting it to pure C and trying it out.
#import <CoreAudio/CoreAudio.h>
#import <stdio.h>
// getting system volume
float getVolume() {
float b_vol;
OSStatus err;
AudioDeviceID device;
UInt32 size;
UInt32 channels[2];
float volume[2];
// get device
size = sizeof device;
err = AudioHardwareGetProperty(kAudioHardwarePropertyDefaultOutputDevice, &size, &device);
if(err!=noErr) {
printf("audio-volume error get device\n");
return 0.0;
}
// try set master volume (channel 0)
size = sizeof b_vol;
err = AudioDeviceGetProperty(device, 0, 0, kAudioDevicePropertyVolumeScalar, &size, &b_vol); //kAudioDevicePropertyVolumeScalarToDecibels
if(noErr==err) return b_vol;
// otherwise, try separate channels
// get channel numbers
size = sizeof(channels);
err = AudioDeviceGetProperty(device, 0, 0,kAudioDevicePropertyPreferredChannelsForStereo, &size,&channels);
if(err!=noErr) printf("error getting channel-numbers\n");
size = sizeof(float);
err = AudioDeviceGetProperty(device, channels[0], 0, kAudioDevicePropertyVolumeScalar, &size, &volume[0]);
if(noErr!=err) printf("error getting volume of channel %d\n",channels[0]);
err = AudioDeviceGetProperty(device, channels[1], 0, kAudioDevicePropertyVolumeScalar, &size, &volume[1]);
if(noErr!=err) printf("error getting volume of channel %d\n",channels[1]);
b_vol = (volume[0]+volume[1])/2.00;
return b_vol;
}
// setting system volume
void setVolume(float involume) {
OSStatus err;
AudioDeviceID device;
UInt32 size;
Boolean canset = false;
UInt32 channels[2];
//float volume[2];
// get default device
size = sizeof device;
err = AudioHardwareGetProperty(kAudioHardwarePropertyDefaultOutputDevice, &size, &device);
if(err!=noErr) {
printf("audio-volume error get device\n");
return;
}
// try set master-channel (0) volume
size = sizeof canset;
err = AudioDeviceGetPropertyInfo(device, 0, false, kAudioDevicePropertyVolumeScalar, &size, &canset);
if(err==noErr && canset==true) {
size = sizeof involume;
err = AudioDeviceSetProperty(device, NULL, 0, false, kAudioDevicePropertyVolumeScalar, size, &involume);
return;
}
// else, try separate channels
// get channels
size = sizeof(channels);
err = AudioDeviceGetProperty(device, 0, false, kAudioDevicePropertyPreferredChannelsForStereo, &size,&channels);
if(err!=noErr) {
printf("error getting channel-numbers\n");
return;
}
// set volume
size = sizeof(float);
err = AudioDeviceSetProperty(device, 0, channels[0], false, kAudioDevicePropertyVolumeScalar, size, &involume);
if(noErr!=err) printf("error setting volume of channel %d\n",channels[0]);
err = AudioDeviceSetProperty(device, 0, channels[1], false, kAudioDevicePropertyVolumeScalar, size, &involume);
if(noErr!=err) printf("error setting volume of channel %d\n",channels[1]);
}
int main() {
printf("The system's volume is currently %f\n", getVolume());
printf("Setting volume to 0.\n");
setVolume(0.0f);
return 0;
}
I ran it and got this:
[04:29:03] [william@enterprise ~/Documents/Programming/c]$ gcc -framework CoreAudio -o mute.o coreaudio.c
.. snipped compiler output..
[04:29:26] [william@enterprise ~/Documents/Programming/c]$ ./mute.o
The system's volume is currently 0.436749
Setting volume to 0.
Hopefully this sends you in the right direction.