I am trying to implement vector addition using OpenMP on the host and using OpenMP offloading. However, the OpenMP offloading version takes more time than the host version. Why is that?
openmp-host.c
#include <assert.h>
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Host-side OpenMP vector addition: c[i] = a[i] + b[i].
 *
 * The index range is split by hand across the threads of one parallel region;
 * each thread times only its own chunk and thread 0 prints its measurement.
 *
 * argv[1] (optional): element count N, defaults to 1000000.
 * Returns 0 on success, EXIT_FAILURE if a buffer cannot be allocated.
 */
int main(int argc, char *argv[]) {
  unsigned N = (argc > 1 ? atoi(argv[1]) : 1000000);
  float *a = (float *)calloc(N, sizeof(float));
  float *b = (float *)calloc(N, sizeof(float));
  float *c = (float *)calloc(N, sizeof(float));
  /* calloc(0, ...) may legitimately return NULL, so only treat NULL as a
     failure when elements were actually requested. */
  if (N > 0 && (!a || !b || !c)) {
    fprintf(stderr, "allocation of %u floats failed\n", N);
    free(a);
    free(b);
    free(c);
    return EXIT_FAILURE;
  }
  /* Every pair sums to N, which is what the verification loop checks. */
  for (unsigned i = 0; i < N; i++) {
    a[i] = i;
    b[i] = N - i;
  }
#pragma omp parallel
  {
    unsigned thrds = omp_get_num_threads(), tid = omp_get_thread_num();
    /* Block distribution: the first `rem` threads take one extra element. */
    unsigned size = N / thrds, rem = N - size * thrds;
    size += (tid < rem);
    unsigned s = (tid < rem ? size * tid : (tid * size + rem)), e = s + size;
    double t = omp_get_wtime();
    for (unsigned i = s; i < e; i++) {
      c[i] = a[i] + b[i];
    }
    t = omp_get_wtime() - t;
    if (tid == 0)
      printf("N: %u # threads: %u time: %e\n", N, thrds, t);
  }
  for (unsigned i = 0; i < N; i++)
    assert(fabs(c[i] - N) < 1e-8);
  /* Release all three buffers (the original freed only a). */
  free(a);
  free(b);
  free(c);
  return 0;
}
openmp-device.c
#include <assert.h>
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * OpenMP target-offload vector addition: c[i] = a[i] + b[i].
 *
 * The buffers are mapped to the device before the timer starts and c is
 * copied back after it stops, so the printed time covers only the
 * `target teams distribute parallel for simd` region.
 * NOTE(review): that region is executed exactly once, so whatever one-time
 * launch cost the runtime has is part of the measurement - confirm with a
 * warm-up launch before comparing against the host version.
 *
 * argv[1] (optional): element count N, defaults to 1000000.
 * Returns 0 on success, EXIT_FAILURE on a negative N or failed allocation.
 */
int main(int argc, char *argv[]) {
  int N = (argc > 1 ? atoi(argv[1]) : 1000000);
  if (N < 0) {
    fprintf(stderr, "N must be non-negative\n");
    return EXIT_FAILURE;
  }
  int *a = (int *)calloc(N, sizeof(int));
  int *b = (int *)calloc(N, sizeof(int));
  int *c = (int *)calloc(N, sizeof(int));
  /* calloc(0, ...) may return NULL without it being an error. */
  if (N > 0 && (!a || !b || !c)) {
    fprintf(stderr, "allocation of %d ints failed\n", N);
    free(a);
    free(b);
    free(c);
    return EXIT_FAILURE;
  }
  for (int i = 0; i < N; i++) {
    a[i] = i;
    b[i] = N - i;
  }
  /* a and b are inputs and must be copied; c is only written by the kernel,
     so it needs device storage but no host-to-device transfer. */
#pragma omp target enter data map(to: a[0:N], b[0:N]) map(alloc: c[0:N])
  double t = omp_get_wtime();
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < N; i++) {
    c[i] = a[i] + b[i];
  }
  t = omp_get_wtime() - t;
  /* Copy the result back and drop the device copies of the inputs. */
#pragma omp target exit data map(from: c[0:N]) map(release: a[0:N], b[0:N])
  printf("time: %e \n", t);
  /* Integer data: compare exactly instead of against a float tolerance. */
  for (int i = 0; i < N; i++)
    assert(c[i] == N);
  free(a);
  free(b);
  free(c);
  return 0;
}
I used these two commands to compile, and both work fine. I have also installed the oneAPI toolkit and Level Zero.
icx -qopenmp -fopenmp-targets=spir64 openmp-device.c -o omp_device
icx -qopenmp openmp-host.c -o omp_host
Why does OpenMP offloading take more time than OpenMP on the host?
Related
I am working with an ESC/POS printer. Using the code below I am able to print the image, but the image does not print properly, as you can see in the image below. Please review my code and let me know where exactly the issue is.
/// Loads "download.png", converts it to a 1-bit bitmap, wraps every row in a
/// raster command and writes the result to the peripheral in 100-byte chunks.
- (void) btnPrintPicture{
    UIImage * img = [UIImage imageNamed:@"download.png"];
    if (img == nil) {
        // Nothing to print if the image resource cannot be loaded.
        return;
    }
    int width = img.size.width;
    int height = img.size.height;

    // Per row: 8 command-header bytes followed by width / 8 packed data bytes.
    // This size must stay identical to the layout eachLinePixToCmd writes.
    NSUInteger commandLength = (NSUInteger)height * (8 + width / 8);

    unsigned char * binaryImageData = malloc(width * height);
    unsigned char * data = malloc(commandLength);
    unsigned char * grayData = [self convertImageToGray:img];
    if (binaryImageData == NULL || data == NULL || grayData == NULL) {
        // free(NULL) is a no-op, so releasing all three is safe here.
        free(grayData);
        free(binaryImageData);
        free(data);
        return;
    }

    format_K_threshold(grayData, width, height, binaryImageData);
    eachLinePixToCmd(binaryImageData, width, height, 0, data);

    // NSData copies the bytes, so the C buffers can be released right away.
    NSData *comData = [[NSData alloc] initWithBytes:(const void *)data length:commandLength];
    free(grayData);
    free(binaryImageData);
    free(data);

    // Send the command stream in fixed-size pieces; the last one may be shorter.
    const NSUInteger splitBytes = 100;
    for (NSUInteger offset = 0; offset < comData.length; offset += splitBytes) {
        NSUInteger chunkLength = MIN(splitBytes, comData.length - offset);
        NSData *subData = [comData subdataWithRange:NSMakeRange(offset, chunkLength)];
        [self.discoveredPeripheral writeValue:subData forCharacteristic:self.discoveredCharacteristic type:CBCharacteristicWriteWithResponse];
    }
}
This method is used for converting image to grayscale.
/// Renders the image into a 32-bit RGB bitmap and returns a newly malloc'ed
/// buffer with one byte per pixel holding the average of the selected channels.
/// The caller owns the returned buffer and must free() it.
-(unsigned char *)convertImageToGray:(UIImage *)i
{
    // Bit flags naming the channels that take part in the average.
    const int redFlag = 1;
    const int greenFlag = 2;
    const int blueFlag = 4;
    const int enabledChannels = greenFlag | blueFlag | redFlag;

    const int pixelsWide = i.size.width;
    const int pixelsHigh = i.size.height;

    // Draw the image into a raw buffer with one uint32_t per pixel.
    uint32_t *colorPixels = (uint32_t *) malloc(pixelsWide * pixelsHigh * sizeof(uint32_t));
    CGColorSpaceRef rgbSpace = CGColorSpaceCreateDeviceRGB();
    CGContextRef bitmap = CGBitmapContextCreate(colorPixels, pixelsWide, pixelsHigh, 8, pixelsWide * 4, rgbSpace, kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipLast);
    CGContextSetInterpolationQuality(bitmap, kCGInterpolationHigh);
    CGContextSetShouldAntialias(bitmap, NO);
    CGContextDrawImage(bitmap, CGRectMake(0, 0, pixelsWide, pixelsHigh), i.CGImage);
    CGContextRelease(bitmap);
    CGColorSpaceRelease(rgbSpace);

    // Average the enabled channels of every pixel, row by row.
    uint8_t *grayPixels = (uint8_t *) malloc(pixelsWide * pixelsHigh);
    for (int row = 0; row < pixelsHigh; row++) {
        const int rowStart = row * pixelsWide;
        for (int col = 0; col < pixelsWide; col++) {
            const uint32_t packed = colorPixels[rowStart + col];
            uint32_t total = 0;
            uint32_t used = 0;
            if (enabledChannels & redFlag) {
                total += (packed >> 24) & 255;
                used++;
            }
            if (enabledChannels & greenFlag) {
                total += (packed >> 16) & 255;
                used++;
            }
            if (enabledChannels & blueFlag) {
                total += (packed >> 8) & 255;
                used++;
            }
            grayPixels[rowStart + col] = total / used;
        }
    }

    free(colorPixels);
    return grayPixels;
}
/*
 * Binarises a grayscale image against its own mean brightness.
 *
 * orgpixels: xsize * ysize grayscale bytes, row after row.
 * despixels: receives xsize * ysize bytes; 0 where the source pixel is
 *            brighter than the mean, 1 otherwise.
 *
 * Does nothing when either pointer is null or either dimension is not
 * positive (the mean of an empty image is undefined).
 */
void format_K_threshold(unsigned char * orgpixels, int xsize, int ysize, unsigned char * despixels) {
    if (!orgpixels || !despixels || xsize <= 0 || ysize <= 0) {
        return;
    }

    const long long pixelCount = (long long)xsize * (long long)ysize;

    /* 64-bit accumulator: an int overflows once the image holds more than
       roughly 8.4 million bright pixels. */
    long long graytotal = 0;
    for (long long k = 0; k < pixelCount; ++k) {
        graytotal += orgpixels[k];
    }

    /* For positive integers floor(floor(t / y) / x) == floor(t / (x * y)),
       so this matches the original graytotal / ysize / xsize. */
    const int grayave = (int)(graytotal / pixelCount);

    for (long long k = 0; k < pixelCount; ++k) {
        despixels[k] = (orgpixels[k] > grayave) ? 0 : 1;
    }
}
This method is using ESC commands to print the image.
/*
 * Encodes a 1-byte-per-pixel bitmap as one raster command per image row.
 *
 * src:     nWidth * nHeight pixels; a non-zero value sets the printed bit.
 * nMode:   only its lowest bit is written into the command header.
 * data:    output buffer of at least nHeight * (8 + nWidth / 8) bytes.
 *
 * Each row is emitted as the 8-byte header 1D 76 30 m xL xH 01 00 followed by
 * nWidth / 8 data bytes, most significant bit first.
 *
 * NOTE: nWidth / 8 truncates and the source index is never realigned to the
 * start of a row, so a width that is not a multiple of 8 makes every row
 * begin nWidth % 8 pixels earlier than the previous one. The layout is kept
 * because callers size `data` with the same formula.
 */
void eachLinePixToCmd(unsigned char * src, int nWidth, int nHeight, int nMode, unsigned char * data) {
    const int nBytesPerLine = nWidth / 8;
    int k = 0;
    for (int i = 0; i < nHeight; i++) {
        unsigned char *line = data + i * (8 + nBytesPerLine);
        line[0] = 0x1d;
        line[1] = 0x76;
        line[2] = 0x30;
        line[3] = (unsigned char) (nMode & 0x01);
        /* xL / xH are the low and high bytes of the row length, i.e. the
           length is xL + xH * 256 (the original used 0xff = 255 here). */
        line[4] = (unsigned char) (nBytesPerLine % 256);
        line[5] = (unsigned char) (nBytesPerLine / 256);
        /* yL = 1, yH = 0: each command carries exactly one row. */
        line[6] = 0x01;
        line[7] = 0x00;
        for (int j = 0; j < nBytesPerLine; j++) {
            /* Pack eight consecutive pixels into one byte, leftmost pixel in
               the most significant bit. */
            unsigned char packed = 0;
            for (int bit = 0; bit < 8; bit++) {
                if (src[k + bit]) {
                    packed |= (unsigned char) (0x80 >> bit);
                }
            }
            line[8 + j] = packed;
            k += 8;
        }
    }
}
Thanks in advance.
Based on having fixed a very similar bug in a different project with this change, I am guessing that your image width is not divisible by 8.
This line will drop nWidth % 8 pixels on each row, causing a rightward slant if the image width is not divisible by 8.
int nBytesPerLine = nWidth / 8;
Instead, you should be padding with zeroes:
int nBytesPerLine = (nWidth + 7) / 8;
Your data variable needs to grow to match as well; it has the same issue.
Lastly, you are issuing the GS v 0 command for each row, which is not very efficient. You can issue this once for the entire image, and specify the height. From the same project, a C example is here.
I have an NSMutableData containing "30352514 38001300 00000001".
I need to convert each byte to its bits, for example
00110000001101
and then convert that to an NSString.
Thanks.
Use this for bytes:
// Log the raw bytes as text. An NSData buffer is not NUL-terminated, so the
// read is bounded with an explicit length instead of a bare %s.
const char *byte = [data bytes];
NSLog(@"%.*s", (int)[data length], byte);
This is for bits:
// Print every byte of `data` as eight '0'/'1' characters, most significant
// bit first, separated by spaces.
const unsigned char *byte = [data bytes];
NSUInteger length = [data length];
for (NSUInteger i = 0; i < length; i++) {
    // Unsigned, so the right shift below never drags in sign bits.
    unsigned char n = byte[i];
    char buffer[9];
    buffer[8] = 0; //for null
    // Fill from the last position backwards: the lowest bit is examined first
    // but has to appear last in the string.
    for (int j = 7; j >= 0; j--) {
        buffer[j] = (n & 0x01) ? '1' : '0';
        n >>= 1;
    }
    printf("%s ",buffer);
}
Why am I getting this error?
control reaches end of non-void function [-Wreturn-type]
#import <Foundation/Foundation.h>
const int MAX = 4;
// Question code, kept exactly as posted: tries to store four names and log
// each of them through an array of pointers.
int main (int argc, const char * argv[])
{
// names is declared as an array of char, yet it is given four string literals
// as initialisers.
char names[] = {
"Zara Ali",
"Hina Ali",
"Nuha Ali",
"Sara Ali",};
char *pointer[MAX];
for(int i=0; i<MAX; i++)
{
// &names[i] is the address of the i-th char of names, not of the i-th name.
pointer[i] = &names[i];
}
// i is used here without a declaration: the i above was declared inside the
// first for statement and is out of scope at this point.
for (i = 0; i < MAX; i++)
{
// *pointer[i] is a single char, but it is passed for the %s conversion.
NSLog(#"Value of var[%d] = %s\n", i, *pointer[i] );
}
return 0;
}
A vector of chars is not the same as a vector of strings. A char has space for just one character. char[] is a vector of characters, not a vector of strings. If you initialise it like this:
char names[] = {
"Zara Ali",
"Hina Ali",
"Nuha Ali",
"Sara Ali",};
The names variable does not hold pointers to the strings: it is a single array of characters, so at most it is initialised with just the first string. All the following strings are ignored (or rejected by the compiler as excess initialisers).
I think you are trying to do something like this:
#import <Foundation/Foundation.h>
const int MAX = 4;
// Stores four names as NSString objects, copies the references into a second
// array and logs each one with its index.
int main (int argc, const char * argv[])
{
    // Objective-C string literals need the @ prefix; each element is an
    // NSString object rather than a C string.
    NSString *names[] = {
        @"Zara Ali",
        @"Hina Ali",
        @"Nuha Ali",
        @"Sara Ali",};
    NSString *pointer[MAX];
    for (int i = 0; i < MAX; i++)
    {
        pointer[i] = names[i];
    }
    for (int i = 0; i < MAX; i++)
    {
        // %@ is the format specifier for Objective-C objects.
        NSLog(@"Value of var[%d] = %@\n", i, pointer[i] );
    }
    return 0;
}
Has anyone implemented CBC-MAC (DESede) in Objective-C? Could you show sample code or explain how to correct my code?
Here is my effort so far....
// Question code, kept exactly as posted: an attempt at a CBC-MAC over a
// 16-byte message using two-key triple DES. The method's closing brace is not
// part of the posted excerpt.
-(void)tryMac
{
unsigned char blockCount;
// 16 key bytes; both key pointers below refer to the start of this array.
unsigned char key[16] = "\x1\x2\x3\x4\x5\x6\x7\x8\x9\x0\x1\x2\x3\x4\x5\x6";
// ASCII "Thisismystring" followed by two zero bytes.
unsigned char data[16] = "\x54\x68\x69\x73\x69\x73\x6d\x79\x73\x74\x72\x69\x6e\x67\x0\x0";
// desKey1 and desKey2 alias the same bytes, so both schedules are built from
// identical key material.
DES_cblock *desKey1 = (DES_cblock* ) key;
DES_cblock *desKey2 = (DES_cblock* ) key;
// 8-byte all-zero IV; it is not freed anywhere in the code shown.
unsigned char *iv = (unsigned char *) malloc(8);
memset(iv, 0x0, 8);
DES_set_odd_parity(desKey1);
DES_set_odd_parity(desKey2);
DES_key_schedule schedule1;
DES_key_schedule schedule2;
// The return values of DES_set_key_checked are not inspected.
DES_set_key_checked(desKey1, &schedule1);
DES_set_key_checked(desKey2, &schedule2);
int len = sizeof(data);
// 16 / 4 = 4 iterations.
blockCount = len / 4;
int lastBlock = 0;
for(unsigned char i = 0; i < blockCount; i++)
{
// 16 / 4 = 4 bytes per iteration.
int bufferLen = sizeof(data)/blockCount;
unsigned char buffer[bufferLen];
// The source pointer is always the start of data; lastBlock is passed as
// memccpy's stop-character argument, not as an offset.
// NOTE(review): memcpy(buffer, data + lastBlock, bufferLen) was presumably
// intended - confirm.
memccpy(buffer, data, lastBlock, bufferLen);
lastBlock = (i + 1) * bufferLen;
// A fresh 4-byte buffer per iteration; it is not freed in the code shown.
unsigned char *result = (unsigned char *) malloc(4);
// True only on the final iteration (lastBlock reaches 16), so the cipher is
// invoked once, on whatever buffer holds at that point.
if (lastBlock == len)
{
// NOTE(review): the IV is 8 bytes while buffer and result are 4 bytes each;
// check against the OpenSSL documentation how many bytes this call reads and
// writes for a 4-byte length.
DES_ede2_cbc_encrypt(buffer, result, bufferLen, &schedule1, &schedule2, (DES_cblock *) iv, DES_ENCRYPT);
NSData *encryptedData = [NSData dataWithBytes:(const void *)result length:4];
NSString *encryptedString = [self stringWithHexFromData:encryptedData];
NSLog(#"Encrypted Block %#",encryptedString);
}
}
uint8_t *var;
var=//something;
Now I want to loop through each element of var.
How can I do this?
Please help.
Write the loop as you would in plain C; uint8_t *var is just a pointer to a C array.
// The index is a size_t: a uint8_t counter wraps at 255, so the loop would
// never terminate for an ARRAY_LENGTH above 255.
for (size_t i = 0; i < ARRAY_LENGTH; ++i) {
    var[i] = ...; // Do whatever you want
}
For example
// Allocate five bytes, fill them with 0..4, log them and release the buffer.
const size_t count = 5;
uint8_t *v = (uint8_t *)malloc(count * sizeof(uint8_t));
if (v != NULL) {
    // size_t index: a uint8_t counter would wrap for lengths above 255.
    for (size_t i = 0; i < count; ++i) {
        v[i] = (uint8_t)i;
    }
    for (size_t i = 0; i < count; ++i) {
        // uint8_t is promoted to int when passed to NSLog, so %d is correct.
        NSLog(@"%d\n", v[i]);
    }
    free(v);
}
Note that uint8_t is the same as unsigned char:
// Guarded so the typedef is emitted only once, even if this text is processed
// more than once.
#ifndef _UINT8_T
#define _UINT8_T
// uint8_t is declared as an alias for unsigned char.
typedef unsigned char uint8_t;
#endif /*_UINT8_T */