running gzip on sigle core in muticore environment under unix

running gzip on sigle core in muticore environment under unix - gzip

I have a requirement to use only single core to test gzip performance in multi-core cpu environment(not sure what is the default settings for gzip in this case). Need help to find out the command to execute gzip compression in single core.
Thanks

gzip is single threaded by default so in effect it will look like it's running on one core ie it might run on several physical cores but it won't be in parallel.
If you absolutely must run on one core and you're on linux you would set affinity to a particular core.
http://man7.org/linux/man-pages/man2/sched_setaffinity.2.html
This is code that I got from the man page.
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
int
main(int argc, char *argv[])
{
cpu_set_t set;
int parentCPU, childCPU;
int nloops, j;
if (argc != 4) {
fprintf(stderr, "Usage: %s parent-cpu child-cpu num-loops\n",
argv[0]);
exit(EXIT_FAILURE);
}
parentCPU = atoi(argv[1]);
childCPU = atoi(argv[2]);
nloops = atoi(argv[3]);
CPU_ZERO(&set);
switch (fork()) {
case -1: /* Error */
errExit("fork");
case 0: /* Child */
CPU_SET(childCPU, &set);
if (sched_setaffinity(getpid(), sizeof(set), &set) == -1)
errExit("sched_setaffinity");
for (j = 0; j < nloops; j++)
getppid();
exit(EXIT_SUCCESS);
default: /* Parent */
CPU_SET(parentCPU, &set);
if (sched_setaffinity(getpid(), sizeof(set), &set) == -1)
errExit("sched_setaffinity");
for (j = 0; j < nloops; j++)
getppid();
wait(NULL); /* Wait for child to terminate */
exit(EXIT_SUCCESS);
}
}
If you need to test with no interruptions from the kernel you need to write a kernel module for that.

Related

Find CPU times and system times of process in linux

I have a main program that creates two children and each children calls execlv. At the end of the program how do I calculate the CPU times and system times of the parent and two process?
#include <sys/types.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
int main()
{
pid_t pid1,pid2,wid; // variable for parent and two children
char *my_args[3]; // strign array for containing the arguments for executing sigShooter1
// int aInt = 368; //
char str[15]; // strign to contain the pids of children when passing as command line arguments
pid1 = fork();
if (pid1 < 0)
{
fprintf(stderr, ": fork failed: %s\n", strerror(errno));
exit(1);
}
if(pid1 == 0)
{
my_args[0] = "sigperf1";
my_args[1] = "0";
my_args[2] = NULL;
execv("sigshooter1",my_args);
fprintf(stderr,"sigshooter1 cannot be executed by first child...");
exit(-1);
}
pid2 = fork();
if(pid2 < 0)
{
fprintf(stderr, ": fork failed: %s\n", strerror(errno));
exit(1);
}
if(pid2 == 0)
{
sprintf(str, "%d", pid1);
my_args[0] = "sigperf1";
my_args[1] = str;
my_args[2] = NULL;
// printf("this is converted = %s\n",my_args[1]);
//sleep(1);
execv("sigshooter1",my_args);
fprintf(stderr,"sigshooter1 cannot be executed by second child...");
exit(-1);
}
wid = wait(NULL);
}

You'll need a profiler for that. For starters, you can run perf stat ./a.out to get the total CPU time of all three processes, and perf stat -i ./a.out to get the CPU time of parent process only.
If you need something more detailed, take a look at more serious tools like valgrind or gprof.

Read status of FT245RL pins

Sorry for my ignorance but I am very new in FTDI chip Linux software development.
I have module based on FT245RL chip, programmed to be 4 port output (relays) and 4 port opto isolated input unit.
I found out in Internet program in C to turn on/off relays connected to outputs D0 to D3. After compiling it works properly. Below draft of this working program:
/* switch4.c
* # gcc -o switch4 switch4.c -L. -lftd2xx -Wl,-rpath,/usr/local/lib
* Usage
* # switch4 [0-15], for example # switch4 1
* */
#include <stdio.h>
#include <stdlib.h>
#include "./ftd2xx.h"
int main(int argc, char *argv[])
{
FT_STATUS ftStatus;
FT_HANDLE ftHandle0;
int parametr;
LPVOID pkod;
DWORD nBufferSize = 0x0001;
DWORD dwBytesWritten;
if(argc > 1) {
sscanf(argv[1], "%d", ¶metr);
}
else {
parametr = 0;
}
FT_SetVIDPID(0x5555,0x0001); // id from lsusb
FT_Open(0,&ftHandle0);
FT_SetBitMode(ftHandle0,15,1);
pkod=&parametr;
ftStatus = FT_Write(ftHandle0,pkod,nBufferSize,&dwBytesWritten);
ftStatus = FT_Close(ftHandle0);
}
My question is. How can I read in the same program, status of D4 to D7 pins, programmed as inputs? I mean about "printf" to stdout the number representing status (zero or one) of input pins (or all input/output pins).
Can anybody help newbie ?
UPDATE-1
This is my program with FT_GetBitMode
// # gcc -o read5 read5.c -L. -lftd2xx -Wl,-rpath,/usr/local/lib
#include <stdio.h>
#include <stdlib.h>
#include "./ftd2xx.h"
int main(int argc, char *argv[])
{
FT_STATUS ftStatus;
FT_HANDLE ftHandle0;
UCHAR BitMode;
FT_SetVIDPID(0x5555,0x0001); // id from lsusb
ftStatus = FT_Open(0,&ftHandle0);
if(ftStatus != FT_OK) {
printf("FT_Open failed");
return;
}
FT_SetBitMode(ftHandle0,15,1);
ftStatus = FT_GetBitMode(ftHandle0, &BitMode);
if (ftStatus == FT_OK) {
printf("BitMode contains - %d",BitMode);
}
else {
printf("FT_GetBitMode FAILED!");
}
ftStatus = FT_Close(ftHandle0);
}
But it returns "FT_GetBitMode FAILED!" instead value of BitMode

FT_GetBitMode returns the instantaneous value of the pins. A single byte will be
returned containing the current values of the pins, both those which are inputs and
those which are outputs.
Source.

Finally I found out whats going wrong. I used incorrect version of ftdi library. The correct version dedicated for x86_64 platform is located here:
Link to FTDI library

configuration of adafruit ultimate gps from raspberry pi does not work

I connected my ultimate gps breakout to my raspberry pi using the USB to TTL serial cable. Using C code I can easily connect to and read NMEA sentences from the GPS. But when I write configuration commands such as PMTK220 to set the update rate, they are ignored. I should get back a PMTK_ACK reporting success or failure, but it is not forthcoming. The problem also occurs when I use terminal windows. ie. I run:
while (true) do cat -A /dev/ttyUSB0 ; done
in one terminal, and get a stream of $GPGGA, $GPGSA, $GPRMC etc messages. In another terminal I run:
echo "$PMTK220,200*2C\r\n" > /dev/ttyUSB0
The NMEA messages continue but there is no PMTK001 coming back. Any ideas?

Ok, here's some code (inspired by gpsd source) that demonstrates sending a configuration message and getting an acknowledgement:
#define _BSD_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <sys/ioctl.h>
#include <termios.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <semaphore.h>
#include <string.h>
#ifndef CRTSCTS
# ifdef CNEW_RTSCTS
# define CRTSCTS CNEW_RTSCTS
# else
# define CRTSCTS 0
# endif /* CNEW_RTSCTS */
#endif /* !CRTSCTS */
int main (int argc, char **argv) {
int const baudr = B9600, mode = O_RDWR | O_NONBLOCK | O_NOCTTY;
int const fd = open("/dev/ttyUSB0", mode);
assert (fd != -1);
ioctl(fd, (unsigned long)TIOCEXCL);
struct termios ttyOld, ttyNew;
assert(tcgetattr(fd, &ttyOld) != -1);
ttyNew = ttyOld;
ttyNew.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
ttyNew.c_cflag |= CREAD | CLOCAL | CS8;
ttyNew.c_iflag = ttyNew.c_oflag = 0;
ttyNew.c_lflag = ICANON;
cfsetispeed(&ttyNew, baudr);
cfsetospeed(&ttyNew, baudr);
assert(tcsetattr(fd, TCSANOW, &ttyNew) != -1);
tcflush(fd, TCIOFLUSH);
usleep(200000);
tcflush(fd, TCIOFLUSH);
int const oldfl = fcntl(fd, F_GETFL);
assert (oldfl != -1);
fcntl(fd, F_SETFL, oldfl & ~O_NONBLOCK);
tcdrain(fd);
printf("port opened baudr=%d mode=%d i=%d o=%d c=%d l=%d fl=%d\n", baudr, mode, ttyNew.c_iflag, ttyNew.c_oflag, ttyNew.c_cflag, ttyNew.c_lflag, oldfl);
unsigned char buf[2048];
int count = 0;
while(++count < 20) {
if (count == 4) {
char const *const cmd = "$PMTK220,200*2C\r\n";
int const n = strlen(cmd);
assert(write(fd, cmd, n) == n);
tcdrain(fd);
printf("wrote command %d: %s\n", n, cmd);
}
int const n = read(fd, buf, sizeof(buf));
buf[(n >= sizeof(buf)) ? (sizeof(buf) - 1) : n] = 0;
printf(buf);
}
tcsetattr(fd, TCSANOW, &ttyOld);
close(fd);
}
with results:
pi#pi01 ~/c $ ./test
port opened baudr=13 mode=2306 i=0 o=0 c=3261 l=2 fl=2050
$GPGGA,175748.089,,,,,0,00,,,M,,M,,*71
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175748.089,V,,,,,0.00,0.00,260715,,,N*43
wrote command 17: $PMTK220,200*2C
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$PMTK001,220,2*31
$GPGGA,175749.089,,,,,0,00,,,M,,M,,*70
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPGSV,1,1,01,11,,,31*7A
$GPRMC,175749.089,V,,,,,0.00,0.00,260715,,,N*42
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175750.089,,,,,0,00,,,M,,M,,*78
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175750.089,V,,,,,0.00,0.00,260715,,,N*4A
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175751.089,,,,,0,00,,,M,,M,,*79
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175751.089,V,,,,,0.00,0.00,260715,,,N*4B
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175752.089,,,,,0,00,,,M,,M,,*7A
pi#pi01 ~/c $

Gaussian Elimination in OpenMP - Performance Problems

I'm new to openMP, and I was trying to parallelize a Gaussian Elimination, and I'm having troubles with performance. I'm compiling the code below using:
gcc -o gaussian_elimination gaussian_elimination.c -lm -lgsl -lgslcblas -fopenmp -Wall
And setting the number of threads on the terminal with export OMP_NUM_THREADS
And my problem is that the parallel version of this code is running way slower than the serial version of the same. I believe that this is because I declared #pragma parallel for inside the external loop, and this would force openMP to create and destroy thread at each iteration, which would be incredibly costly, but I haven't seen any other clear way to do the same kind of operation, and I don't think I can exchange the external loop with the internal parallel ones.
I'm probably missing something, but I have not found any other forum threads here commenting on this particular problem. As far as execution correctness goes, my code seems to be functioning alright, the problem is just performance-wise.
Thanks in Advance
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <stdbool.h>
#include <time.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_rng.h>
#define DEBUG_MODE false
int random_matrix(double *A, int N,long long int seed);
int print_matrix(double *A, int N);
int print_vector(float *b,int N);
int main(int argc, char **argv){
int N=1000;
int i,j,k,l,i_p,s,err,D=N+1;
long long int seed=9089123498274; // just a fixed seed only not to bother
double *A,pivot,sw,tmp,begin,end,time_spent;
double *Aref,*bref;
gsl_matrix_view gsl_m;
gsl_vector_view gsl_b;
gsl_vector *gsl_x;
gsl_permutation *gsl_p;
/* Input */
//scanf("%d",&N);
A = (double*)malloc(N*(N+1)*sizeof(double));
if(A==NULL){
printf("Matrix A not allocated\n");
return 1;
}
Aref = (double*)malloc(N*N*sizeof(double));
if(Aref==NULL){
printf("Matrix A not allocated\n");
return 1;
}
bref = (double*)malloc(N*sizeof(double));
if(bref==NULL){
printf("Vector B not allocated\n");
return 2;
}
/*
for(i=0;i<N;i+=1)
for(j=0;j<N;j+=1)
scanf("%f",&(A[i*N+j]));
for(i=0;i<N;i+=1)
scanf("%f",&(b[i]));
*/
/*
for(i=0;i<N*N;i++)
A[i]=(float) a_data[i];
for(i=0;i<N;i+=1)
b[i]=(float) b_data[i]; */
err= random_matrix(A,N,seed);
if(err!=0)
return err;
for(i=0;i<N;i++)
for(j=0;j<N;j+=1)
Aref[i*N+j]= A[i*D+j];
for(i=0;i<N;i+=1)
bref[i]= A[i*D+N];//b[i];
printf("GSL reference:\n");
gsl_m = gsl_matrix_view_array (Aref, N, N);
gsl_b = gsl_vector_view_array (bref, N);
gsl_x = gsl_vector_alloc (N);
gsl_p = gsl_permutation_alloc(N);
begin = clock();
gsl_linalg_LU_decomp(&gsl_m.matrix, gsl_p, &s);
gsl_linalg_LU_solve(&gsl_m.matrix, gsl_p, &gsl_b.vector, gsl_x);
end = clock();
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("gsl matrix solver: %lf s\n",time_spent);
if(DEBUG_MODE==true)
gsl_vector_fprintf(stdout,gsl_x,"%f");
gsl_permutation_free(gsl_p);
gsl_vector_free(gsl_x);
begin = omp_get_wtime();
for(i=0;i<N;i+=1){
i_p = i;
pivot = fabs(A[i*D+i]);
for(j=i;j<N;j+=1)
if(pivot<fabs(A[j*D+i])){
pivot = fabs(A[j*D+i]);
i_p = j;
}
#pragma omp parallel for shared(i,N,A,i_p) private(j,sw)
for(j=i;j<D;j+=1){
sw = A[i*D+j];
A[i*D+j] = A[i_p*D+j];
A[i_p*D+j] = sw;
}
pivot=A[i*D+i];
#pragma omp parallel for shared(i,D,pivot,A) private(j)
for(j=0;j<D;j++)
A[i*D+j]=A[i*D+j]/pivot;
#pragma omp parallel for shared(i,A,N,D) private(tmp,j,k,l)
for(j=i+1;j<N+i;j++){
k=j%N;
tmp=A[k*D+i];
for(l=0;l<D;l+=1)
A[k*D+l]=A[k*D+l]-tmp*A[i*D+l];
}
}
end = omp_get_wtime();
time_spent = (end - begin);
printf("omp matrix solver: %lf s\n",time_spent);
/* Output */
if(DEBUG_MODE==true){
printf("\nCalculated: \n");
for(i=0;i<N;i+=1)
printf("%.6f \n",A[i*(N+1)+N]);
printf("\n");
}
free(A);
return 0;
}
int random_matrix(double *A, int N,long long int seed){
int i,j;
const gsl_rng_type * T;
gsl_rng *r;
gsl_rng_env_setup();
T = gsl_rng_default;
r = gsl_rng_alloc (T);
for(i=0;i<N;i++)
for(j=0;j<=N;j++)
A[i*(N+1)+j]= gsl_rng_uniform (r);
gsl_rng_free (r);
return 0;
}
int print_matrix(double *A, int N){
int i,j;
for(i=0;i<N;i++)
for(j=0;j<=N+1;j++){
if(j==0 || j==N || j==N+1)
printf(" | ");
printf("%.2f ",A[i*(N+1)+j]);
if(j==N+1)
printf("\n");
}
return 0;
}
int print_vector(float *b,int N){
int i;
for(i=0;i<N;i+=1)
printf("%f\n", b[i]);
return 0;
}
I updated the code above with the omp_get_wtime(), and now it reads as the wtime diminishing as I include more and more threads, so, it does behave as it should, although not as clean as I would like.
For 1000 x 1000 matrices I get 0.25 s for the GSL lib, 4.4 s for the serial omp run and 1.5 s for the 4-thread run.
For 3000 x 3000 matrices, I get ~ 9s for the GSL lib, ~ 117 s for the serial omp run and ~ 44 s for the 4 thread-run, thus at least adding more threads indeed speeds up the program!
Thanks a lot everyone

How to print HFS Volume header

Any one please give code snippet for how to print volume header of HFS+ disk.

I’ve written a small program (based on hfs-183.1) that prints some of the information declared in struct HFSPlusVolumeHeader. The program must be run as root — for instance, via sudo(8):
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <hfs/hfs_format.h>
#include <libkern/OSByteOrder.h>
int main(void) {
int fd;
struct stat stat_buf;
struct HFSPlusVolumeHeader vheader;
const char *vname = "/dev/rdisk0s2";
if (lstat(vname, &stat_buf) == -1) {
fprintf(stderr, "Couldn't stat %s\n", vname);
perror(NULL);
exit(1);
}
if ((stat_buf.st_mode & S_IFMT) != S_IFCHR) {
fprintf(stderr, "%s is not a raw char device\n", vname);
perror(NULL);
exit(2);
}
fd = open(vname, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "%s couldn't be opened for reading\n", vname);
perror(NULL);
exit(3);
}
// The volume header starts at offset 1024
if (pread(fd, &vheader, sizeof vheader, 1024) != sizeof vheader) {
fprintf(stderr, "couldn't read %s's volume header\n", vname);
perror(NULL);
exit(4);
}
printf("fileCount = %u\n"
"folderCount = %u\n"
"blockSize = %u\n"
"totalBlocks = %u\n"
"freeBlocks = %u\n",
OSSwapBigToHostInt32(vheader.fileCount),
OSSwapBigToHostInt32(vheader.folderCount),
OSSwapBigToHostInt32(vheader.blockSize),
OSSwapBigToHostInt32(vheader.totalBlocks),
OSSwapBigToHostInt32(vheader.freeBlocks));
close(fd);
return 0;
}
The header file <hfs/hfs_format.h> declares struct HFSPlusVolumeHeader. See this file for the complete list of fields inside a HFS+ volume header.

The system call getattrlist() might give you the information you need.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

running gzip on sigle core in muticore environment under unix - gzip

I have a requirement to use only single core to test gzip performance in multi-core cpu environment(not sure what is the default settings for gzip in this case). Need help to find out the command to execute gzip compression in single core. Thanks

Related

Find CPU times and system times of process in linux

Read status of FT245RL pins

configuration of adafruit ultimate gps from raspberry pi does not work

Gaussian Elimination in OpenMP - Performance Problems

How to print HFS Volume header

Categories

Resources