Sorry for my ignorance but I am very new in FTDI chip Linux software development.
I have module based on FT245RL chip, programmed to be 4 port output (relays) and 4 port opto isolated input unit.
I found out in Internet program in C to turn on/off relays connected to outputs D0 to D3. After compiling it works properly. Below draft of this working program:
/* switch4.c
* # gcc -o switch4 switch4.c -L. -lftd2xx -Wl,-rpath,/usr/local/lib
* Usage
* # switch4 [0-15], for example # switch4 1
* */
#include <stdio.h>
#include <stdlib.h>
#include "./ftd2xx.h"
int main(int argc, char *argv[])
{
FT_STATUS ftStatus;
FT_HANDLE ftHandle0;
int parametr;
LPVOID pkod;
DWORD nBufferSize = 0x0001;
DWORD dwBytesWritten;
if(argc > 1) {
sscanf(argv[1], "%d", ¶metr);
}
else {
parametr = 0;
}
FT_SetVIDPID(0x5555,0x0001); // id from lsusb
FT_Open(0,&ftHandle0);
FT_SetBitMode(ftHandle0,15,1);
pkod=¶metr;
ftStatus = FT_Write(ftHandle0,pkod,nBufferSize,&dwBytesWritten);
ftStatus = FT_Close(ftHandle0);
}
My question is. How can I read in the same program, status of D4 to D7 pins, programmed as inputs? I mean about "printf" to stdout the number representing status (zero or one) of input pins (or all input/output pins).
Can anybody help newbie ?
UPDATE-1
This is my program with FT_GetBitMode
// # gcc -o read5 read5.c -L. -lftd2xx -Wl,-rpath,/usr/local/lib
#include <stdio.h>
#include <stdlib.h>
#include "./ftd2xx.h"
int main(int argc, char *argv[])
{
FT_STATUS ftStatus;
FT_HANDLE ftHandle0;
UCHAR BitMode;
FT_SetVIDPID(0x5555,0x0001); // id from lsusb
ftStatus = FT_Open(0,&ftHandle0);
if(ftStatus != FT_OK) {
printf("FT_Open failed");
return;
}
FT_SetBitMode(ftHandle0,15,1);
ftStatus = FT_GetBitMode(ftHandle0, &BitMode);
if (ftStatus == FT_OK) {
printf("BitMode contains - %d",BitMode);
}
else {
printf("FT_GetBitMode FAILED!");
}
ftStatus = FT_Close(ftHandle0);
}
But it returns "FT_GetBitMode FAILED!" instead value of BitMode
FT_GetBitMode returns the instantaneous value of the pins. A single byte will be
returned containing the current values of the pins, both those which are inputs and
those which are outputs.
Source.
Finally I found out whats going wrong. I used incorrect version of ftdi library. The correct version dedicated for x86_64 platform is located here:
Link to FTDI library
I connected my ultimate gps breakout to my raspberry pi using the USB to TTL serial cable. Using C code I can easily connect to and read NMEA sentences from the GPS. But when I write configuration commands such as PMTK220 to set the update rate, they are ignored. I should get back a PMTK_ACK reporting success or failure, but it is not forthcoming. The problem also occurs when I use terminal windows. ie. I run:
while (true) do cat -A /dev/ttyUSB0 ; done
in one terminal, and get a stream of $GPGGA, $GPGSA, $GPRMC etc messages. In another terminal I run:
echo "$PMTK220,200*2C\r\n" > /dev/ttyUSB0
The NMEA messages continue but there is no PMTK001 coming back. Any ideas?
Ok, here's some code (inspired by gpsd source) that demonstrates sending a configuration message and getting an acknowledgement:
#define _BSD_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <sys/ioctl.h>
#include <termios.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <semaphore.h>
#include <string.h>
#ifndef CRTSCTS
# ifdef CNEW_RTSCTS
# define CRTSCTS CNEW_RTSCTS
# else
# define CRTSCTS 0
# endif /* CNEW_RTSCTS */
#endif /* !CRTSCTS */
int main (int argc, char **argv) {
int const baudr = B9600, mode = O_RDWR | O_NONBLOCK | O_NOCTTY;
int const fd = open("/dev/ttyUSB0", mode);
assert (fd != -1);
ioctl(fd, (unsigned long)TIOCEXCL);
struct termios ttyOld, ttyNew;
assert(tcgetattr(fd, &ttyOld) != -1);
ttyNew = ttyOld;
ttyNew.c_cflag &= ~(CSIZE | PARENB | PARODD | CRTSCTS | CSTOPB);
ttyNew.c_cflag |= CREAD | CLOCAL | CS8;
ttyNew.c_iflag = ttyNew.c_oflag = 0;
ttyNew.c_lflag = ICANON;
cfsetispeed(&ttyNew, baudr);
cfsetospeed(&ttyNew, baudr);
assert(tcsetattr(fd, TCSANOW, &ttyNew) != -1);
tcflush(fd, TCIOFLUSH);
usleep(200000);
tcflush(fd, TCIOFLUSH);
int const oldfl = fcntl(fd, F_GETFL);
assert (oldfl != -1);
fcntl(fd, F_SETFL, oldfl & ~O_NONBLOCK);
tcdrain(fd);
printf("port opened baudr=%d mode=%d i=%d o=%d c=%d l=%d fl=%d\n", baudr, mode, ttyNew.c_iflag, ttyNew.c_oflag, ttyNew.c_cflag, ttyNew.c_lflag, oldfl);
unsigned char buf[2048];
int count = 0;
while(++count < 20) {
if (count == 4) {
char const *const cmd = "$PMTK220,200*2C\r\n";
int const n = strlen(cmd);
assert(write(fd, cmd, n) == n);
tcdrain(fd);
printf("wrote command %d: %s\n", n, cmd);
}
int const n = read(fd, buf, sizeof(buf));
buf[(n >= sizeof(buf)) ? (sizeof(buf) - 1) : n] = 0;
printf(buf);
}
tcsetattr(fd, TCSANOW, &ttyOld);
close(fd);
}
with results:
pi#pi01 ~/c $ ./test
port opened baudr=13 mode=2306 i=0 o=0 c=3261 l=2 fl=2050
$GPGGA,175748.089,,,,,0,00,,,M,,M,,*71
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175748.089,V,,,,,0.00,0.00,260715,,,N*43
wrote command 17: $PMTK220,200*2C
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$PMTK001,220,2*31
$GPGGA,175749.089,,,,,0,00,,,M,,M,,*70
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPGSV,1,1,01,11,,,31*7A
$GPRMC,175749.089,V,,,,,0.00,0.00,260715,,,N*42
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175750.089,,,,,0,00,,,M,,M,,*78
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175750.089,V,,,,,0.00,0.00,260715,,,N*4A
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175751.089,,,,,0,00,,,M,,M,,*79
$GPGSA,A,1,,,,,,,,,,,,,,,*1E
$GPRMC,175751.089,V,,,,,0.00,0.00,260715,,,N*4B
$GPVTG,0.00,T,,M,0.00,N,0.00,K,N*32
$GPGGA,175752.089,,,,,0,00,,,M,,M,,*7A
pi#pi01 ~/c $
I'm new to openMP, and I was trying to parallelize a Gaussian Elimination, and I'm having troubles with performance. I'm compiling the code below using:
gcc -o gaussian_elimination gaussian_elimination.c -lm -lgsl -lgslcblas -fopenmp -Wall
And setting the number of threads on the terminal with export OMP_NUM_THREADS
And my problem is that the parallel version of this code is running way slower than the serial version of the same. I believe that this is because I declared #pragma parallel for inside the external loop, and this would force openMP to create and destroy thread at each iteration, which would be incredibly costly, but I haven't seen any other clear way to do the same kind of operation, and I don't think I can exchange the external loop with the internal parallel ones.
I'm probably missing something, but I have not found any other forum threads here commenting on this particular problem. As far as execution correctness goes, my code seems to be functioning alright, the problem is just performance-wise.
Thanks in Advance
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <stdbool.h>
#include <time.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_rng.h>
#define DEBUG_MODE false
int random_matrix(double *A, int N,long long int seed);
int print_matrix(double *A, int N);
int print_vector(float *b,int N);
int main(int argc, char **argv){
int N=1000;
int i,j,k,l,i_p,s,err,D=N+1;
long long int seed=9089123498274; // just a fixed seed only not to bother
double *A,pivot,sw,tmp,begin,end,time_spent;
double *Aref,*bref;
gsl_matrix_view gsl_m;
gsl_vector_view gsl_b;
gsl_vector *gsl_x;
gsl_permutation *gsl_p;
/* Input */
//scanf("%d",&N);
A = (double*)malloc(N*(N+1)*sizeof(double));
if(A==NULL){
printf("Matrix A not allocated\n");
return 1;
}
Aref = (double*)malloc(N*N*sizeof(double));
if(Aref==NULL){
printf("Matrix A not allocated\n");
return 1;
}
bref = (double*)malloc(N*sizeof(double));
if(bref==NULL){
printf("Vector B not allocated\n");
return 2;
}
/*
for(i=0;i<N;i+=1)
for(j=0;j<N;j+=1)
scanf("%f",&(A[i*N+j]));
for(i=0;i<N;i+=1)
scanf("%f",&(b[i]));
*/
/*
for(i=0;i<N*N;i++)
A[i]=(float) a_data[i];
for(i=0;i<N;i+=1)
b[i]=(float) b_data[i]; */
err= random_matrix(A,N,seed);
if(err!=0)
return err;
for(i=0;i<N;i++)
for(j=0;j<N;j+=1)
Aref[i*N+j]= A[i*D+j];
for(i=0;i<N;i+=1)
bref[i]= A[i*D+N];//b[i];
printf("GSL reference:\n");
gsl_m = gsl_matrix_view_array (Aref, N, N);
gsl_b = gsl_vector_view_array (bref, N);
gsl_x = gsl_vector_alloc (N);
gsl_p = gsl_permutation_alloc(N);
begin = clock();
gsl_linalg_LU_decomp(&gsl_m.matrix, gsl_p, &s);
gsl_linalg_LU_solve(&gsl_m.matrix, gsl_p, &gsl_b.vector, gsl_x);
end = clock();
time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
printf("gsl matrix solver: %lf s\n",time_spent);
if(DEBUG_MODE==true)
gsl_vector_fprintf(stdout,gsl_x,"%f");
gsl_permutation_free(gsl_p);
gsl_vector_free(gsl_x);
begin = omp_get_wtime();
for(i=0;i<N;i+=1){
i_p = i;
pivot = fabs(A[i*D+i]);
for(j=i;j<N;j+=1)
if(pivot<fabs(A[j*D+i])){
pivot = fabs(A[j*D+i]);
i_p = j;
}
#pragma omp parallel for shared(i,N,A,i_p) private(j,sw)
for(j=i;j<D;j+=1){
sw = A[i*D+j];
A[i*D+j] = A[i_p*D+j];
A[i_p*D+j] = sw;
}
pivot=A[i*D+i];
#pragma omp parallel for shared(i,D,pivot,A) private(j)
for(j=0;j<D;j++)
A[i*D+j]=A[i*D+j]/pivot;
#pragma omp parallel for shared(i,A,N,D) private(tmp,j,k,l)
for(j=i+1;j<N+i;j++){
k=j%N;
tmp=A[k*D+i];
for(l=0;l<D;l+=1)
A[k*D+l]=A[k*D+l]-tmp*A[i*D+l];
}
}
end = omp_get_wtime();
time_spent = (end - begin);
printf("omp matrix solver: %lf s\n",time_spent);
/* Output */
if(DEBUG_MODE==true){
printf("\nCalculated: \n");
for(i=0;i<N;i+=1)
printf("%.6f \n",A[i*(N+1)+N]);
printf("\n");
}
free(A);
return 0;
}
int random_matrix(double *A, int N,long long int seed){
int i,j;
const gsl_rng_type * T;
gsl_rng *r;
gsl_rng_env_setup();
T = gsl_rng_default;
r = gsl_rng_alloc (T);
for(i=0;i<N;i++)
for(j=0;j<=N;j++)
A[i*(N+1)+j]= gsl_rng_uniform (r);
gsl_rng_free (r);
return 0;
}
int print_matrix(double *A, int N){
int i,j;
for(i=0;i<N;i++)
for(j=0;j<=N+1;j++){
if(j==0 || j==N || j==N+1)
printf(" | ");
printf("%.2f ",A[i*(N+1)+j]);
if(j==N+1)
printf("\n");
}
return 0;
}
int print_vector(float *b,int N){
int i;
for(i=0;i<N;i+=1)
printf("%f\n", b[i]);
return 0;
}
I updated the code above with the omp_get_wtime(), and now it reads as the wtime diminishing as I include more and more threads, so, it does behave as it should, although not as clean as I would like.
For 1000 x 1000 matrices I get 0.25 s for the GSL lib, 4.4 s for the serial omp run and 1.5 s for the 4-thread run.
For 3000 x 3000 matrices, I get ~ 9s for the GSL lib, ~ 117 s for the serial omp run and ~ 44 s for the 4 thread-run, thus at least adding more threads indeed speeds up the program!
Thanks a lot everyone