big data-set: write data to file little by little? or continuously? - line

I have a working prototype-code that creates a .ppm image file from a set of simple parameters.
The purpose is to create a very large picture (something like 100000 * 100000 pixels) and to print a high-resolution wallpaper from it with a large format laser printer.
the problem I have is that my program crashes when height/width are superior to 22000 pixels.
In my program, all the data is stored in a deque while computing, then at the end the deque is exported to the file.
The program creates the data line-by-line, i.e., after each line the deque is filled by a dataset for one line of the picture.
Is it possible to write the deque in the file each time a line has been processed and then clear the deque before starting the next one??
here's my code:
#include <iostream>
#include <fstream>
#include <vector>
#include <deque>
#include <iterator>
#include <cstdlib>
#include <string>
#include <sstream>
using namespace std;
// constant values
double Da=1; //densities
double Db=0.5;
double Dc=0.5;
double Dd=0;
double l = 22000; //width & height
double h = 22000;
//double u = 1; // UNIT
double D = 0; // GAMMA
double E = 0; // ERREUR LOCALE
vector <double> F; // ERREUR DYNAMIQUE
int main ()
{
// vector
deque <int> dequeA;
F.push_back (0);
float a = 0;
float b = 0; // Local variables
double IO = 0; // variable I/O
while (a<l+1, b<h+1){
//values for given a & b
double DL = Da-Da*(b/h)+Dc*(b/h);
double DR = Db-Db*(b/h)+Dd*(b/h);
double D = DL-DL*(a/l)+DR*(a/l); //GAMMA
if (E+0-D<=-0.5) {
dequeA.push_back(1);
IO = 1;
}
else {
dequeA.push_back(0);
IO = 0;
}
E = E+IO-D;
F.push_back(E);
// next pixel & next line
a++;
if (a>l) {
a = 0;
b = b++;
E = 0;
F.clear();
}
}
//export values to .txt file
ofstream output_file("./test.ppm");
// write file header
output_file << "P1\n" << (l+1) << " " << (h+1) << "\n";
//write image data
ostream_iterator<int> output_iterator(output_file, "\n");
copy(dequeA.begin(), dequeA.end(), output_iterator);
}

Related

Need help in checking type of edge/curve

I have created a sample example for the intersection of two circles.
In this example I am able to get the bound faces and the source and target points.
I have manually plotted the source and target points. Refer snapshot for the same two intersecting circles:
I want to find out whether the edges between the source and target points is a line segment, arc or a circle.
I tried to find this in the 2D arrangement documentation but couldn't find it.
Below is the code snippet :
#include <CGAL/Cartesian.h>
#include <CGAL/Exact_rational.h>
#include <CGAL/Arr_circle_segment_traits_2.h>
#include <CGAL/Arrangement_2.h>
typedef CGAL::Cartesian<CGAL::Exact_rational> Kernel;
typedef Kernel::Circle_2 Circle_2;
typedef CGAL::Arr_circle_segment_traits_2<Kernel> Traits_2;
typedef Traits_2::CoordNT CoordNT;
typedef Traits_2::Point_2 Point_2;
typedef Traits_2::Curve_2 Curve_2;
typedef CGAL::Arrangement_2<Traits_2> Arrangement_2;
int main()
{
// Create a circle centered at (0,0) with radius 8.
Kernel::Point_2 c1 = Kernel::Point_2(0, 0);
CGAL::Exact_rational sqr_r1 = CGAL::Exact_rational(64); // = 8*^2
Circle_2 circ1 = Circle_2(c1, sqr_r1, CGAL::CLOCKWISE);
Curve_2 cv1 = Curve_2(circ1);
// Create a circle centered at (10,0) with radius 8.
Kernel::Point_2 c2 = Kernel::Point_2(10, 0);
CGAL::Exact_rational sqr_r2 = CGAL::Exact_rational(64); // = 8*^2
Circle_2 circ2 = Circle_2(c2, sqr_r2, CGAL::CLOCKWISE);
Curve_2 cv2 = Curve_2(circ2);
Arrangement_2 arr;
insert(arr, cv1);
insert(arr, cv2);
for (auto fit = arr.faces_begin(); fit != arr.faces_end(); ++fit)
{
if (fit->is_unbounded())
std::cout << "Unbounded face.\n";
else {
Arrangement_2::Ccb_halfedge_circulator curr, start;
start = curr = fit->outer_ccb();
do {
std::cout << " source --> " << curr->source()->point() << "\n";
std::cout << " target --> " << curr->target()->point() << "\n";
++curr;
} while (curr != start);
std::cout << std::endl;
}
}
return 0;
}

How to link the libsvm library in google colab when executing CUDA? What is the proper linking flag for libsvm?

I am working on google colab and i want to use libsvm library in my project. I downloaded libsvm and installed it. Now when i use !nvcc -o command and run the code using CUDA i am getting errors like,
undefined reference to `svm_get_nr_class
undefined reference to 'svm_predict_probability'
undefined reference to `svm_free_and_destroy_model
I guess the problem is that libsvm is not properly linked, As i use -l with proper flags to compile with nvcc, but i don't know what to use with -l to properly link libsvm and use it.
i downloaded libsvm using
!git clone https://github.com/cjlin1/libsvm
%cd libsvm/
!make && make install
%cd /content/libsvm/python/
!make
import sys
sys.path.append('/content/libsvm/python')
%cd /content
now when i run this program
%%cuda --name Blind_Deblurring_Cuda.cu
#include <iostream>
#include <fstream>
#include <iostream>
#include <fstream>
#include "/content/brisque.h"
#include "/content/libsvm/svm.h"
#include <vector>
#include <stdio.h>
#include "fstream"
#include "iostream"
#include <algorithm>
#include <iterator>
#include <cmath>
#include<stdlib.h>
#include <math.h>
#include <curand.h>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core.hpp>
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include <opencv2/core/core.hpp>
#include <iostream>
#include "opencv2/highgui.hpp"
#include <opencv2/core/utility.hpp>
//rescaling based on training data i libsvm
float rescale_vector[36][2];
using namespace std;
using namespace cv;
float computescore(string imagename);
void ComputeBrisqueFeature(Mat& orig, vector<double>& featurevector);
int read_range_file() {
//check if file exists
char buff[100];
int i;
string range_fname = "allrange";
FILE* range_file = fopen(range_fname.c_str(), "r");
if(range_file == NULL) return 1;
//assume standard file format for this program
fgets(buff, 100, range_file);
fgets(buff, 100, range_file);
//now we can fill the array
for(i = 0; i < 36; ++i) {
float a, b, c;
fscanf(range_file, "%f %f %f", &a, &b, &c);
rescale_vector[i][0] = b;
rescale_vector[i][1] = c;
}
return 0;
}
int main(int argc, char** argv)
{
if(argc < 2) {
cout << "Input Image argument not given." << endl;
return -1;
}
//read in the allrange file to setup internal scaling array
if(read_range_file()) {
cerr<<"unable to open allrange file"<<endl;
return -1;
}
float qualityscore;
qualityscore = computescore(argv[1]);
cout << "Quality Score: " << qualityscore << endl;
}
float computescore(string imagename) {
// pre-loaded vectors from allrange file
float min_[36] = {0.336999 ,0.019667 ,0.230000 ,-0.125959 ,0.000167 ,0.000616 ,0.231000 ,-0.125873 ,0.000165 ,0.000600 ,0.241000 ,-0.128814 ,0.000179 ,0.000386 ,0.243000 ,-0.133080 ,0.000182 ,0.000421 ,0.436998 ,0.016929 ,0.247000 ,-0.200231 ,0.000104 ,0.000834 ,0.257000 ,-0.200017 ,0.000112 ,0.000876 ,0.257000 ,-0.155072 ,0.000112 ,0.000356 ,0.258000 ,-0.154374 ,0.000117 ,0.000351};
float max_[36] = {9.999411, 0.807472, 1.644021, 0.202917, 0.712384, 0.468672, 1.644021, 0.169548, 0.713132, 0.467896, 1.553016, 0.101368, 0.687324, 0.533087, 1.554016, 0.101000, 0.689177, 0.533133, 3.639918, 0.800955, 1.096995, 0.175286, 0.755547, 0.399270, 1.095995, 0.155928, 0.751488, 0.402398, 1.041992, 0.093209, 0.623516, 0.532925, 1.042992, 0.093714, 0.621958, 0.534484};
double qualityscore;
int i;
struct svm_model* model; // create svm model object
Mat orig = imread(imagename, 1); // read image (color mode)
vector<double> brisqueFeatures; // feature vector initialization
ComputeBrisqueFeature(orig, brisqueFeatures); // compute brisque features
// use the pre-trained allmodel file
string modelfile = "allmodel";
//if((model=svm_load_model(modelfile.c_str()))==0) {
//fprintf(stderr,"can't open model file allmodel\n");
// exit(1);
//}
// float min_[37];
// float max_[37];
struct svm_node x[37];
// rescale the brisqueFeatures vector from -1 to 1
// also convert vector to svm node array object
for(i = 0; i < 36; ++i) {
float min = min_[i];
float max = max_[i];
x[i].value = -1 + (2.0/(max - min) * (brisqueFeatures[i] - min));
x[i].index = i + 1;
}
x[36].index = -1;
int nr_class=svm_get_nr_class(model);
double *prob_estimates = (double *) malloc(nr_class*sizeof(double));
// predict quality score using libsvm class
qualityscore = svm_predict_probability(model,x,prob_estimates);
free(prob_estimates);
svm_free_and_destroy_model(&model);
return qualityscore;
}
void ComputeBrisqueFeature(Mat& orig, vector<double>& featurevector)
{
Mat orig_bw_int(orig.size(), CV_64F, 1);
// convert to grayscale
cvtColor(orig, orig_bw_int, COLOR_BGR2GRAY);
// create a copy of original image
Mat orig_bw(orig_bw_int.size(), CV_64FC1, 1);
orig_bw_int.convertTo(orig_bw, 1.0/255);
orig_bw_int.release();
// orig_bw now contains the grayscale image normalized to the range 0,1
int scalenum = 2; // number of times to scale the image
for (int itr_scale = 1; itr_scale<=scalenum; itr_scale++)
{
// resize image
Size dst_size(orig_bw.cols/cv::pow((double)2, itr_scale-1), orig_bw.rows/pow((double)2, itr_scale-1));
Mat imdist_scaled;
resize(orig_bw, imdist_scaled, dst_size, 0, 0, INTER_CUBIC); // INTER_CUBIC
imdist_scaled.convertTo(imdist_scaled, CV_64FC1, 1.0/255.0);
// calculating MSCN coefficients
// compute mu (local mean)
Mat mu(imdist_scaled.size(), CV_64FC1, 1);
GaussianBlur(imdist_scaled, mu, Size(7, 7), 1.166);
Mat mu_sq;
cv::pow(mu, double(2.0), mu_sq);
//compute sigma (local sigma)
Mat sigma(imdist_scaled.size(), CV_64FC1, 1);
cv::multiply(imdist_scaled, imdist_scaled, sigma);
GaussianBlur(sigma, sigma, Size(7, 7), 1.166);
cv::subtract(sigma, mu_sq, sigma);
cv::pow(sigma, double(0.5), sigma);
add(sigma, Scalar(1.0/255), sigma); // to avoid DivideByZero Error
Mat structdis(imdist_scaled.size(), CV_64FC1, 1);
subtract(imdist_scaled, mu, structdis);
divide(structdis, sigma, structdis); // structdis is MSCN image
// Compute AGGD fit to MSCN image
double lsigma_best, rsigma_best, gamma_best;
structdis = AGGDfit(structdis, lsigma_best, rsigma_best, gamma_best);
featurevector.push_back(gamma_best);
featurevector.push_back((lsigma_best*lsigma_best + rsigma_best*rsigma_best)/2);
// Compute paired product images
// indices for orientations (H, V, D1, D2)
int shifts[4][2]={{0,1},{1,0},{1,1},{-1,1}};
for(int itr_shift=1; itr_shift<=4; itr_shift++)
{
// select the shifting index from the 2D array
int* reqshift = shifts[itr_shift-1];
// declare shifted_structdis as pairwise image
Mat shifted_structdis(imdist_scaled.size(), CV_64F, 1);
// create copies of the images using BwImage constructor
// utility constructor for better subscript access (for pixels)
BwImage OrigArr(structdis);
BwImage ShiftArr(shifted_structdis);
// create pair-wise product for the given orientation (reqshift)
for(int i=0; i<structdis.rows; i++)
{
for(int j=0; j<structdis.cols; j++)
{
if(i+reqshift[0]>=0 && i+reqshift[0]<structdis.rows && j+reqshift[1]>=0 && j+reqshift[1]<structdis.cols)
{
ShiftArr[i][j]=OrigArr[i + reqshift[0]][j + reqshift[1]];
}
else
{
ShiftArr[i][j]=0;
}
}
}
// Mat structdis_pairwise;
shifted_structdis = ShiftArr.equate(shifted_structdis);
// calculate the products of the pairs
multiply(structdis, shifted_structdis, shifted_structdis);
// fit the pairwise product to AGGD
shifted_structdis = AGGDfit(shifted_structdis, lsigma_best, rsigma_best, gamma_best);
double constant = sqrt(tgamma(1/gamma_best))/sqrt(tgamma(3/gamma_best));
double meanparam = (rsigma_best-lsigma_best)*(tgamma(2/gamma_best)/tgamma(1/gamma_best))*constant;
// push the calculated parameters from AGGD fit to pair-wise products
featurevector.push_back(gamma_best);
featurevector.push_back(meanparam);
featurevector.push_back(cv::pow(lsigma_best,2));
featurevector.push_back(cv::pow(rsigma_best,2));
}
}
}
// function to compute best fit parameters from AGGDfit
Mat AGGDfit(Mat structdis, double& lsigma_best, double& rsigma_best, double& gamma_best)
{
// create a copy of an image using BwImage constructor (brisque.h - more info)
BwImage ImArr(structdis);
long int poscount=0, negcount=0;
double possqsum=0, negsqsum=0, abssum=0;
for(int i=0;i<structdis.rows;i++)
{
for (int j =0; j<structdis.cols; j++)
{
double pt = ImArr[i][j]; // BwImage provides [][] access
if(pt>0)
{
poscount++;
possqsum += pt*pt;
abssum += pt;
}
else if(pt<0)
{
negcount++;
negsqsum += pt*pt;
abssum -= pt;
}
}
}
lsigma_best = cv::pow(negsqsum/negcount, 0.5);
rsigma_best = cv::pow(possqsum/poscount, 0.5);
double gammahat = lsigma_best/rsigma_best;
long int totalcount = (structdis.cols)*(structdis.rows);
double rhat = cv::pow(abssum/totalcount, static_cast<double>(2))/((negsqsum + possqsum)/totalcount);
double rhatnorm = rhat*(cv::pow(gammahat,3) +1)*(gammahat+1)/pow(pow(gammahat,2)+1,2);
double prevgamma = 0;
double prevdiff = 1e10;
float sampling = 0.001;
for (float gam=0.2; gam<10; gam+=sampling) //possible to coarsen sampling to quicken the code, with some loss of accuracy
{
double r_gam = tgamma(2/gam)*tgamma(2/gam)/(tgamma(1/gam)*tgamma(3/gam));
double diff = abs(r_gam-rhatnorm);
if(diff> prevdiff) break;
prevdiff = diff;
prevgamma = gam;
}
gamma_best = prevgamma;
return structdis.clone();
}
And then try to compile using
!nvcc -o /content/src/Blind_Deblurring_Cuda /content/src/Blind_Deblurring_Cuda.cu -lopencv_core -lopencv_imgcodecs -lopencv_imgproc -lopencv_highgui -lopencv_ml
It gives the following error
/tmp/tmpxft_00003d8d_00000000-10_Blind_Deblurring_Cuda.o: In function `computescore(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)':
tmpxft_00003d8d_00000000-5_Blind_Deblurring_Cuda.cudafe1.cpp:(.text+0x9bc): undefined reference to `svm_get_nr_class'
tmpxft_00003d8d_00000000-5_Blind_Deblurring_Cuda.cudafe1.cpp:(.text+0x9fd): undefined reference to `svm_predict_probability'
tmpxft_00003d8d_00000000-5_Blind_Deblurring_Cuda.cudafe1.cpp:(.text+0xa27): undefined reference to `svm_free_and_destroy_model'
collect2: error: ld returned 1 exit status

uninitialized local variable with cin use

I am working on this code (in c++) and I finished but i have 2 errors on line 19 when I use them in for loops about variables y and m, saying that they are uninitialized local variables. I don't see how this is possible because I declared them at the beginning as int and their value is assigned when the user inputs in cin.
#include <iostream>
#include <string>
#include <cmath>
#include <math.h>
#include <vector>
using namespace std;
int main()
{
int a, b, n, l = 0;
cin >> a, b, n;
for (int i = 0; i < 20; i++)
{
for (int j = 0; j < 20; j++)
{
if (l < (i*a + j*b) && (i*a + j*b) <= n)
l = i*a + j*b;
}
}
cout << l;
return 0;
}
I'm not in a position to test this, but Multiple inputs on one line suggests that your syntax should be
cin >> a >> b >> c;
Regardless, I think the compiler is suggesting that assignment to all variables isn't guaranteed by cin so without explicit initialisation when they're declared you're assuming too much.

How to set color of the zone between two tubes in VTK

I have created two tubes along the same line, and my next step is to set color of the zone between the two tubes, but I have no idea how to do it. Any help? Thank you in advance! My code is shown below.
#include <vtkSmartPointer.h>
#include <vtkVersion.h>
#include <vtkParametricFunctionSource.h>
#include <vtkTupleInterpolator.h>
#include <vtkTubeFilter.h>
#include <vtkParametricSpline.h>
#include <vtkDoubleArray.h>
#include <vtkPoints.h>
#include <vtkPolyData.h>
#include <vtkPointData.h>
#include <vtkPolyDataMapper.h>
#include <vtkActor.h>
#include <vtkRenderWindow.h>
#include <vtkRenderer.h>
#include <vtkRenderWindowInteractor.h>
vtkSmartPointer<vtkActor> createTube(double rad1,double rad2,double rad3,double rad4,double rad5,double rad6)
{
vtkSmartPointer<vtkPoints> points =
vtkSmartPointer<vtkPoints>::New();
points->InsertPoint(0,1,0,0);
points->InsertPoint(1,2,0.3,0);
points->InsertPoint(2,3,0.1,0);
points->InsertPoint(3,4,0.2,0);
points->InsertPoint(4,5,0.5,0);
points->InsertPoint(5,6,0.4,0);
// Fit a spline to the points
vtkSmartPointer<vtkParametricSpline> spline =
vtkSmartPointer<vtkParametricSpline>::New();
spline->SetPoints(points);
vtkSmartPointer<vtkParametricFunctionSource> functionSource =
vtkSmartPointer<vtkParametricFunctionSource>::New();
functionSource->SetParametricFunction(spline);
functionSource->SetUResolution(10 * points->GetNumberOfPoints());
functionSource->Update();
// Interpolate the scalars
//double rad;
vtkSmartPointer<vtkTupleInterpolator> interpolatedRadius =
vtkSmartPointer<vtkTupleInterpolator> ::New();
interpolatedRadius->SetInterpolationTypeToLinear();
interpolatedRadius->SetNumberOfComponents(1);
interpolatedRadius->AddTuple(0,&rad1);
interpolatedRadius->AddTuple(1,&rad2);
interpolatedRadius->AddTuple(2,&rad3);
interpolatedRadius->AddTuple(3,&rad4);
interpolatedRadius->AddTuple(4,&rad5);
interpolatedRadius->AddTuple(5,&rad6);
// Generate the radius scalars
vtkSmartPointer<vtkDoubleArray> tubeRadius =
vtkSmartPointer<vtkDoubleArray>::New();
unsigned int n = functionSource->GetOutput()->GetNumberOfPoints();
tubeRadius->SetNumberOfTuples(n);
tubeRadius->SetName("TubeRadius");
double tMin = interpolatedRadius->GetMinimumT();
double tMax = interpolatedRadius->GetMaximumT();
double r;
for (unsigned int i = 0; i < n; ++i)
{
double t = (tMax - tMin) / (n - 1) * i + tMin;
interpolatedRadius->InterpolateTuple(t, &r);
tubeRadius->SetTuple1(i, r);
}
// Add the scalars to the polydata
vtkSmartPointer<vtkPolyData> tubePolyData =
vtkSmartPointer<vtkPolyData>::New();
tubePolyData = functionSource->GetOutput();
tubePolyData->GetPointData()->AddArray(tubeRadius);
tubePolyData->GetPointData()->SetActiveScalars("TubeRadius");
// Create the tubes
vtkSmartPointer<vtkTubeFilter> tuber =
vtkSmartPointer<vtkTubeFilter>::New();
tuber->SetInput(tubePolyData);
tuber->SetNumberOfSides(20);
tuber->SetVaryRadiusToVaryRadiusByAbsoluteScalar();
vtkSmartPointer<vtkPolyDataMapper> tubeMapper =
vtkSmartPointer<vtkPolyDataMapper>::New();
tubeMapper->SetInputConnection(tuber->GetOutputPort());
tubeMapper->SetScalarRange(tubePolyData->GetScalarRange());
//vtkSmartPointer<vtkActor> lineActor = vtkSmartPointer<vtkActor>::New();
//lineActor->SetMapper(lineMapper);
vtkSmartPointer<vtkActor> tubeActor = vtkSmartPointer<vtkActor>::New();
tubeActor->SetMapper(tubeMapper);
return tubeActor;
}
int main()
{
vtkSmartPointer<vtkActor> tubeActor_1 = vtkSmartPointer<vtkActor>::New();
tubeActor_1= createTube(0.15,0.14,0.13,0.12,0.11,0.10);
vtkSmartPointer<vtkActor> tubeActor_2 = vtkSmartPointer<vtkActor>::New();
tubeActor_2= createTube(0.5,0.155,0.145,0.145,0.125,0.115);
//tubeActor_2->GetProperty()->SetOpacity(0.25);
// Setup render window, renderer, and interactor
vtkSmartPointer<vtkRenderer> renderer =
vtkSmartPointer<vtkRenderer>::New();
vtkSmartPointer<vtkRenderWindow> renderWindow =
vtkSmartPointer<vtkRenderWindow>::New();
renderWindow->AddRenderer(renderer);
vtkSmartPointer<vtkRenderWindowInteractor> renderWindowInteractor =
vtkSmartPointer<vtkRenderWindowInteractor>::New();
renderWindowInteractor->SetRenderWindow(renderWindow);
renderer->AddActor(tubeActor_1);
renderer->AddActor(tubeActor_2);
renderer->SetBackground(1, 1, 1);
renderWindow->Render();
renderWindowInteractor->Start();
return 0;
}

OpenCL, simple vector addition but wrong output for large input

So, after spending hours reading and understanding I have finally made my first OpenCL program that actually does something, which is it adds two vectors and outputs to a file.
#include <iostream>
#include <vector>
#include <cstdlib>
#include <string>
#include <fstream>
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
int main(int argc, char *argv[])
{
try
{
// get platforms, devices and display their info.
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
std::vector<cl::Platform>::iterator i=platforms.begin();
std::cout<<"OpenCL \tPlatform : "<<i->getInfo<CL_PLATFORM_NAME>()<<std::endl;
std::cout<<"\tVendor: "<<i->getInfo<CL_PLATFORM_VENDOR>()<<std::endl;
std::cout<<"\tVersion : "<<i->getInfo<CL_PLATFORM_VERSION>()<<std::endl;
std::cout<<"\tExtensions : "<<i->getInfo<CL_PLATFORM_EXTENSIONS>()<<std::endl;
// get devices
std::vector<cl::Device> devices;
i->getDevices(CL_DEVICE_TYPE_ALL,&devices);
int o=99;
std::cout<<"\n\n";
// iterate over available devices
for(std::vector<cl::Device>::iterator j=devices.begin(); j!=devices.end(); j++)
{
std::cout<<"\tOpenCL\tDevice : " << j->getInfo<CL_DEVICE_NAME>()<<std::endl;
std::cout<<"\t\t Type : " << j->getInfo<CL_DEVICE_TYPE>()<<std::endl;
std::cout<<"\t\t Vendor : " << j->getInfo<CL_DEVICE_VENDOR>()<<std::endl;
std::cout<<"\t\t Driver : " << j->getInfo<CL_DRIVER_VERSION>()<<std::endl;
std::cout<<"\t\t Global Mem : " << j->getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()/(1024*1024)<<" MBytes"<<std::endl;
std::cout<<"\t\t Local Mem : " << j->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>()/1024<<" KBbytes"<<std::endl;
std::cout<<"\t\t Compute Unit : " << j->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>()<<std::endl;
std::cout<<"\t\t Clock Rate : " << j->getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>()<<" MHz"<<std::endl;
}
std::cout<<"\n\n\n";
//MAIN CODE BEGINS HERE
//get Kernel
std::ifstream ifs("vector_add_kernel.cl");
std::string kernelSource((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
std::cout<<kernelSource;
//Create context, select device and command queue.
cl::Context context(devices);
cl::Device &device=devices.front();
cl::CommandQueue cmdqueue(context,device);
// Generate Source vector and push the kernel source in it.
cl::Program::Sources sourceCode;
sourceCode.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()));
//Generate program using sourceCode
cl::Program program=cl::Program(context, sourceCode);
//Build program..
try
{
program.build(devices);
}
catch(cl::Error &err)
{
std::cerr<<"Building failed, "<<err.what()<<"("<<err.err()<<")"
<<"\nRetrieving build log"
<<"\n Build Log Follows \n"
<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices.front());
}
//Declare and initialize vectors
std::vector<cl_float>B(993448,1.3);
std::vector<cl_float>C(993448,1.3);
std::vector<cl_float>A(993448,1.3);
cl_int N=A.size();
//Declare and intialize proper work group size and global size. Global size raised to the nearest multiple of workGroupSize.
int workGroupSize=128;
int GlobalSize;
if(N%workGroupSize) GlobalSize=N - N%workGroupSize + workGroupSize;
else GlobalSize=N;
//Declare buffers.
cl::Buffer vecA(context, CL_MEM_READ_WRITE, sizeof(cl_float)*N);
cl::Buffer vecB(context, CL_MEM_READ_ONLY , (B.size())*sizeof(cl_float));
cl::Buffer vecC(context, CL_MEM_READ_ONLY , (C.size())*sizeof(cl_float));
//Write vectors into buffers
cmdqueue.enqueueWriteBuffer(vecB, 0, 0, (B.size())*sizeof(cl_float), &B[0] );
cmdqueue.enqueueWriteBuffer(vecB, 0, 0, (C.size())*sizeof(cl_float), &C[0] );
//Executing kernel
cl::Kernel kernel(program, "vector_add");
cl::KernelFunctor kernel_func=kernel.bind(cmdqueue, cl::NDRange(GlobalSize), cl::NDRange(workGroupSize));
kernel_func(vecA, vecB, vecC, N);
//Reading back values into vector A
cmdqueue.enqueueReadBuffer(vecA,true,0,N*sizeof(cl_float), &A[0]);
cmdqueue.finish();
//Saving into file.
std::ofstream output("vectorAdd.txt");
for(int i=0;i<N;i++) output<<A[i]<<"\n";
}
catch(cl::Error& err)
{
std::cerr << "OpenCL error: " << err.what() << "(" << err.err() <<
")" << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
The problem is, for smaller values of N, I'm getting the correct result that is 2.6
But for larger values, like the one in the code above (993448) I get garbage output varying between 1 and 2.4.
Here is the Kernel code :
__kernel void vector_add(__global float *A, __global float *B, __global float *C, int N) {
// Get the index of the current element
int i = get_global_id(0);
//Do the operation
if(i<N) A[i] = C[i] + B[i];
}
UPDATE : Ok it seems the code is working now. I have fixed a few minor mistakes in my code above
1) The part where GlobalSize is initialized has been fixed.
2)Stupid mistake in enqueueWriteBuffer (wrong parameters given)
It is now outputting the correct result for large values of N.
Try to change the data type from float to double etc.