How to change the CMAKE of cuda static library from debug to release? - cmake

I'm a new to use CMAKE. I want to use the kernels in cuda as a static library and use " extern "C" void function();" to call it. Finally I will use cmake to compile the whole project. But its running speed in GPU didn't satisfied me. So I used Nsight eclispe to run it severally in debug and release. After analyzing them in NVVP.I found the default mode of cmake in static library is debug mode.
So how can I change the debug mode to release in static library?
Firstly, I create a project in Nsight eclipse.
Below is the example of the file structure that I have.
Test_in_stack
-release
-debug
-src
--GPU.cu
--simpleCUFFT.cu
-lib
--GPU.cuh
--Kernels.h
The content of src/simpleCUFFT.cu is:
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
// #include <Kernels.h>
#include <GPU.cuh>
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
#include <helper_functions.h>
#include <helper_cuda.h>
#include <device_functions.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/generate.h>
#include <thrust/sequence.h>
#include <thrust/device_ptr.h>
#include <thrust/extrema.h>
#include <thrust/execution_policy.h>
#include <thrust/equal.h>
#include <thrust/for_each.h>
// Complex data type
typedef float2 Complex;
#define FFT_NUM 1024
#define RANGE_NUM 1024
#define SIGNAL_SIZE RANGE_NUM*FFT_NUM
extern "C" void GPU_Pro(Complex *h_signal,int *h_count);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main()
{
Complex *h_signal = (Complex *)malloc(sizeof(Complex) * SIGNAL_SIZE);
int *h_count = (int *)malloc(sizeof(int) * SIGNAL_SIZE);
// Initialize the memory for the signal
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
{
h_signal[i].x = rand() / (float)RAND_MAX;
h_signal[i].y = rand() / (float)RAND_MAX;
h_count[i]=i/FFT_NUM;
}
GPU_Pro(h_signal,h_count);
cudaDeviceReset();
}
The content of src/GPU.cu is:
#include <Kernels.h>
#include <GPU.cuh>
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
#include <helper_functions.h>
#include <helper_cuda.h>
#include <device_functions.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/generate.h>
#include <thrust/sequence.h>
#include <thrust/device_ptr.h>
#include <thrust/extrema.h>
#include <thrust/execution_policy.h>
#include <thrust/equal.h>
#include <thrust/for_each.h>
typedef float2 Complex;
#define FFT_NUM 1024
#define RANGE_NUM 1024
#define SIGNAL_SIZE RANGE_NUM*FFT_NUM
void GPU_Pro(Complex *h_signal,int *h_count)
{
Complex *d_signal;
float *d_signal_float;
int *d_count;
cudaMalloc((void **)&d_signal, SIGNAL_SIZE*sizeof(Complex));
cudaMalloc((void **)&d_count, SIGNAL_SIZE*sizeof(int));
cudaMalloc((void **)&d_signal_float, SIGNAL_SIZE*sizeof(float));
cufftHandle plan;
checkCudaErrors(cufftPlan1d(&plan, FFT_NUM, CUFFT_C2C, 1));
dim3 dimblock(32, 32);
dim3 dimgrid(FFT_NUM / 32, RANGE_NUM / 32);
// Copy host memory to device
checkCudaErrors(cudaMemcpy(d_signal, h_signal, SIGNAL_SIZE*sizeof(Complex),
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(d_count, h_count, SIGNAL_SIZE*sizeof(int),
cudaMemcpyHostToDevice));
for(int i=0;i<RANGE_NUM;i++)
{
checkCudaErrors(cufftExecC2C(plan, d_signal+i*RANGE_NUM, d_signal+i*RANGE_NUM, CUFFT_FORWARD));
}
MatAbsNaive_float<<<dimgrid,dimblock>>>(d_signal,d_signal_float,FFT_NUM,RANGE_NUM);
thrust::stable_sort_by_key(thrust::device_pointer_cast(d_signal_float),thrust::device_pointer_cast(d_signal_float)+SIGNAL_SIZE,thrust::device_pointer_cast(d_count));
thrust::stable_sort_by_key(thrust::device_pointer_cast(d_count),thrust::device_pointer_cast(d_count)+SIGNAL_SIZE,thrust::device_pointer_cast(d_signal_float));
cudaDeviceReset();
}
The content of lib/Kernels.h is:
/*
* Kernels.h
*
* Created on: Jan 10, 2019
* Author: root
*/
#include "iostream"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "stdio.h"
#include <stdlib.h>
#include <string.h>
#include "math.h"
#include <mat.h>
#include "cuComplex.h"
#include "cublas.h"
#include <cufft.h>
#include <cufftXt.h>
#include <time.h>
#include <cublas_v2.h>
__global__ void MatAbsNaive_float(cuComplex *idata, float *odata, int M, int N)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < M) && (y < N))
{
odata[x + M*y] = sqrt(idata[x + M*y].x * idata[x + M*y].x + idata[x + M*y].y * idata[x + M*y].y);
}
}
The content of lib/GPU.cuh is:
#ifndef GPU_CUH
#define GPU_CUH
#include <stdio.h>
#include "cuComplex.h"
typedef float2 Complex;
extern "C"
void GPU_Pro(Complex *h_signal,int *h_count);
#endif
The result of NVVP in debug and release are as follows:
debug
release
Then I put the same files in cmake.
Below is the example of the file structure that I have.
Test_in_stack
-CMakeLists(1).txt
-build
-src
--CMakeLists(2).txt
--simpleCUFFT.cpp
-lib
--CMakeLists(3).txt
--GPU.cu
--GPU.cuh
--Kernels.h
The (1)、(2)、(3) are labels, the real file names are both CMakeLists.txt. And the content of simpleCUFFT.cu and simpleCUFFT.cpp are the same.
The content of CMakeLists(1).txt is:
cmake_minimum_required (VERSION 2.6)
PROJECT(GPU_MODE C CXX)
#PROJECT(GPU_MODE)
ADD_SUBDIRECTORY(src bin)
ADD_SUBDIRECTORY(lib)
The content of CMakeLists(2).txt is:
INCLUDE_DIRECTORIES(
${eclipse_home}VSPS/include
/usr/include
${eclipse_home}PetDCPS/include
/user/include/c++
/usr/local/cuda-8.0/include
)
INCLUDE_DIRECTORIES(/root/Chenjie/cuda-workspace/Test_in_stack/lib
/usr/local/cuda-8.0/samples/common/inc
/usr/local/cuda-8.0/include)
LINK_DIRECTORIES(/usr/local/cuda-8.0/lib64/)
SET(CPU_LIST simpleCUFFT.cpp)
FIND_PACKAGE(CUDA REQUIRED)
SET(EXTRA_LIBS ${EXTRA_LIBS} gpu ${CUDA_LIBRARIES})
ADD_EXECUTABLE(CPUProcessTest ${CPU_LIST})
SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/src)
TARGET_LINK_LIBRARIES(CPUProcessTest optimized ${EXTRA_LIBS} vsip_c)
The content of CMakeLists(3).txt is:
#for cuda
PROJECT(gpu)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
INCLUDE_DIRECTORIES(/root/Chenjie/cuda-workspace/Test_in_stack/lib
/usr/local/cuda-8.0/samples/common/inc
/usr/local/cuda-8.0/include)
FIND_PACKAGE(CUDA QUIET REQUIRED)
#SET(CUDA_NVCC_FLAGS -03;-G;-g)
SET(CUDA_NVCC_FLAGS -gencode arch=compute_52,code=sm_52;-G;-g;-lcufft;-lcudart;-lcublas)
SET(CMAKE_CUDA_FLAGS ${CUDA_NVCC_FLAGS_RELEASE})
FILE(GLOB_RECURSE CURRENT_HEADERS *.h *.hpp *.cuh)
FILE(GLOB CURRENT_SOURCES *.cpp *.cu)
SOURCE_GROUP("Include" FILES ${CURRENT_HEADERS})
SOURCE_GROUP("Source" FILES ${CURRENT_SOURCES})
INCLUDE_DIRECTORIES(/usr/local/cuda-8.0/include)
LINK_DIRECTORIES(/usr/local/cuda-8.0/lib64/)
LINK_LIBRARIES(cufft cublas)
#TARGET_LINK_LIBRARIES(gpu ${CUDA_LIBRARIES})
#CUDA_ADD_LIBRARY(gpu SHARED ${CURRENT_HEADERS} ${CURRENT_SOURCES})
CUDA_ADD_LIBRARY(gpu STATIC ${CURRENT_HEADERS} ${CURRENT_SOURCES} ${CUDA_LIBRARIES} ${CUDA_CUFFT_LIBRARIES})
I used the command line in /built as follows:
cmake -DCMAKE_BUILD_TYPE=Release ..
make
But it didn't work. It seems it still run in debug as the NVVP result shows:
cmake result
So how can I change to compile flag to release in static library of cuda.
I'm using Red Hat Enterprise Linux Server 7.1(Maipo)、cuda 8.0 、cmake version 2.8.12.2、GNU Make 3.82.
Update in 2019.01.12
I added MESSAGE(STATUS "Build type:" ${CMAKE_BUILD_TYPE}") in CMakeLists(2).txt. And the result is:
[root#node2 build]# cmake -DCMAKE_BUILD_TYPE=Release ..
-- Build type: Release
-- Configuring done
-- Generating done
But the result in NVVP didn't change.

Well, I have found out a way to solve it.
I change the line 10 and line 11 in CMakeLists(3).txt to
SET(CUDA_NVCC_FLAGS -gencode arch=compute_52,code=sm_52;-lcufft;-lcudart;-lcublas)
SET(CMAKE_CUDA_FLAGS ${CUDA_NVCC_FLAGS} -O3 -DNDEBUG)
After doing
cmake -DCMAKE_BUILD_TYPE=Release ..
make clean
make
The result in NVVP shows it is compiled with Release mode.

Related

g++ file.cpp -o file -l wolfssl:undefined reference to 'sp_init' collect2: error :ld returned 1 exit status

#include <iostream>
#include <string>
#include <unistd.h>
#include <wolfssl/options.h>
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/ssl.h>
#include <wolfssl/wolfcrypt/types.h>
#include <wolfssl/wolfcrypt/random.h>
#include <wolfssl/wolfcrypt/ecc.h>
#include <wolfssl/wolfcrypt/sp_int.h>
#include <wolfssl/wolfcrypt/integer.h>
#include <wolfssl/wolfcrypt/wolfmath.h>
using namespace std;
int main(){
ecc_key key;
WC_RNG rng;
wc_ecc_init(&key);
wc_InitGng(&rng);
int curveId = ECC_SECP521R1;
const ecc_set_type* ecc_params;
ecc_params = wc_ecc_get_curve_params(curveId);
mp_int ord; //order of ecc
mp_int priv; //privatekey
int err;
err = mp_init(&ord);
cout<<err<<endl;
err = mp_init(&priv);
cout<<err<<endl;
//err = mp_read_radix(&ord,ecc_params->order,MP_RADIX_HEX); //
//cout<<err<<endl;
//err = wc_ecc_gen_k(&rng,120,&priv,&ord)
return 0 ;
}
enter image description here
i have include <wolfssl/wolfcrypt/sp_int.h>,but it told me undefined reference to 'sp_init',one solution maybe work according tohttps://github.com/wolfSSL/wolfssl/pull/5328,but i don't quite understand .how to solve this problem

How do I include glib in a project I am using cmake for

Source Code
#include <stdlib.h>
#include <sys/types.h>
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <gtk/gtk.h>
#include <gtk/gtkx.h>
#include <math.h>
#include <ctype.h>
#include <driver.hpp>
#include <decrypt_util.hpp>
#include <encrypt_util.hpp>
#include <util.hpp>
GtkWidget *window1;
GtkWidget *fixed1;
GtkWidget *label1;
GtkWidget *button1;
GtkWidget *decryptbt;
GtkBuilder *builder;
int main(int argc, char *argv[]){
gtk_init(&argc, &argv);
builder = gtk_builder_new_from_file ("Attempt.glade");
//Connects glade file with C++ code
window1 = GTK_WIDGET(gtk_builder_get_object(builder, "window1"));
//Creates pop up window, window is defined as a widget.
g_signal_connect(window1, "destroy", G_CALLBACK(gtk_main_quit), NULL);
//Makes the window x stop the program from running further
gtk_builder_connect_signals(builder, NULL);
//Connects signals from gui to code, on button clicked and GetCommandLineA
fixed1 = GTK_WIDGET(gtk_builder_get_object(builder, "fixed1"));
button1 = GTK_WIDGET(gtk_builder_get_object(builder, "button1"));
decryptbt = GTK_WIDGET(gtk_builder_get_object(builder, "decryptbt"));
label1 = GTK_WIDGET(gtk_builder_get_object(builder, "label1"));
gtk_widget_show(window1);
gtk_main();
return EXIT_SUCCESS;
}
void on_button1_clicked (GtkButton *b){
gtk_label_set_text (GTK_LABEL(label1), (const gchar* ) "Hello World");
}
Cmakelists.list
cmake_minimum_required(VERSION 3.7...3.18)
if(${CMAKE_VERSION} VERSION_LESS 3.12)
cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
endif()
set(CMAKE_CXX_STANDARD 17)
project(ezip VERSION 1.0
DESCRIPTION "AES encyption"
LANGUAGES CXX)
include_directories(include)
set(SOURCES src/driver.cpp src/util.cpp src/decrypt_util.cpp src/encrypt_util.cpp)
set(GUI src/stuff.cpp)
find_package(Boost 1.6 REQUIRED COMPONENTS filesystem)
find_package (Eigen3 3.3 REQUIRED NO_MODULE)
find_package(PkgConfig REQUIRED)
find_package(GTK REQUIRED gtk+-3.0)
include_directories(${GTK3_INCLUDE_DIRS})
include_directories(${CMAKE_SOURCE_DIR})
add_definitions(${GTK3_CFLAGS_OTHER})
add_executable(ezip ${SOURCES})
target_link_libraries(ezip PUBLIC Boost::filesystem)
target_link_libraries (ezip PUBLIC Eigen3::Eigen)
add_executable(GUI1.0 ${GUI})
target_link_libraries(GUI1.0 ${GTK3_LIBRARIES})
I have tried many variations of target_link_libraries and find_package but none of them have worked. I know that the package name is libglib2.0-0. But other than that I am taking shots in the dark. Any information would be helpful. I have attempted find_package using simply GLIB2.0 and other variations and I have a terminal output for the package location. If that would be helpful. My error code is
home/dellgato/ezip/src/stuff.cpp:6:10: fatal error: gtk/gtk.h: No such file or directory
6 | #include <gtk/gtk.h>
| ^~~~~~~~~~~
compilation terminated.
make[2]: *** [CMakeFiles/GUI1.0.dir/build.make:82: CMakeFiles/GUI1.0.dir/src/stuff.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:124: CMakeFiles/GUI1.0.dir/all] Error 2
make: *** [Makefile:103: all] Error 2

How to convert a serialized tensorflow::Example into an input to a session

I've used the following tutorial to export a saved model created by an estimator:
https://github.com/MtDersvan/tf_playground/blob/master/wide_and_deep_tutorial/wide_and_deep_basic_serving.md
I'm trying to load this model in c++. I've managed to create a serialized tensorflow::Example using c++. How do I convert this into a single Tensor?
The tutorial uses tf.contrib.util.make_tensor_proto(serialized, shape=[1])). What is the equivalent C++ API?
It's complicated a little than write in python, here's code example may helps:
#include <gtest/gtest.h>
#include <grpc/grpc.h>
#include <grpc++/channel.h>
#include <grpc++/client_context.h>
#include <grpc++/create_channel.h>
#include <grpc++/security/credentials.h>
#include <grpc++/security/credentials.h>
#include <iostream>
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow_serving/apis/prediction_service.grpc.pb.h"
#include "tensorflow_serving/apis/prediction_service.pb.h"
using namespace std;
namespace tensorflow {
namespace serving {
TEST(GRPCCppClient, TestPredict) {
shared_ptr<grpc::Channel> channel = grpc::CreateChannel("127.0.0.1:30355", grpc::InsecureChannelCredentials());
shared_ptr<PredictionService::Stub> stub_ = PredictionService::NewStub(channel);
tensorflow::TensorProto tensorProto;
tensorflow::TensorShapeProto tensorShapeProto;
PredictRequest predictRequest;
PredictResponse predictResponse;
tensorflow::TensorShapeProto_Dim* dim_0 = tensorShapeProto.add_dim();
dim_0->set_size(1);
tensorProto.mutable_tensor_shape()->CopyFrom(tensorShapeProto);
Example example;
/*Construct example..*/
std::string test_str;
example.SerializeToString(&test_str);
tensorProto.set_dtype(DT_STRING);
tensorProto.add_string_val(test_str);
(*predictRequest.mutable_inputs())["inputs"] = tensorProto;
predictRequest.mutable_model_spec()->set_name("default"); /*set your own model name*/
grpc::ClientContext context;
grpc::Status status = stub_->Predict(&context, predictRequest, &predictResponse);
if (!status.ok()) {
std::cout << "GetFeature rpc failed." << std::endl;
ASSERT_TRUE(false);
}
cout<<"------------\n"<<predictResponse.DebugString()<<endl;
}
}
}

CMake with crypt(3)

I trying to make a crypt(3) sample with CMake.
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <crypt.h>
/* To compile: $ gcc check.c -lcrypt -o check */
int main(void) {
/* Hashed form of "GNU libc manual". */
char *pass = "$1$/iSaq7rB$EoUw5jJPPvAPECNaaWzMK/";
/* Read in the user’s password and encrypt it,
passing the expected password in as the salt. */
char *result = crypt(getpass("Password:"), pass);
/* Test the result. */
int ok = strcmp (result, pass) == 0;
puts(ok ? "Access granted." : "Access denied.");
return ok ? 0 : 1;
}
To build it it should be pass the -lcrypt option to gcc.
My CMakeLists.txt looks like:
project(cryptexample)
set(SOURCE_FILES check.c)
add_executable(check ${SOURCE_FILES})
How can I pass this option and build it?
Something like:
target_link_libraries(check crypt)
Source: https://cmake.org/cmake/help/latest/command/target_link_libraries.html

How do I fix the build for mod_mono when it cannot find strndup on FreeBSD?

I am installing mod_mono with Apache 2 on FreeBSD and I get the following error when Apache tries to load the mod_mono.so module.
Cannot load
/usr/local/apache/modules/mod_mono.so
into server:
/usr/local/apache/modules/mod_mono.so:
Undefined symbol "strndup"
The prefix I set for Apache is /usr/local/apache and I have PHP and other modules working already. I found that strndup is referenced in roken.h in /usr/include and I tried the following additions to configure command but it did not work.
--libdir=/usr/lib --includedir=/usr/include
I also tried...
--with-mono-prefix=/usr
I do not know what to try next. It does not appear that mod_mono has many build options. Since Mono and XSP are both built successfully I just need mod_mono to work.
I appreciate any tips to get this working.
Add strndup via implementing it:
ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#if !_LIBC
# include "strndup.h"
#endif
#include <stdlib.h>
#include <string.h>
#if !_LIBC
# include "strnlen.h"
# ifndef __strnlen
# define __strnlen strnlen
# endif
#endif
#undef __strndup
#if _LIBC
# undef strndup
#endif
#ifndef weak_alias
# define __strndup strndup
#endif
char *
__strndup (s, n)
const char *s;
size_t n;
{
size_t len = __strnlen (s, n);
char *new = malloc (len + 1);
if (new == NULL)
return NULL;
new[len] = '\0';
return memcpy (new, s, len);
}
#ifdef libc_hidden_def
libc_hidden_def (__strndup)
#endif
#ifdef weak_alias
weak_alias (__strndup, strndup)
#endif