"Does not name a type"-Error when compile with CMake - cmake

When I compile my code in Eclipse, it works fine, but the build fails when I try to compile it with CMake. It says:
error: ‘default_random_engine’ does not name a type
default_random_engine generator;
error: ‘uniform_int_distribution’ does not name a type
uniform_int_distribution<int> distribution;
and some more errors, which I believe are the consequences of these two.
// Function object that produces a pseudo-random int on every call; it is
// passed to generate() in main() below.
class randomInt
{
private:
int m_max;
// These two declarations use unqualified standard-library names; they are the
// lines quoted in the compiler errors above.
default_random_engine generator;
uniform_int_distribution<int> distribution;
public:
// Seeds the engine from time(0) and sets up a distribution over [0, max].
// m_max is declared before distribution, so it is already initialized when
// distribution(0, m_max) runs.
randomInt(int max = 0) :
m_max(max),
generator(time(0)),
distribution(0, m_max)
{}
// Draws the next value from the distribution using the stored engine.
int operator ()()
{
return distribution(generator);
}
};
// Fills a 100-element vector with values from randomInt(100) and writes every
// element to cout.
int main(int argc, char **argv)
{
vector<int> vec(100);
generate(vec.begin(), vec.end(), randomInt(100));
// Each value written through streamIt is followed by the ",\n" delimiter.
ostream_iterator<int> streamIt(cout, ",\n");
copy(vec.begin(), vec.end(), streamIt);
return 0;
}
There is my CMakeLists.txt
project(TEST)
# States that the CMake version must be at least 2.8
cmake_minimum_required(VERSION 2.8)
# Setup sources
set(TEST_SOURCES
aufgabe2_1.cpp
aufgabe2_2.cpp
aufgabe2_3.cpp
aufgabe2_4.cpp)
# NOTE(review): CMAKE_CXX_FLAGS_DEBUG is only applied to Debug builds, so
# -std=c++11 is presumably not passed when CMAKE_BUILD_TYPE is unset or set to
# another configuration -- confirm against the actual compiler command line.
set(CMAKE_CXX_FLAGS_DEBUG "-g -Wall -std=c++11")
# Build executable
add_executable(main ${TEST_SOURCES})

These are symbols from the C++ standard library's pseudo-random number generation, but you haven't qualified the symbols with the namespace std, so use std::default_random_engine and std::uniform_int_distribution.

Related

How can I make a PyTorch extension with cmake

This tutorial demonstrates how to make a C++/CUDA-based Python extension for PyTorch. But for ... reasons ... my use-case is more complicated than this and doesn't fit neatly within the Python setuptools framework described by the tutorial.
Is there a way to use cmake to compile a Python library that extends PyTorch?
Yes.
The trick is to use cmake to combine together all the C++ and CUDA files we'll need and to use PyBind11 to build the interface we want; fortunately, PyBind11 is included with PyTorch.
The code below is collected and kept up-to-date in this Github repo.
Our project consists of several files:
CMakeLists.txt
# Builds a shared library that registers custom PyTorch operators written in CUDA.
#
# 3.12 is the first release that ships the FindPython module providing
# find_package(Python ...) and the Python::Python target used below.
cmake_minimum_required(VERSION 3.12)
project(pytorch_cmake_example LANGUAGES CXX CUDA)
find_package(Python REQUIRED COMPONENTS Development)
find_package(Torch REQUIRED)
# Modify if you need a different default value
# (CMAKE_CUDA_ARCHITECTURES is only honoured by CMake >= 3.18; see the
# -gencode variant at the bottom for earlier versions)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 61)
endif()
# List all your code files here
add_library(pytorch_cmake_example SHARED
  main.cu
)
target_compile_features(pytorch_cmake_example PRIVATE cxx_std_11)
target_link_libraries(pytorch_cmake_example PRIVATE ${TORCH_LIBRARIES} Python::Python)
# Use if the default GCC version gives issues.
# Similar syntax is used if we need better compilation flags.
target_compile_options(pytorch_cmake_example PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-ccbin g++-9>)
# Use a variant of this if you're on an earlier cmake than 3.18
# target_compile_options(pytorch_cmake_example PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-gencode arch=compute_61,code=sm_61>)
main.cu
#include <c10/cuda/CUDAException.h>
#include <torch/extension.h>
#include <torch/library.h>
using namespace at;
/// Returns (num + denom - 1) / denom, i.e. num / denom rounded up when both
/// operands are positive. Used to compute how many blocks of `denom` threads
/// are needed to cover `num` elements.
int64_t integer_round(int64_t num, int64_t denom){
  const int64_t biased = num + denom - 1;
  return biased / denom;
}
///Writes input[i] + 1 to output[i] for every i in [0, N)
template<class T>
__global__ void add_one_kernel(const T *const input, T *const output, const int64_t N){
  // Index and stride are kept in 64 bits: N is an int64_t, and a 32-bit `int`
  // counter would overflow before reaching N for very large tensors.
  const int64_t stride = static_cast<int64_t>(blockDim.x) * gridDim.x;
  // Grid-strided loop
  for(int64_t i = static_cast<int64_t>(blockDim.x) * blockIdx.x + threadIdx.x; i < N; i += stride){
    output[i] = input[i] + 1;
  }
}
///Adds one to each element of a tensor
///
///Returns a new tensor created with torch::zeros_like(input) that holds
///input + 1; `input` itself is not modified.
///NOTE(review): the kernel indexes both buffers linearly via data_ptr, which
///presumably requires contiguous tensors -- confirm for your callers.
Tensor add_one(const Tensor &input){
  auto output = torch::zeros_like(input);
  // Nothing to compute for an empty tensor; launching a grid with zero blocks
  // would be reported as an invalid configuration by the launch check below.
  if(input.numel() == 0){
    return output;
  }
  // Common values:
  // AT_DISPATCH_INDEX_TYPES
  // AT_DISPATCH_FLOATING_TYPES
  // AT_DISPATCH_INTEGRAL_TYPES
  AT_DISPATCH_ALL_TYPES(
    input.scalar_type(), "add_one_cuda", [&](){
      const auto block_size = 128;
      // The explicit <int64_t> keeps this portable: the literal `65535L` is a
      // `long`, which is not the same type as int64_t on every platform, and
      // std::min cannot deduce a common type from mismatched arguments.
      const auto num_blocks = std::min<int64_t>(65535, integer_round(input.numel(), block_size));
      add_one_kernel<<<num_blocks, block_size>>>(
        input.data_ptr<scalar_t>(),
        output.data_ptr<scalar_t>(),
        input.numel()
      );
      // Always test your kernel launches
      C10_CUDA_KERNEL_LAUNCH_CHECK();
    }
  );
  return output;
}
///Note that we can have multiple implementations spread across multiple files, though there should only be one `def`
// Declares the operator schema "add_one(Tensor input) -> Tensor" under the
// pytorch_cmake_example namespace and registers add_one as its implementation
// for the CUDA dispatch key.
TORCH_LIBRARY(pytorch_cmake_example, m) {
m.def("add_one(Tensor input) -> Tensor");
m.impl("add_one", c10::DispatchKey::CUDA, TORCH_FN(add_one));
//c10::DispatchKey::CPU is also an option
}
Compilation
Configure the build by running this command from a build/ subdirectory of the project, then run ninja to compile:
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -GNinja ..
test.py
You can then run the following test script.
# Loads the compiled extension and checks the custom add_one operator on a
# small CUDA tensor.
import torch
torch.ops.load_library("build/libpytorch_cmake_example.so")
shape = (3,3,3)
a = torch.randint(0, 10, shape, dtype=torch.float).cuda()
a_plus_one = torch.ops.pytorch_cmake_example.add_one(a)
# Actually verify the result; without this the script only proves that the
# operator can be called, not that it computes the right values.
assert torch.equal(a_plus_one, a + 1), "add_one returned unexpected values"

Building a CUDA program with CMake: undefined reference to __cudaRegisterLinkedBinary...cpp1_ii_main

My cuda version is 10.1, and GPU is T4. My code is like this:
#include <iostream>
#include <algorithm>
#include <random>
#include <vector>
#include <numeric>
#include <algorithm>
#include <chrono>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <cooperative_groups.h>
using std::cout;
using std::endl;
void sort_2d_by_row();
thrust::device_vector<float> thrust_2d_by_row_even_odd(
thrust::device_vector<float>&, int, int);
__global__ void even_odd_kernel(float *ptr, int M, int N);
// Sets the device malloc heap size limit, then runs the sorting experiment.
int main() {
// Requests 1UL << 32 bytes for cudaLimitMallocHeapSize.
cudaError_t err = cudaDeviceSetLimit(cudaLimitMallocHeapSize, 1UL << 32);
// A failure is only reported; execution continues regardless.
if (err) cout << "errors occur\n";
sort_2d_by_row();
return 0;
}
// Generates M * N random floats on the host, copies them to the device and
// passes them to thrust_2d_by_row_even_odd. The returned vector is not used
// afterwards.
void sort_2d_by_row() {
// Mersenne Twister engine seeded from std::random_device.
std::random_device rd;
std::mt19937 engine;
engine.seed(rd());
// Values are drawn from the range [0, 90).
std::uniform_real_distribution<float> u(0, 90.);
int M = 19;
int N = 8 * 768 * 768;
/* int N = 10; */
std::vector<float> v(M * N);
std::generate(v.begin(), v.end(), [&](){return u(engine);});
// Host -> device transfer happens in the device_vector copy-initialization.
thrust::host_vector<float> hv(v.begin(), v.end());
thrust::device_vector<float> dv = hv;
thrust::device_vector<float> res_even_odd = thrust_2d_by_row_even_odd(dv, M, N);
}
// Copies v into a new device vector, launches even_odd_kernel on that copy and
// returns the copy after cudaDeviceSynchronize().
thrust::device_vector<float> thrust_2d_by_row_even_odd(
thrust::device_vector<float>& v, int M, int N) {
thrust::device_vector<float> res(v.begin(), v.end());
// index is filled with 0, 1, ..., M-1 but is not read again in this function.
thrust::device_vector<int> index(M);
thrust::sequence(thrust::device, index.begin(), index.end(), 0, 1);
// blocky = (smallest power of two >= M) / 2; this is 0 when M <= 1.
int blocky = 1;
while (blocky < M) blocky *= 2;
blocky /= 2;
// blockx = (smallest power of two >= min(N / 2, 1024)) / 2.
int blockx = 1;
while (blockx < (N / 2) && blockx < 1024) blockx *= 2;
blockx /= 2;
int gridx = std::min(4096, N / blockx / 2);
// NOTE(review): with M = 19 and N = 8 * 768 * 768 (the values set in
// sort_2d_by_row) this yields a 512 x 16 block, i.e. 8192 threads, which looks
// larger than the per-block thread limit -- confirm; the launch result is
// never checked, so a rejected launch would go unnoticed.
dim3 block(blockx, blocky);
dim3 grid(gridx);
even_odd_kernel<<<grid, block, 0>>>(
thrust::raw_pointer_cast(&res[0]), M, N);
cudaDeviceSynchronize();
return res;
}
// descending
// As shown here the kernel only computes its indices and performs a grid-wide
// sync; ptr, M and N are not otherwise used.
__global__ void even_odd_kernel(float *ptr, int M, int N) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int m = threadIdx.y;
int tstride = blockDim.x * gridDim.x * 2;
// NOTE(review): grid_group::sync() presumably requires a cooperative launch
// (cudaLaunchCooperativeKernel) rather than the <<<...>>> launch used by the
// caller -- confirm against the CUDA cooperative groups documentation.
cooperative_groups::grid_group g = cooperative_groups::this_grid();
g.sync();
}
And CMakeLists.txt is like this:
# Builds the `sort` executable from sort.cu through the FindCUDA module.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(cuda)
# Default to a Release build when no build type was given.
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif ()
set(CMAKE_CXX_FLAGS "-std=c++14 -Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-g3 -O0")
set(CMAKE_CXX_FLAGS_RELEASE "-O2")
# NOTE(review): FindCUDA presumably expects CUDA_NVCC_FLAGS to be a
# ;-separated list rather than one quoted string, and -rdc=true on its own
# presumably does not add the device-link step -- confirm against the FindCUDA
# documentation (CUDA_SEPARABLE_COMPILATION).
set(CUDA_NVCC_FLAGS "-std=c++14 -arch=sm_60 -Xptxas=-v -rdc=true")
set(CUDA_NVCC_FLAGS_DEBUG "-G -O0")
set(CUDA_NVCC_FLAGS_RELEASE "-O2")
# NOTE(review): CUDA_CUDA_FLAGS does not look like a variable that FindCUDA
# reads -- confirm; if so this line has no effect. It also names sm_70 while
# CUDA_NVCC_FLAGS above names sm_60.
set(CUDA_CUDA_FLAGS "-gencode arch=compute_70,code=sm_70 -rdc=true")
message (${CMAKE_BUILD_TYPE})
find_package(CUDA REQUIRED)
cuda_add_executable(sort sort.cu)
# CUDNN_INCLUDE_DIRS is not set anywhere in this file.
target_include_directories(
sort PUBLIC ${CUDA_INCLUDE_DIRS} ${CUDNN_INCLUDE_DIRS})
target_link_libraries(
sort ${CUDA_LIBRARIES})
The error message is:
CMakeFiles/sort.dir/sort_generated_sort.cu.o: In function
`__sti____cudaRegisterAll()':
tmpxft_0004cd04_00000000-5_sort.cudafe1.cpp:(.text.startup+0x15):
undefined reference to
`__cudaRegisterLinkedBinary_39_tmpxft_0004cd04_00000000_6_sort_cpp1_ii_main'
collect2: error: ld returned 1 exit status
CMakeFiles/sort.dir/build.make:963: recipe for target 'sort' failed
How can I make it work, please? Also, does g.sync() significantly harm the program's performance, or is the impact trivial?
The cooperative groups are not an issue, IMHO. That's just something requiring a recent version of CUDA. As for your linking trouble - I think it must be some sort of flag mess. I'll suggest an alternative CMakeLists.txt, which itself is not perfect, but is more appropriate for CMake versions of recent years. It also has a bunch of suggestions for you in comments:
# 3.9 is required for the DESCRIPTION argument of project() used below.
cmake_minimum_required(VERSION 3.9)
# If you want to properly search for Thrust, you'll need a FindThrust.cmake
# script, which constitutes a "CMake module". You place it under cmake/Modules
# in your source directory and make it available by uncommenting the following
# line:
#list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
project(sort-with-cuda
  DESCRIPTION "My project description here"
  LANGUAGES CXX CUDA)
# Don't do this. Set your build type explicitly, once; and then it's
# cached and you don't have to worry about it when you run make.
#
#if (NOT CMAKE_BUILD_TYPE)
#    set(CMAKE_BUILD_TYPE Release)
#endif ()
# In the future, this should not be necessary, but we need it for
# cuda_select_nvcc_arch_flags
include(FindCUDA)
# This will set the appropriate gencode parameters for the hardware
# on your system (although you could always force it manually)
cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS_TMP Auto)
set(CUDA_ARCH_FLAGS ${CUDA_ARCH_FLAGS_TMP} CACHE STRING "CUDA -gencode parameters")
string(REPLACE ";" " " CUDA_ARCH_FLAGS_STR "${CUDA_ARCH_FLAGS}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH_FLAGS_STR}")
# The above may produce something like:
#
# -gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_70,code=compute_70;-gencode;arch=compute_75,code=compute_75
#
# But it may include older micro-architectures which have been
# deprecated/removed, in which case you'll need to edit that
# with ccmake and only keep what you need.
add_executable(sort-with-cuda sort.cu)
# The only source is a .cu file, which is compiled as CUDA; the CXX_* properties
# alone do not apply to it, so the CUDA_* counterparts are set as well.
set_target_properties(
  sort-with-cuda
  PROPERTIES
    CXX_STANDARD 14
    CXX_STANDARD_REQUIRED YES
    CXX_EXTENSIONS NO
    CUDA_STANDARD 14
    CUDA_STANDARD_REQUIRED YES
)
# Note: I haven't added flags for compiling with warnings
# Thrust is very finicky: It provides a configuration script, but
# only for CMake >= 3.15 . And - it doesn't provide a FindThrust.cmake
# script itself with targets appropriate for CMake >= 3.
#
# See https://github.com/NVIDIA/thrust/blob/main/thrust/cmake/README.md
#
# With CMake 3.15 or later you can enable the following three lines:
#
#find_package(Thrust REQUIRED CONFIG)
#thrust_create_target(Thrust)
#target_link_libraries(sort-with-cuda Thrust)
#
# With earlier CMake versions, get yourself a proper FindThrust.cmake
# script (which creates a Thrust::Thrust target I suppose) and
# then uncomment the following two lines:
#
#find_package(Thrust REQUIRED)
#target_link_libraries(sort-with-cuda Thrust::Thrust)
# The following sets -rdc=true , but you don't actually need that for your example
set_target_properties(sort-with-cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

Process finished with exit code -1073741515 (0xC0000135) while using SDL2 in CLion [duplicate]

I've been trying to run OpenCV using CLion IDE under Windows. When I try to run this sample code for loading and displaying an image
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
using namespace cv;
using namespace std;
// Loads the image named on the command line and shows it in a window until a
// key is pressed. Returns -1 when the argument is missing or the image cannot
// be loaded, 0 otherwise.
int main( int argc, char** argv )
{
    if( argc != 2)
    {
        cout <<" Usage: display_image ImageToLoadAndDisplay" << endl;
        return -1;
    }
    Mat image;
    // Read the file the user asked for. Previously the path was hard-coded to
    // "earth.jpg", so the required command-line argument was silently ignored.
    image = imread(argv[1], CV_LOAD_IMAGE_COLOR); // Read the file
    if(! image.data ) // Check for invalid input
    {
        cout << "Could not open or find the image" << std::endl ;
        return -1;
    }
    namedWindow( "Display window", WINDOW_AUTOSIZE );// Create a window for display.
    imshow( "Display window", image ); // Show our image inside it.
    waitKey(0); // Wait for a keystroke in the window
    return 0;
}
I get the error statement:
Process finished with exit code -1073741515 (0xC0000135)
As for the content in my CMakeLists.txt, it looks like this:
# Builds the `openCV` executable from main.cpp and links it against OpenCV.
cmake_minimum_required(VERSION 3.6)
project(test)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# Where to find CMake modules and OpenCV
set(OpenCV_DIR "C:\\opencv\\mingw-build\\install")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(openCV main.cpp)
# add libs you need
# (this replaces whatever value find_package(OpenCV) stored in OpenCV_LIBS)
set(OpenCV_LIBS opencv_core opencv_imgproc opencv_highgui opencv_imgcodecs)
# linking
target_link_libraries(openCV ${OpenCV_LIBS})
Thanks for helping me with this.
You need to add OpenCV binary path with DLLs to your PATH BEFORE CLion start.
I do it from script:
=== CLionWithMingwAndOpenCV.bat ==========================
@echo off
rem Put the MinGW and OpenCV DLL directories at the front of PATH, then start
rem CLion from this environment so that programs it launches inherit that PATH.
set PATH=C:\mingw-w64\x86_64-5.2.0-win32-seh-rt_v4-rev0\mingw64\bin;D:\opencv\release\bin;%PATH%
"C:\Program Files (x86)\JetBrains\CLion XXXX\bin\clion64.exe"
=== ==========================

Tensorflow XLA AOT: Eigen related Error Building Project

I'm currently trying to work through the tensorflow XLA ahead of time compilation work flow for the first time, and I've hit a problem while trying to build the final executable binary which includes the AOT compiled object.
I've used the tutorial here to generate the test_graph_tfgather.pb and test_graph_tfgather.config.pbtxt files. Then I've used the tfcompile tool directly to produce MyClass.o and MyClass.h. So far so good.
I'm now building a simple makefile project which includes this compiled model, but I'm getting some errors related to Eigen. Could this be due to a different version of eigen3 being installed on my computer? I've also had to comment out the Eigen::ThreadPool lines due to Eigen errors, so a version mismatch may be the problem. Has anyone seen this problem before, or does anyone have any ideas on how to get this working?
Thanks.
The build errors:
g++ -c -std=c++11 -I . -I /usr/include/eigen3 -I /home/user/tensorflow_xla/tensorflow -I /usr/include main.cpp
In file included from /home/user/tensorflow_xla/tensorflow/tensorflow/compiler/xla/types.h:22:0,
from /home/user/tensorflow_xla/tensorflow/tensorflow/compiler/xla/executable_run_options.h:20,
from /home/user/tensorflow_xla/tensorflow/tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h:22,
from MyClass.h:14,
from main.cpp:6:
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h: In static member function ‘static tensorflow::bfloat16 Eigen::NumTraits<tensorflow::bfloat16>::infinity()’:
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h:79:28: error: ‘infinity’ is not a member of ‘Eigen::NumTraits<float>’
return FloatToBFloat16(NumTraits<float>::infinity());
^
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h: In static member function ‘static tensorflow::bfloat16 Eigen::NumTraits<tensorflow::bfloat16>::quiet_NaN()’:
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h:83:28: error: ‘quiet_NaN’ is not a member of ‘Eigen::NumTraits<float>’
return FloatToBFloat16(NumTraits<float>::quiet_NaN());
^
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h: At global scope:
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h:95:34: error: ‘log’ is not a template function
const tensorflow::bfloat16& x) {
^
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h:101:34: error: ‘exp’ is not a template function
const tensorflow::bfloat16& x) {
^
/home/user/tensorflow_xla/tensorflow/tensorflow/core/framework/numeric_types.h:107:34: error: ‘abs’ is not a template function
const tensorflow::bfloat16& x) {
^
Makefile:10: recipe for target 'main.o' failed
main.cpp source:
#define EIGEN_USE_THREADS
#define EIGEN_USE_CUSTOM_THREAD_POOL
#include <iostream>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "MyClass.h" // generated
// Runs the generated MyClass computation on twelve fixed input values and
// reports whether element (0, 0) of the result equals 58.
int main(int argc, char** argv) {
// The thread-pool setup is commented out, so no thread pool is handed to
// matmul in this version.
//Eigen::ThreadPool tp(2); // Size the thread pool as appropriate.
//Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
MyClass matmul;
//matmul.set_thread_pool(&device);
// Set up args and run the computation.
const float args[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// The first six values go to argument 0, the last six to argument 1.
std::copy(args + 0, args + 6, matmul.arg0_data());
std::copy(args + 6, args + 12, matmul.arg1_data());
matmul.Run();
// Check result
if (matmul.result0(0, 0) == 58) {
std::cout << "Success" << std::endl;
} else {
std::cout << "Failed. Expected value 58 at 0,0. Got:"
<< matmul.result0(0, 0) << std::endl;
}
return 0;
}
Makefile
# Builds xla_hw from main.cpp and the pre-generated MyClass.o.
# Recipe lines must start with a tab character; make rejects them otherwise.
EIGEN_INC=-I /usr/include/eigen3
TF_INC=-I /home/user/tensorflow_xla/tensorflow
CPPFLAGS=-c -std=c++11
# Link step: MyClass.o is not built by this Makefile.
xla_hw: main.o MyClass.o
	g++ -o xla_hw main.o MyClass.o
# Compile step for main.cpp.
main.o: main.cpp
	g++ $(CPPFLAGS) -I . $(TF_INC) $(EIGEN_INC) -I /usr/include main.cpp
I've solved this problem now. It turns out that a specific version of eigen3 is included with tensorflow, and you need to use this version for it to work. Once tensorflow has been built, the correct version of eigen3 is located at <tensorflow path>/bazel-tensorflow/external/eigen_archive
Below is the working makefile which includes the correct Eigen path as well as the libraries needed to link the project.
# Builds xla_hw against the Eigen copy bundled with the tensorflow build tree
# and links the XLA runtime libraries.
# Recipe lines must start with a tab character; make rejects them otherwise.
TF_INC=-I /home/user/tensorflow_xla/tensorflow/bazel-tensorflow/external/eigen_archive -I /home/user/tensorflow_xla/tensorflow
TF_LIBS=-L/home/user/tensorflow_xla/tensorflow/bazel-bin/tensorflow/compiler/tf2xla/ -lxla_compiled_cpu_function -L/home/user/tensorflow_xla/tensorflow/bazel-bin/tensorflow/compiler/aot -lruntime
CPPFLAGS=-c -std=c++11
# Link step: libraries come after the object files that use them.
xla_hw: main.o MyClass.o
	g++ -o xla_hw main.o MyClass.o $(TF_LIBS)
# Compile step for main.cpp.
main.o: main.cpp
	g++ $(CPPFLAGS) -I . $(TF_INC) -I /usr/include main.cpp

How to configure CMakeList in Clion ide for using POSIX pthread functions?

I tried to compile a simple POSIX example in the CLion IDE, but it doesn't seem to know about the pthread library, I think...
Here is the code:
// Body of the first thread: prints a message once per second, ten times.
// It is declared without parameters; this is the void* (*)() signature that
// the conversion error quoted below refers to.
// NOTE(review): declared to return void* but contains no return statement.
void *func1()
{
int i;
for (i=0;i<10;i++) { printf("Thread 1 is running\n"); sleep(1); }
}
// Body of the second thread: prints a message once per second, ten times.
// It is declared without parameters; this is the void* (*)() signature that
// the conversion error quoted below refers to.
// NOTE(review): declared to return void* but contains no return statement.
void *func2()
{
int i;
for (i=0;i<10;i++) { printf("Thread 2 is running\n"); sleep(1); }
}
// Globals shared with main(): the pthread_create return code, the two join
// results and the two thread handles.
int result, status1, status2;
pthread_t thread1, thread2;
// Starts both threads, waits for them to finish, prints the two status values
// and waits for a key press.
int main()
{
// result is overwritten by the second call and never checked.
result = pthread_create(&thread1, NULL, func1, NULL);
result = pthread_create(&thread2, NULL, func2, NULL);
// NOTE(review): status1/status2 are ints, so &status1 is an int*;
// pthread_join presumably expects a void** here -- confirm.
pthread_join(thread1, &status1);
pthread_join(thread2, &status2);
// The message below is Russian for "Threads finished with %d and %d".
printf("\nПотоки завершены с %d и %d", status1, status2);
getchar();
return 0;
}
This code is supposed to be correct, because it is taken from an example in a book. Yet CLion marks the second argument of the pthread_join function as a mistake, giving this error:
error: invalid conversion from ‘void* (*)()’ to ‘void* (*)(void*)’
I suppose that the problem is in the CMakeLists file. Here is my current CMakeLists.txt:
# Builds the hello_world executable from main.cpp.
cmake_minimum_required(VERSION 3.3)
project(hello_world C CXX)
# -pthread is appended to the C++ flags only (CMAKE_C_FLAGS is left untouched).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")
set(SOURCE_FILES main.cpp)
add_executable(hello_world ${SOURCE_FILES})
Your function signature is wrong for the callback to pthread.
func1 and func2 have the signature void* (*)(), which means they return a void* and take no parameters.
But pthread_create wants void* (*)(void*), i.e. a function that also takes a void* as its parameter.
so your functions should look like this:
void *func1(void* param) ...
You don't have to use the parameter but it has to be there in the declaration.
Note:
To tell cmake to link against pthread you should use this:
# Ask FindThreads to prefer the -pthread compiler/linker flag over a plain
# -lpthread where the toolchain supports it; must be set before find_package.
set( THREADS_PREFER_PTHREAD_FLAG ON )
find_package( Threads REQUIRED )
add_executable(hello_world ${SOURCE_FILES})
# Threads::Threads carries the flags/libraries needed for the platform's
# thread library.
target_link_libraries( hello_world Threads::Threads )
See here: How do I force cmake to include "-pthread" option during compilation?