I have the code shown below. As far as I understood, separable compilation must be turned on when
CUDA device code is separated into .h and .cu files
Use ObjectA's device code into Object's B device code
however, in my main function I am not having any of the cases above. Could you tell me why do I have to set separable compilation for this sample project?
BitHelper.h
#pragma once
#include <cuda_runtime.h>
#define COMPILE_TARGET __host__ __device__
class BitHelper
{
public:
COMPILE_TARGET BitHelper();
COMPILE_TARGET ~BitHelper();
COMPILE_TARGET static void clear(unsigned int& val0);
};
BitHelper.cu
#include "bithelper.h"
BitHelper::BitHelper()
{}
BitHelper::~BitHelper()
{}
void BitHelper::clear(unsigned int& val0)
{
val0 = 0x0000;
}
Consume_BitHelper.h
#pragma once
class Consume_BitHelper
{
public:
void apply();
private:
bool test_cpu();
bool test_gpu();
};
Consume_BitHelper.cu
#include "consume_bithelper.h"
#include <cuda_runtime.h>
#include <iostream>
#include "bithelper.h"
__global__
void myKernel()
{
unsigned int FLAG_VALUE = 0x2222;
printf("GPU before: %d\n", FLAG_VALUE);
BitHelper::clear(FLAG_VALUE);
printf("GPU after: %d\n", FLAG_VALUE);
}
void Consume_BitHelper::apply()
{
test_cpu();
test_gpu();
cudaDeviceSynchronize();
}
bool Consume_BitHelper::test_cpu()
{
std::cout << "TEST CPU" << std::endl;
unsigned int FLAG_VALUE = 0x1111;
std::cout << "CPU before: " << FLAG_VALUE << std::endl;
BitHelper::clear(FLAG_VALUE);
std::cout << "CPU after : " << FLAG_VALUE << std::endl;
return true;
}
bool Consume_BitHelper::test_gpu()
{
std::cout << "TEST GPU" << std::endl;
myKernel << <1, 1 >> > ();
return true;
}
main.cu
#include "consume_bithelper.h"
#include "bithelper.h"
#include <iostream>
int main(int argc, char** argv)
{
Consume_BitHelper cbh;
cbh.apply();
std::cout << "\nPress any key to continue...";
std::cin.get();
return 0;
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.10)
project(cuda_class LANGUAGES CXX CUDA)
#BitHelper needs separable compilation because we have separated declaration from definition
add_library(bithelper_lib STATIC bithelper.cu)
set_property(TARGET bithelper_lib PROPERTY CUDA_SEPARABLE_COMPILATION ON)
#Consume_BitHelper needs separable compilation because we call BitHelper's device code
#from Consume_BitHelper's kernel
add_library(consume_bithelper_lib STATIC consume_bithelper.cu)
set_property(TARGET consume_bithelper_lib PROPERTY CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(consume_bithelper_lib bithelper_lib)
#We only call CPU code so no need of separable compilation?
add_executable(${PROJECT_NAME} main.cu)
target_link_libraries(${PROJECT_NAME} bithelper_lib consume_bithelper_lib)
The errors I'm getting are these
EDIT
According to Robert Crovella's post Consume_BitHelper.cu uses BitHelper::clear defined in a separate compilation unit.
Does it mean I have to activate only separate compilation for BitHelper?
Since separate compilation has to do only with device code called from device code.
Why am I getting the mentioned errors when separate compilation is NOT on for cuda_class? (which is the executable created from CMake and is not calling any device code)
Separable compilation has to do with how the compiler handles function calls. In exchange for a little bit of overhead, you get the ability to make true function calls and thus access code from other "compilation units" (i.e. .cu source files).
As GPU programmers are obsessed with performance (particularly the extra registers that get used when separable compilation is enabled) Nvidia made it an option instead of default.
You should only need separable compilation for .cu files that access functions/globals defined in other .cu files.
Related
I want to copy a mesh with the function copy_face_graph(source, target). But the target mesh is different (it has same number of vertices and faces, but the coordinates and the order are totally different).
The code:
#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Exact_predicates_exact_constructions_kernel.h>
#include <CGAL/Surface_mesh.h>
#include <iostream>
#include <fstream>
#include <CGAL/boost/graph/copy_face_graph.h>
typedef CGAL::Exact_predicates_inexact_constructions_kernel Kernel;
typedef CGAL::Surface_mesh<Kernel::Point_3> Mesh;
namespace PMP = CGAL::Polygon_mesh_processing;
int main(int argc, char* argv[]) {
const char* filename1 = (argc > 1) ? argv[1] : "data/blobby.off";
std::cout << ".off loaded" << std::endl;
std::ifstream input(filename1);
Mesh mesh_orig;
if (!input || !(input >> mesh_orig))
{
std::cerr << "First mesh is not a valid off file." << std::endl;
return 1;
}
input.close();
// ========================================================
Mesh mesh_copy;
CGAL::copy_face_graph(mesh_orig, mesh_copy);
// ========================================================
std::ofstream mesh_cpy("CPY_ANYLYZE/mesh_copy.off");
mesh_cpy << mesh_copy;
mesh_cpy.close();
return 0;
}
Dose anyone knows how to get a complete same mesh from the original mesh? Do I need add the named parameters, or maybe using another function?
Thanks a lot
Except if you intend to write some code working with different data structures, you can use the copy constructor from the Surface_mesh class, Mesh mesh_copy(mesh_orig). copy_face_graph does not do a raw copy because it works also if the input and output are of different types. However the output should be the same up to the order of the simplices.
I've tried to implement an own archive type for boost serialization following official boost example to write archives.
#include <iostream>
#include <vector>
#include <boost/serialization/nvp.hpp>
#include "boost/serialization/vector.hpp"
#include <boost/archive/detail/common_oarchive.hpp>
#include <boost/archive/detail/register_archive.hpp>
#include <boost/archive/detail/archive_serializer_map.hpp>
class complete_oarchive : public boost::archive::detail::common_oarchive<complete_oarchive>
{
friend class boost::archive::save_access;
template<class T>
void save(T & t){
std::cout << "saved data\n";
}
public:
void save_binary(void *address, std::size_t count){
}
};
template class boost::archive::detail::archive_serializer_map<complete_oarchive>;
template class boost::archive::detail::common_oarchive<complete_oarchive>;
BOOST_SERIALIZATION_REGISTER_ARCHIVE(complete_oarchive)
int main(int argc, char *argv[])
{
std::vector<double> testVector = {1, 2, 3, 4};
complete_oarchive oa;
std::vector<double>* pVec = &testVector;
oa << BOOST_SERIALIZATION_NVP(testVector);
oa << BOOST_SERIALIZATION_NVP(pVec);
return 0;
}
Compiling this example with
g++ -c -g -std=c++11 -MMD -MP -MF "build/Debug/GNU-Linux/demo.o.d" -o build/Debug/GNU-Linux/demo.o demo.cpp
g++ -o dist/Debug/GNU-Linux/serializationdemo build/Debug/GNU-Linux/demo.o -lboost_serialization
leads to the following linker error
build/Debug/GNU-Linux/demo.o: In function `boost::archive::detail::pointer_oserializer<complete_oarchive, std::vector<double, std::allocator<double> > >::pointer_oserializer()':
/opt/tools/boost/boostRdk-1.66.0/include/boost/archive/detail/oserializer.hpp:222: undefined reference to `boost::archive::detail::archive_serializer_map<complete_oarchive>::insert(boost::archive::detail::basic_serializer const*)'
build/Debug/GNU-Linux/demo.o: In function `boost::archive::detail::pointer_oserializer<complete_oarchive, std::vector<double, std::allocator<double> > >::~pointer_oserializer()':
/opt/tools/boost/boostRdk-1.66.0/include/boost/archive/detail/oserializer.hpp:227: undefined reference to `boost::archive::detail::archive_serializer_map<complete_oarchive>::erase(boost::archive::detail::basic_serializer const*)'
collect2: error: ld returned 1 exit status
It seems that serializing a pointer in
oa << BOOST_SERIALIZATION_NVP(pVec);
leads to this error. After deletion of this line everything works fine and the result is as expected.
Does anybody have experience in writting own serialization archives?
A simimal Problem was solved here
https://groups.google.com/forum/#!topic/boost-list/CMoDosGZUo8
but I wasn't able to solve this by forward declarations.
I solved the issue by replacing
#include <boost/archive/detail/archive_serializer_map.hpp>
by
#include <boost/archive/impl/archive_serializer_map.ipp>
I'm using CGAL 4.13 (Linux Fedora 29) to generate 3D meshes from segmented anathomical images. I would like to use Lloyd optimization, but I got in a reproductible way a runtime error.
In order to illustrate my problem, I modified the example mesh_3D_image.cpp by adding a Lloyd optimization step, as shown hereafter. The program compiles with no error/warning message.
#include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
#include <CGAL/Mesh_triangulation_3.h>
#include <CGAL/Mesh_complex_3_in_triangulation_3.h>
#include <CGAL/Mesh_criteria_3.h>
#include <CGAL/Labeled_mesh_domain_3.h>
#include <CGAL/make_mesh_3.h>
#include <CGAL/Image_3.h>
typedef CGAL::Exact_predicates_inexact_constructions_kernel K;
typedef CGAL::Labeled_mesh_domain_3<K> Mesh_domain;
typedef CGAL::Sequential_tag Concurrency_tag;
typedef CGAL::Mesh_triangulation_3<Mesh_domain,CGAL::Default,Concurrency_tag>::type Tr;
typedef CGAL::Mesh_complex_3_in_triangulation_3<Tr> C3t3;
typedef CGAL::Mesh_criteria_3<Tr> Mesh_criteria;
using namespace CGAL::parameters;
int main(int argc, char* argv[])
{
const char* fname = (argc>1)?argv[1]:"data/liver.inr.gz";
CGAL::Image_3 image;
if(!image.read(fname)){
std::cerr << "Error: Cannot read file " << fname << std::endl;
return EXIT_FAILURE;
}
Mesh_domain domain = Mesh_domain::create_labeled_image_mesh_domain(image);
Mesh_criteria criteria(facet_angle=30, facet_size=6, facet_distance=4,
cell_radius_edge_ratio=3, cell_size=8);
C3t3 c3t3 = CGAL::make_mesh_3<C3t3>(domain, criteria);
// !!! THE FOLLOWING LINE MAKES THE PROGRAM CRASH !!!
CGAL::lloyd_optimize_mesh_3(c3t3, domain, time_limit=30);
std::ofstream medit_file("out.mesh");
c3t3.output_to_medit(medit_file);
return 0;
}
I compile it by using the following CMakeLists.txt file:
# Created by the script cgal_create_CMakeLists
project( executables )
cmake_minimum_required(VERSION 2.8.11)
find_package( CGAL QUIET COMPONENTS )
# !!! I had to add manually the following line !!!
find_package(CGAL COMPONENTS ImageIO)
include( ${CGAL_USE_FILE} )
find_package( Boost REQUIRED )
add_executable( executables lloyd.cpp )
add_to_cached_list( CGAL_EXECUTABLE_TARGETS executables )
target_link_libraries(executables ${CGAL_LIBRARIES} ${CGAL_3RD_PARTY_LIBRARIES} )
No mesh is generated. I obtain the following message:
$ ./build/mesh_3D_image
terminate called after throwing an instance of 'CGAL::Precondition_exception'
what(): CGAL ERROR: precondition violation!
Expr: std::distance(first,last) >= 3
File: /usr/include/CGAL/Mesh_3/Lloyd_move.h
Line: 419
Aborted (core dumped)
Where my code is wrong, and how can I trigger optimizations for meshes generated by 3D images?
actually, when CGAL::make_mesh_3() is called like this :
C3t3 c3t3 = CGAL::make_mesh_3<C3t3>(domain, criteria);
it internally launches CGAL::perturb_mesh_3() and CGAL::exude_mesh_3(). The latest changes the weights of vertices in the Regular triangulation, and should always be called last (see the Warning in the documentation of CGAL::exude_mesh_3().
The only limitation on the order is that exuder should be called last. So you can either call
C3t3 c3t3 = CGAL::make_mesh_3<C3t3>(domain, criteria, lloyd(time_limit=30));
or
C3t3 c3t3 = CGAL::make_mesh_3<C3t3>(domain, criteria, no_exude());
CGAL::lloyd_optimize_mesh_3(c3t3, domain, time_limit = 30);
CGAL::exude_mesh_3(c3t3);
You removed the part:
if(!image.read(fname)){
std::cerr << "Error: Cannot read file " << fname << std::endl;
return EXIT_FAILURE;
}
from the example, which is what actually reads the image from the file.
i try to use Boost unit test framework to test my static library but CMake cannot find Boost in spite of BOOST_ROOT, CMAKE_INCLUDE_PATH, CMAKE_LIBRARY_PATH are set before call to find_package. Actually i've tried a lot of options based on the answers to similar question but still without any success.
cmake version:3.6.1
boost version:1.60.0
Here is link to my project on GitHub
And here is log output from TravisCI
CMakeLists.txt:
cmake_minimum_required(VERSION ${cmake_version})
project(${LibName}_test)
set(Boost_DEBUG ON)
set(Boost_DETAILED_FAILURE_MSG ON)
set(BOOST_ROOT $ENV{BOOST_ROOT})
message(STATUS "BOOST_ROOT >> ${BOOST_ROOT}")
set(BOOST_LIBRARYDIR $ENV{BOOST_ROOT}/lib)
set(BOOST_INCLUDEDIR $ENV{BOOST_ROOT}/include)
enable_testing()
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "$ENV{BOOST_ROOT}/include")
SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} "$ENV{BOOST_ROOT}/lib")
find_package(Boost 1.60.0 EXACT REQUIRED COMPONENTS unit_test_framework)
#if(Boost_FOUND)
include_directories("${LIB_HEADERS}")
add_executable(test_executable
main.cpp
SomeClass_test.cpp
)
target_include_directories(test_executable PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(test_executable
#${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}
Boost::unit_test_framework
${LibName}
)
add_test(NAME test1 COMMAND test_executable)
#else()
# message(STATUS "No Boost library were found!")
#endif()
main.cpp:
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include <iostream>
// initialization function:
bool init_unit_test()
{
return true;
}
// entry point:
int main(int argc, char * argv[]) {
// insert code here...
std::cout << "Boost version: "
<< BOOST_VERSION / 100000 << "." // major version
<< BOOST_VERSION / 100 % 1000 << "." // minor version
<< BOOST_VERSION % 100 // patch level
<< std::endl;
return boost::unit_test::unit_test_main( &init_unit_test, argc, argv );
}
SomeClass_test.cpp:
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "config.h"
#include "Rectangle.h"
#include "SomeClass_test.hpp"
BOOST_AUTO_TEST_CASE(int_test)
{
int i = 2;
BOOST_TEST(i);
BOOST_TEST(i == 2);
}
BOOST_AUTO_TEST_CASE(area_test)
{
Rectangle rect;
rect.set_values(2, 3);
BOOST_TEST(6 == rect.area());
}
BOOST_AUTO_TEST_CASE(config_test)
{
auto major = SimpleNet::Version::Major;
BOOST_TEST(0 == major);
}
I am compiling the following code with the -ffast-math option:
#include <limits>
#include <cmath>
#include <iostream>
int main() {
std::cout << std::isnan(std::numeric_limits<double>::quiet_NaN() ) << std::endl;
}
I am getting 0 as output. How can my code tell whether a floating point number is NaN when it is compiled with -ffast-math?
Note: On linux, std::isnan works even with -ffast-math.
Since -ffast-math instructs GCC not to handle NaNs, it is expected that isnan() has an undefined behaviour. Returning 0 is therefore valid.
You can use the following fast replacement for isnan():
#if defined __FAST_MATH__
# undef isnan
#endif
#if !defined isnan
# define isnan isnan
# include <stdint.h>
static inline int isnan(float f)
{
union { float f; uint32_t x; } u = { f };
return (u.x << 1) > 0xff000000u;
}
#endif
On linux, the gcc flag -ffast-math breaks isnan(), isinf() and isfinite() - there may be other related functions that are also broken that I have not tested.
The trick of wrapping the function/macro in parentheses also did not work (ie. (isnan)(x))
Removing -ffast-math works ;-)