Standalone CMake script to cut off file contents by delimiters - cmake

I have a project where one repeatable task to do involves manipulating files' contents.
Until now I used a Python script for it, but recently I discovered I can use standalone CMake scripts ("standalone" here means they can be invoked outside of configure/build/test/etc. workflow). As my project already uses CMake for project management I concluded I can save others' problem of installing a Python interpreter (welcome Windows users!) and use CMake project-wide.
Part of my script needs to read a file and cut off everything that appears before "[START-HERE]" and after "[END-HERE]" lines. I am stuck with that part and don't know how to implement it. How can it be done?

You could combine file(READ) with if(MATCHES) to accompilish this. The former is used to read the file, the latter allows you to check for the occurance of a regular expression and to extract a capturing group:
foo.cmake
#[===[
Params:
INPUT_FILE : the path to the file to read
#]===]
file(READ "${INPUT_FILE}" FILE_CONTENTS)
if (FILE_CONTENTS MATCHES "(^|[\r\n])\\[START-HERE\\][\r\n]+(.*)[\r\n]+\\[END-HERE\\]")
# todo: use extracted match stored in CMAKE_MATCH_2 for your own logic
message("Content: '${CMAKE_MATCH_2}'")
else()
message(FATAL_ERROR "[START-HERE]...[END-HERE] doesn't occur in the input file '${INPUT_FILE}'")
endif()
foo.txt
Definetly not
[START-HERE]
working
[END-HERE]
Try again!
Output:
> cmake -D INPUT_FILE=foo.txt -P foo.cmake
Content: 'working'

For the part where you are stuck, here's one approach using the string, file, and math commands:
file(READ data.txt file_str)
string(FIND "${file_str}" "[START-HERE]" start_offset)
# message("${start_offset}")
math(EXPR start_offset "${start_offset}+12")
# message("${start_offset}")
string(FIND "${file_str}" "[END-HERE]" end_offset)
math(EXPR substr_len "${end_offset}-${start_offset}")
# message("${substr_len}")
string(SUBSTRING "${file_str}" "${start_offset}" "${substr_len}" trimmed_str)
# message("${trimmed_str}")
You could also probably do it by using the file(STRINGS) command, which reads lines of a file into an array, and then use the list(FIND) command. The approach shown above has the advantage of working if your delimiters are not on their own lines.
As #fabian shows in their answer post, you can also do this using a regular expression with if(MATCHES) like this:
file(READ "${INPUT_FILE}" FILE_CONTENTS)
if (FILE_CONTENTS MATCHES "(^|[\r\n])\\[START-HERE\\][\r\n]+(.*)[\r\n]+\\[END-HERE\\]")
# todo: use extracted match stored in CMAKE_MATCH_2 for your own logic
message("Content: '${CMAKE_MATCH_2}'")
else()
message(FATAL_ERROR "[START-HERE]...[END-HERE] doesn't occur in the input file '${INPUT_FILE}'")
endif()

Related

Dealing with lots and lots of escaped characters in add_custom_command

I have a file that contains a bunch of data. I want to turn it into a C++ string literal, because I need to compile this data into the binary - I cannot read it from disk.
One way of doing this is to just generate a C++ source file that declares a string literal with a known name. The CMake code to do this is straightforward, if somewhat awful:
function(make_literal_from_file dest_file source_file literal_name)
add_custom_command(
OUTPUT ${dest_file}
COMMAND printf \'char const* ${literal_name} = R\"\#\(\' > ${dest_file}
COMMAND cat ${source_file} >> ${dest_file}
COMMAND printf \'\)\#\"\;\' >> ${dest_file}
DEPENDS ${source_file})
endfunction()
This works and does what I want (printf is necessary to avoid a new line after the raw string introducer). However, the amount of escaping going on here makes it very difficult to see what's going on. Is there a way to write this function such that it's actually readable?
Note that I cannot use a file(READ ...)/configure_file(...) combo here because source_file could be something that is generated by CMake at build time and so may not be present at configuration time.
I would recommend writing a script to do this. You could write it in CMake, but I personally prefer a better language such as Python:
# Untested, just to show roughly how to do it
import sys
dest_file, source_file, literal_name = sys.argv[1:]
with open(dest_file) as dest, open(source_file) as source:
literal_contents = source.read()
dest.write(f'char const* {literal_name} = R"({literal_contents})";\n')
Corresponding CMake code:
# String interpolation came in Python 3.6, thus the requirement on 3.6.
# If using CMake < 3.12, use find_package(PythonInterp) instead.
find_package(Python3 3.6 COMPONENTS Interpreter)
# Make sure this resolves correctly. ${CMAKE_CURRENT_LIST_DIR} is helpful;
# it's the directory containing the current file (this cmake file)
set(make_literal_from_file_script "path/to/make_literal_from_file.py")
function(make_literal_from_file dest_file source_file literal_name)
add_custom_command(
OUTPUT "${dest_file}"
COMMAND
"${Python3_EXECUTABLE}" "${make_literal_from_file_script}"
"${dest_file}"
"${source_file}"
"${literal_name}"
DEPENDS "${source_file}")
endfunction()
If you don't want the dependency on Python, you could use C++ (only the CMake code shown):
add_executable(make_literal_from_file_exe
path/to/cpp/file.cpp
)
function(make_literal_from_file dest_file source_file literal_name)
add_custom_command(
OUTPUT "${dest_file}"
COMMAND
make_literal_from_file_exe
"${dest_file}"
"${source_file}"
"${literal_name}"
DEPENDS "${source_file}")
endfunction()

cmake - preset settings for entries from ini file

I have a project that uses some third party libraries. So each time I setup this project with CMake, I have to set each entry (path of the third party library) on the GUI of CMake. I improve this by making CMake script guess the path by this script (learn this technique from OGRE):
# Guess the paths.
set( OGRE_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/Dependencies/Ogre" CACHE STRING "Path to OGRE source code (see http://www.ogre3d.org/tikiwiki/tiki-index.php?page=CMake+Quick+Start+Guide)" )
So each time I setup with CMake, it will automatic fill the entry OGRE_SOURCE. But that doesn't enough. If the Ogre source is not in the path
"${CMAKE_CURRENT_SOURCE_DIR}/Dependencies/Ogre"
, then I have to open and edit the CMake script or I have to edit the entry on the GUI of CMake. I find that pretty inconvenient, especially when you link to a lot of third party libraries.
So I want to use another technique: preset settings for entries from file - CMake reads the presets from file PresetEntries.txt (that I make) and apply the these presets on the entries (It's a lot quicker to edit the path in text file than on the GUI of CMake).
Here my idea about this preset file: PresetEntries.txt
OGRE_SOURCE=E:/Source/ogre
I found that CMake can read a text file, but if I use this, I have to do string manipulations.
CMake has the file CMakeCache.txt to save the settings on the CMake GUI, but I want it to be simple: it should only has the preset settings that need to be pre-set.
So I wonder if CMake support this preset settings for entries from file.
Edit:
So I read this question and see that CMake can set config from file, but this require to fire cmake with the -C mysettings.cmake, but I wanna it to be automatically with CMake GUI - just edit the file and hit generate button in CMake GUI. So I wanna make this question more specific:
In my CMakeLists.txt should have script like this:
# Guess the paths.
#I wanna have this function from C++
#https://msdn.microsoft.com/en-us/library/windows/desktop/ms724353%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
GetPrivateProfileString("OGRE", #lpAppName
"OGRE_SOURCE", #lpKeyName
"${CMAKE_CURRENT_SOURCE_DIR}/Dependencies/Ogre", #lpDefault
OGRE_SOURCE_VAR,#lpReturnedString
MAX_PATH, #nSize, may be can reduce this variable
"LibPath.ini") #lpFileName
set( OGRE_SOURCE "${OGRE_SOURCE_VAR}" CACHE STRING "Path to OGRE source code" )
In the file LibPath.ini
[OGRE]
OGRE_SOURCE = "E:/Source/ogre"
So the user can choose to either use the ini file or not.
I don't know if there any way I can use a function that similar to function GetPrivateProfileString (of C++) in CMake.
Thanks for reading
The external libraries should be included by one of the following commands
find_package(ttnlib REQUIRED HINTS /usr/local/lib/cmake)
include_directories(${ttnlib_INCLUDE_DIR})
set(EXTRA_LIBS ${EXTRA_LIBS} ${TTNLIB_LIBRARY})
or
find_library(MY_EXTERNAL_LIB name COOLSTUFF libCOOLSTUFF libCOOLSTUF.so hints /usr/local/lib)
The search for the external packages and libraries should only be necessary for the first run of cmake.
I can't find the function to read the ini file, so what I can do is create a simple function that read simple txt file for myself.
I declare the function in 1 file for other file use it
"\CMake\Dependencies\CommonFunc.cmake"
#------------Define function Read file------------
macro( readSettingFile KEY DEFAULT_RESULT STRING_RESULT_OUT)
unset(STRING_RESULT)
# Read the file
file( READ "${CMAKE_SOURCE_DIR}/LibPath.txt" LIB_PATH_STR )
# Set the variable "Esc" to the ASCII value 27 - basically something
# which is unlikely to conflict with anything in the file contents.
string(ASCII 27 Esc)
# Turn the contents into a list of strings, each ending with an Esc.
# This allows us to preserve blank lines in the file since CMake
# automatically prunes empty list items during a foreach loop.
string(REGEX REPLACE "\n" "${Esc};" LIB_PATH_LINES "${LIB_PATH_STR}")
foreach(LINE ${LIB_PATH_LINES})
if("${LINE}" MATCHES "${KEY}")
#remove the key, leave the content untouch
string(REPLACE "${KEY}" "" STRING_RESULT ${LINE})
# Swap the appended Esc character back out in favour of a line feed
string(REGEX REPLACE "${Esc}" "" STRING_RESULT ${STRING_RESULT})
endif()
endforeach()
if("${STRING_RESULT}" STREQUAL "")
set( STRING_RESULT ${DEFAULT_RESULT} )
endif()
#message( STATIC "---GTA Sa-----" "[${STRING_RESULT}]" )
endmacro()
(I need the help from this answer to write this function :p)
Here is how I use
For example: "\CMake\Dependencies\Ogre.cmake"
#include common functions
include( CMake/Dependencies/CommonFunc.cmake )
#---------------Guess the paths.----------------------
#----Set OGRE_SOURCE
readSettingFile( "OGRE_SOURCE="
"E:/Source/ogre"
STRING_RESULT
)
set( OGRE_SOURCE "${STRING_RESULT}" CACHE STRING "Path to OGRE Source" )
#----Set OGRE_BINARIES
readSettingFile( "OGRE_BINARIES="
"E:/Source/_build/ogre"
STRING_RESULT
)
set( OGRE_BINARIES "${STRING_RESULT}" CACHE STRING "Path to OGRE's build folder generated by CMake" )
Here is the setting file
"\LibPath.txt"
OGRE_SOURCE=E:/Source/ogre
OGRE_BINARIES=E:/Source/_build/ogre

In CMake how do I deal with generated source files which number and names are not known before?

Imagine a code generator which reads an input file (say a UML class diagram) and produces an arbitrary number of source files which I want to be handled in my project. (to draw a simple picture let's assume the code generator just produces .cpp files).
The problem is now the number of files generated depends on the input file and thus is not known when writing the CMakeLists.txt file or even in CMakes configure step. E.g.:
>>> code-gen uml.xml
generate class1.cpp..
generate class2.cpp..
generate class3.cpp..
What's the recommended way to handle generated files in such a case? You could use FILE(GLOB.. ) to collect the file names after running code-gen the first time but this is discouraged because CMake would not know any files on the first run and later it would not recognize when the number of files changes.
I could think of some approaches but I don't know if CMake covers them, e.g.:
(somehow) define a dependency from an input file (uml.xml in my example) to a variable (list with generated file names)
in case the code generator can be convinced to tell which files it generates the output of code-gen could be used to create a list of input file names. (would lead to similar problems but at least I would not have to use GLOB which might collect old files)
just define a custom target which runs the code generator and handles the output files without CMake (don't like this option)
Update: This question targets a similar problem but just asks how to glob generated files which does not address how to re-configure when the input file changes.
Together with Tsyvarev's answer and some more googling I came up with the following CMakeList.txt which does what I want:
project(generated)
cmake_minimum_required(VERSION 3.6)
set(IN_FILE "${CMAKE_SOURCE_DIR}/input.txt")
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${IN_FILE}")
execute_process(
COMMAND python3 "${CMAKE_SOURCE_DIR}/code-gen" "${IN_FILE}"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
INPUT_FILE "${IN_FILE}"
OUTPUT_VARIABLE GENERATED_FILES
OUTPUT_STRIP_TRAILING_WHITESPACE
)
add_executable(generated main.cpp ${GENERATED_FILES})
It turns an input file (input.txt) into output files using code-gen and compiles them.
execute_process is being executed in the configure step and the set_property() command makes sure CMake is being re-run when the input file changes.
Note: in this example the code-generator must print a CMake-friendly list on stdout which is nice if you can modify the code generator. FILE(GLOB..) would do the trick too but this would for sure lead to problems (e.g. old generated files being compiled, too, colleagues complaining about your code etc.)
PS: I don't like to answer my own questions - If you come up with a nicer or cleaner solution in the next couple of days I'll take yours!

How to tell a test under cmake/ctest where to find its input data ... without changing hard-coded file names

Directory prj/test contains some test scripts t01.exe, t02.exe etc. Some of them need input data d01.dat etc, also provided in prj/test. The names of these data files are hard-coded in the tests, and I cannot easily change this. The control file CMakeLists.txt contains
enable_testing()
file(GLOB test_sources "t*")
foreach(test_src ${test_sources})
string(REGEX REPLACE ".*/" "" test_name "${test_src}")
string(REGEX REPLACE ".exe$" "" test_name "${test_name}")
add_test(${test_name} "${test_src}")
endforeach(test_src)
I'm building the project in a subdirectory prj/build. ctest works fine ... until a test requires input data. Obviously, they are not found because they reside in prj/test whereas the test runs in prj/build/test.
Hence my questions:
What's the standard way to let the tests find their input data?
Is there a way that does not require copying the data (in case they are huge)?
True that symlinks don't work under Windows, and therefore are no acceptable solution?
add_test command accepts WORKING_DIRECTORY option. You can set this option to a directory where a test is located. So, the test will find its data file:
add_test(NAME ${test_name} COMMAND "${test_src}"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)
The variables ${CMAKE_CURRENT_SOURCE_DIR} and ${CMAKE_SOURCE_DIR} are helpful. The first one is the source directory to the current binary location. The latter is the root to the top level of the source tree.
Supposed you have an input file at prj/test/inputfile you could get the path to it with${CMAKE_CURRENT_SOURCE_DIR}/inputfile. You can pass the path as an argument to your test.

CMake - copying/merging files with dependencies a change-checking

I have a big project with one executable, some plugins and web interface with some generated JSONs.
Therefore, after I compile executables and .so plugins, I'm doing following:
Merge all .js files into one big
Compile "generators" (set of macros and printfs to describe some structures in C++ code)
Run generators and generate JSON files (with some sed and jshon) processing
In install phase, and copy all of this and some other files to their destination directories (which should be created if doesn't exists).
But I don't know, how to use CMake to make correct dependencies and date-time checking. Actually, first step is made with:
FILE(GLOB WEB_INPUT_JS *.js)
FILE(WRITE scripts.js.tmp "")
FOREACH(SCRIPTFILE ${WEB_INPUT_JS})
FILE(READ ${SCRIPTFILE} CONTENTS)
FILE(APPEND scripts.js.tmp "${CONTENTS}")
ENDFOREACH()
CONFIGURE_FILE(scripts.js.tmp ${WEB_BUILD_PATH}/scripts.js COPYONLY)
But this doesn't create dependency in makefiles. I want to re-run this piece of "code", when some of ${WEB_INPUT_JS} files has been changed or ${WEB_BUILD_PATH}/scripts.js has been deleted.
Third step is made with series of
add_custom_command(TARGET gen_somedata POST_BUILD COMMAND gen_somedata | sed ${JSON_SED} | jshon > ${JSON_BUILD_PATH}/somedata.json)
install (FILES ${JSON_BUILD_PATH}/somedata.json ......nextfiles.... DESTINATION ${JSON_OUTPUT_PATH})
How is this done? Thanks much for your answers!
So I've finally found out, how to do some of this things.
Merging files is pretty tricky.
First, cmake "script" doing merging is needed (I will explain some lines later). I will name it "concat.cmake":
FUNCTION(CONCAT_FILES OUTPUT FILELIST)
FILE(WRITE ${OUTPUT} "")
FOREACH(SCRIPTFILE ${FILELIST})
FILE(READ ${SCRIPTFILE} CONTENTS)
FILE(APPEND ${OUTPUT} "${CONTENTS}")
ENDFOREACH()
ENDFUNCTION(CONCAT_FILES)
STRING(REPLACE "," ";" FILELIST ${FILELIST})
CONCAT_FILES(${OUTPUT} "${FILELIST}")
Then, when merging script is used as follows (write it into CMakeLists.txt):
1) First, make an file list (using globbing or by writing file list by hand).
FILE(GLOB INPUT_FILES_LIST *.js) # get list of JS files
2) The only way, how to pass a cmake list to other cmake script is creating file list separated by comma, then passing comma-separated list to external script. I've done this following way:
SET(FILELIST "")
FOREACH(ITEM ${INPUT_FILES_LIST})
SET(FILELIST "${FILELIST},${ITEM}") # append list item by ','
ENDFOREACH()
STRING(SUBSTRING ${JSFILES} 1 -1 JSFILES) # remove first ','
3) Now it's not problem to call merging script..
add_custom_command(OUTPUT some_output_file.ext
COMMAND ${CMAKE_COMMAND} -DFILELIST=${FILELIST} -DOUTPUT=some_output_frile.ext -P ${CMAKE_CURRENT_SOURCE_DIR}/concat.cmake
DEPENDS ${INPUT_FILES_LIST} VERBATIM )
The precedent code will correctly track changes in input files and output file will be generated when missing or input changes. Installation is just easy as
INSTALL (FILES "output.ext" DESTINATION /usr/share/...)