Is it possible to put a hash in a coffeescript comment

I want to create a comment in CoffeeScript that transpiles to the following JS:
//# This is a comment with a hash
The # in the comment is necessary because a framework build script (qooxdoo) uses the hashed comment as a directive. And of course that is a bit tricky, as # is what demarcates a comment in CoffeeScript.
How can I put a hash (#) in a CoffeeScript comment such that the # ends up inside a comment in the transpiled JavaScript?

Found it:
###*
# #This is a comment with a hash
###
At least within a block comment this transpiles to:
/**
* #This is a comment with a hash
*/
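Worth noting: ordinary single-line # comments are stripped entirely by the CoffeeScript compiler; only ### block comments survive into the generated JavaScript, which is why the block-comment form is needed. As an illustration, a qooxdoo-style directive (the #asset directive here is just a hypothetical example) could be embedded like this:
###*
# #asset(myapp/*)
###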

Related

Doxygen EXCLUDE_PATTERNS regex

I am attempting to exclude certain files from my doxygen generated documentation. I am using version 1.8.14.
My files come in this naming convention:
/Path2/OtherFile.cs
/Path/DAL.Entity/Source.cs
/Path/DAL.Entity/SourceBase.generated.cs
I want to exclude all files that do NOT end in Base.generated.cs, and are located inside of /Path/.
Since it appears doxygen claims to use regex for the exclude_patterns variable, I eventually came up with this:
.*\\Path\\DAL\..{4,15}\\((?<!Base\.generated).)*
Needless to say, it did not work. Nor did multiple other variations. So far a simple wildcard * is the only regex character I have gotten to actually work.
doxygen uses QRegExp for a lot of things, so I assumed that was the library used for this variable as well, but even several variations of patterns that the library claims to support did not work; granted, that library is apparently full of bugs, but I would expect some things to work.
Does doxygen actually use a regex library for this variable?
If so, which library is it?
In either case, is there a method of achieving my goal?
My conclusion is: no, the Doxygen Doxyfile does not support real regex, even though the documentation claims it does. Only standard wildcards work.
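For the record, plain globs are the only kind of pattern we could get EXCLUDE_PATTERNS to honor. A sketch of the kind of thing that does work (paths are illustrative):
# wildcard patterns are matched against the full path of each input file
EXCLUDE_PATTERNS = */tests/* \
                   *.generated.cs
# but wildcards cannot be negated, so "everything under /Path/ that does
# NOT end in Base.generated.cs" cannot be expressed this way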
We ended up with a really awkward solution to work around this.
What we did was add a macro in our CMakeLists.txt that creates a string with everything we want to include in INPUT instead, manually excluding the parts we don't want.
The sad part is that CMake's regex is also crippled, so we couldn't use advanced regex such as negative lookahead in LIST(FILTER ... EXCLUDE REGEX), similar to LIST(FILTER children EXCLUDE REGEX "^((?!autogen/public).)*$")... So even this solution is not really what we wanted.
Our CMakeLists.txt ended up looking something like this
cmake_minimum_required(VERSION 3.9)
project(documentation_html LANGUAGES CXX)
find_package(Doxygen REQUIRED dot)
# Custom macros
## Macro for getting all relevant directories when creating HTML documentation.
## This was created because the regex matching in Doxygen and CMake lacks support for more
## advanced syntax.
MACRO(SUBDIRS result current_dir include_regex)
FILE(GLOB_RECURSE children ${current_dir} ${current_dir}/*)
LIST(FILTER children INCLUDE REGEX "${include_regex}")
SET(dir_list "")
FOREACH(child ${children})
get_filename_component(path ${child} DIRECTORY)
IF(${path} MATCHES ".*autogen/public.*$" OR NOT ${path} MATCHES ".*build.*$") # If we have the /source/build/autogen/public folder available we create the doxygen for those interfaces also.
LIST(APPEND dir_list ${path})
ENDIF()
ENDFOREACH()
LIST(REMOVE_DUPLICATES dir_list)
string(REPLACE ";" " " dirs "${dir_list}")
SET(${result} ${dirs})
ENDMACRO()
SUBDIRS(DOCSDIRS "${CMAKE_SOURCE_DIR}/docs" ".*.plantuml$|.*.puml$|.*.md$|.*.txt$|.*.sty$|.*.tex$")
SUBDIRS(SOURCEDIRS "${CMAKE_SOURCE_DIR}/source" ".*.cpp$|.*.hpp$|.*.h$|.*.md$")
# Common config
set(DOXYGEN_CONFIG_PATH ${CMAKE_SOURCE_DIR}/docs/doxy_config)
set(DOXYGEN_IN ${DOXYGEN_CONFIG_PATH}/Doxyfile.in)
set(DOXYGEN_IMAGE_PATH ${CMAKE_SOURCE_DIR}/docs)
set(DOXYGEN_PLANTUML_INCLUDE_PATH ${CMAKE_SOURCE_DIR}/docs)
set(DOXYGEN_OUTPUT_DIRECTORY docs)
# HTML config
set(DOXYGEN_INPUT "${DOCSDIRS} ${SOURCEDIRS}")
set(DOXYGEN_EXCLUDE_PATTERNS "*/tests/* */.*/*")
set(DOXYGEN_FILE_PATTERNS "*.cpp *.hpp *.h *.md")
set(DOXYGEN_RECURSIVE NO)
set(DOXYGEN_GENERATE_LATEX NO)
set(DOXYGEN_GENERATE_HTML YES)
set(DOXYGEN_HTML_DYNAMIC_MENUS NO)
configure_file(${DOXYGEN_IN} ${CMAKE_BINARY_DIR}/DoxyHTML @ONLY)
add_custom_target(docs
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/DoxyHTML -d Markdown
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Generating documentation"
VERBATIM)
and in the Doxyfile we added the substitution placeholders for those fields
OUTPUT_DIRECTORY = @DOXYGEN_OUTPUT_DIRECTORY@
INPUT = @DOXYGEN_INPUT@
FILE_PATTERNS = @DOXYGEN_FILE_PATTERNS@
RECURSIVE = @DOXYGEN_RECURSIVE@
EXCLUDE_PATTERNS = @DOXYGEN_EXCLUDE_PATTERNS@
IMAGE_PATH = @DOXYGEN_IMAGE_PATH@
GENERATE_HTML = @DOXYGEN_GENERATE_HTML@
HTML_DYNAMIC_MENUS = @DOXYGEN_HTML_DYNAMIC_MENUS@
GENERATE_LATEX = @DOXYGEN_GENERATE_LATEX@
PLANTUML_INCLUDE_PATH = @DOXYGEN_PLANTUML_INCLUDE_PATH@
After this we can run cd ./build && cmake ../ && make docs to create our html documentation and have it include the autogenerated interfaces in our source folder without including all the other directories in the build folder.
Quick description of what actually happens in the CMakeLists.txt
# Macro that gets all directories from current_dir recursively and returns them in result as a space-separated string
MACRO(SUBDIRS result current_dir include_regex)
# Gets all files recursively from current_dir
FILE(GLOB_RECURSE children ${current_dir} ${current_dir}/*)
# Filter files so we only keep the files that match the include_regex (the regex can't be too advanced)
LIST(FILTER children INCLUDE REGEX "${include_regex}")
SET(dir_list "")
# Let us act on all files... :)
FOREACH(child ${children})
# We're only interested in the path. So we get the path part from the file
get_filename_component(path ${child} DIRECTORY)
# Since CMake's regex is also crippled we can't do nice things such as
# LIST(FILTER children EXCLUDE REGEX "^((?!autogen/public).)*$") which would have been preferred
# (CMake regex does not understand negative lookahead/lookbehind)... So we ended up with this ugly
# thing instead: adding all build/autogen/public paths and not adding any other paths inside build.
# It could probably be written in regex without negative lookahead, but I'm both not really fluent
# in regex (who is... right?) and a bit lazy in this case. We just needed to get this one pointer task done... :P
IF(${path} MATCHES ".*autogen/public.*$" OR NOT ${path} MATCHES ".*build.*$")
LIST(APPEND dir_list ${path})
ENDIF()
ENDFOREACH()
# Remove all duplicates... Since we GLOBbed all files there are a lot of them, so this is important or the Doxygen INPUT will overflow... I know... I tested...
LIST(REMOVE_DUPLICATES dir_list)
# Convert the dir_list to a space-separated string
string(REPLACE ";" " " dirs "${dir_list}")
# Return the result! Coffee and cinnamon buns for everyone!
SET(${result} ${dirs})
ENDMACRO()
# Get all the paths that we want to include in our documentation... This is also where the build folders for the different applications live, with the autogenerated interfaces we want to keep.
SUBDIRS(SOURCEDIRS "${CMAKE_SOURCE_DIR}/source" ".*.cpp$|.*.hpp$|.*.h$|.*.md$")
# Add the dirs we want to the Doxygen INPUT
set(DOXYGEN_INPUT "${SOURCEDIRS}")
# Normal exclude patterns for stuff we don't want to add. This thing does not support regex... even though it should.
set(DOXYGEN_EXCLUDE_PATTERNS "*/tests/* */.*/*")
# Normal use of the file patterns that we want to keep in the documentation
set(DOXYGEN_FILE_PATTERNS "*.cpp *.hpp *.h *.md")
# IMPORTANT! Since we are creating all the INPUT paths ourselves we don't want Doxygen to do any recursion for us
set(DOXYGEN_RECURSIVE NO)
# Write the config
configure_file(${DOXYGEN_IN} ${CMAKE_BINARY_DIR}/DoxyHTML @ONLY)
# Create the target that will use that config to create the html documentation
add_custom_target(docs
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/DoxyHTML -d Markdown
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Generating documentation"
VERBATIM)
I know this isn't the answer anyone who stumbles in on this question wants... unfortunately it seems to be the only reasonable solution...
... you all have my deepest condolences...

How to assign particular context to --keyword for proper_name?

When using the xgettext tool it is possible to automatically add commenting to assist translators with regards to proper names (as documented).
The documentation suggests to add the following to the command line:
--keyword='proper_name:1,"This is a proper name. See the gettext manual, section Names."'
Which results in proper names being extracted to the .pot file like this:
#. This is a proper name. See the gettext manual, section Names.
#: ../Foo.cpp:18
msgid "Bob"
msgstr ""
The problem with this is that no particular context has been defined for that string. Here is ideally how the proper name would be extracted:
#. This is a proper name. See the gettext manual, section Names.
#: ../Foo.cpp:18
msgctxt "Proper Name"
msgid "Bob"
msgstr ""
I've tried the following but with no success:
# Hoping that 0 would be the function name 'proper_name'.
--keyword='proper_name:0c,1,"This is a proper name. See the gettext manual, section Names."'
# Hoping that -1 would be the function name 'proper_name'.
--keyword='proper_name:-1c,1,"This is a proper name. See the gettext manual, section Names."'
# Hoping that the string would be used as the context.
--keyword='proper_name:"Proper Name"c,1,"This is a proper name. See the gettext manual, section Names."'
# Hoping that the string would be used as the context.
--keyword='proper_name:c"Proper Name",1,"This is a proper name. See the gettext manual, section Names."'
Is there a way to force a particular msgctxt to be used for all strings extracted with a keyword (such as proper_name from the example above)?
If there is no option to achieve this with xgettext as-is then I considered perhaps using the following:
--keyword='proper_name:1,"<PROPERNAME>"'
Resulting with:
#. <PROPERNAME>
#: ../Foo.cpp:18
msgid "Bob"
msgstr ""
The problem then becomes: how to automatically translate all occurrences of this in the resulting .pot file into the following:
#. This is a proper name. See the gettext manual, section Names.
#: ../Foo.cpp:18
msgctxt "Proper Name"
msgid "Bob"
msgstr ""
If you want to extract a message context, it has to be part of the argument list. And the numerical part in "Nc" has to be a positive integer. All your attempts with 0, -1 are fruitless, sorry.
The signature of your function must look like this:
#define PROPER_NAME "Proper Name"
const char *proper_name(const char *ctx, const char *name);
And then call it like this:
proper_name(PROPER_NAME, "Bob");
That repeats PROPER_NAME all over the code, but it's the only way to get it into the message context.
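With that two-argument signature, the matching extraction option would presumably be this, where "1c" marks argument 1 as the context and 2 is the translatable string:
--keyword='proper_name:1c,2,"This is a proper name. See the gettext manual, section Names."'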
Maybe file a feature request?
There is also a hack that achieves the same without changing your source code. I assume that you're using C and the standard Makefile (but you can do the same in other languages):
Copy the file POTFILES to POTFILES-proper-names and add a line ./proper_names.pot to POTFILES.in.
Then you have to create proper_names.pot:
xgettext --files-from=POTFILES-proper-names \
--keyword='' \
--keyword='proper_names:1,"Your comment ..."' \
--output=proper_names.pox
This will now only contain the entries that were marked with "proper_names()". Now add the context:
msg-add-content proper_names.pox "Proper Name" >proper_names.pot
rm proper_names.pox
Unfortunately, there is no program called "msg-add-content". Grab one of the zillion po-parsers out there, and write one yourself (or take mine at the end of this post).
Now, update your PACKAGE.pot as usual. Since "proper_names.pot" is an input file for the main xgettext run, all your extracted proper names, with the context added, end up in your pot file (and their context will be used).
If you don't have another script at hand for adding a message context to all the entries in a .pot file, use this one:
#! /usr/bin/env perl
use strict;
use Locale::PO;
die "usage: $0 POFILE CONTEXT" unless #ARGV == 2;
my ($input, $context) = #ARGV;
my $entries = Locale::PO->load_file_asarray($input) or die "$input: failure";
foreach my $entry (#$entries) {
$entry->msgctxt($context) unless '""' eq $entry->msgid;
print $entry->dump;
}
You have to install the Perl library "Locale::PO" for it, either with "sudo cpan install Locale::PO" or use the pre-built version that your vendor may have.
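For reference, if you save the script above as add-context.pl (a name made up here), the msg-add-content step from earlier becomes:
perl add-context.pl proper_names.pox "Proper Name" >proper_names.pot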

doxygen: how to output backticks in a code section

I was wondering if it is possible to output backticks within a doxygen code section.
~~~~~~~~~~
for file in `ls dir/*.filter`
do
done
~~~~~~~~~~
I get no output at all, and this seems to be caused by the backtick "`" I've inserted into my code section.
Has anyone had the same issue? Any suggestions?
many thanks
` is used to create an inline code span, so use \code and \endcode rather than a markdown code block.
for example
\code
this is an inline `code block with ` characters
\endcode
renders with the ` characters included.
When a pair of `s is encountered in the code, doxygen will not process whatever is between them.
The following will render correctly:
\code
for file in `ls dir/*.filter`
do
done
\endcode

Lexing/Parsing "here" documents

For those that are experts in lexing and parsing... I am attempting to write a series of programs in perl that would parse out IBM mainframe z/OS JCL for a variety of purposes, but am hitting a roadblock in methodology. I am mostly following the lexing/parsing ideology put forth in "Higher Order Perl" by Mark Jason Dominus, but there are some things that I can't quite figure out how to do.
JCL has what's called inline data, which is very similar to "here" documents. I am not quite sure how to lex these into tokens.
The layout for inline data is as follows:
//DDNAME DD *
this is the inline data
this is some more inline data
/*
...
Conventionally, the "*" after the "DD" signifies that the following lines are the inline data itself, terminated by either "/*" or the next valid JCL record (starting with "//" in the first 2 columns).
More advanced, the inline data could appear as such:
//DDNAME DD *,DLM=ZZ
//THIS LOOKS LIKE JCL BUT IT'S ACTUALLY DATA
//MORE DATA MASQUERADING AS JCL
ZZ
...
Sometimes the inline data is itself JCL (perhaps to be pumped to a program or the internal reader, whatever).
But here's the rub. In JCL, the records are 80 bytes, fixed in length. Everything past column 72 (cols 73-80) is a "comment". As well, everything following a blank that follows valid JCL is likewise a comment. Since I am looking to manipulate JCL in my programs and spit it back out, I'd like to capture comments so that I can preserve them.
So, here's an example of inline comments in the case of inline data:
//DDNAME DD *,DLM=ZZ THIS IS A COMMENT COL73DAT
data
...
ZZ
...more JCL
I originally thought that I could have my top-most lexer pull in a line of JCL and immediately create a non-token for cols 1-72 and then a token (['COL73COMMENT',$1]) for the column 73 comment, if any. This would then pass downstream to the next iterator/tokenizer a string of the cols 1-72 text followed by the col73 token.
But how would I, downstream from there, grab the inline data? I'd originally figured that the top-most tokenizer could look for a "DD \*(,DLM=(\S*))" (or the like) and then just keep pulling records from the feeding iterator until it hit the delimiter or a valid JCL starter ("//").
But you may see the issue here... I can't have 2 topmost tokenizers... either the tokenizer that looks for COL73 comments must be the top or the tokenizer that gets inline data must be at the top.
I imagine that perl parsers have the same challenge, since seeing
<<DELIM
isn't necessarily the end of the line, followed by the here document data. After all, you could see perl like:
my $this=$obj->ingest(<<DELIM)->reformat();
inline here document data
more data
DELIM
How would the tokenizer/parser know to tokenize the ")->reformat();" and then still grab the following records as-is? In the case of the inline JCL data, those lines are passed as-is, cols 73-80 are NOT comments in that case...
So, any takers on this? I know there will be tons of questions clarifying my needs and I'm happy to clarify as much as is needed.
Thanks in advance for any help...
In this answer I will concentrate on heredocs, because the lessons can be easily transferred to the JCL.
Any language that supports heredocs is not context-free, and thus cannot be parsed with common techniques like recursive descent. We need a way to guide the lexer along more twisted paths, but in doing so, we can maintain the appearance of a context-free language. All we need is another stack.
For the parser, we treat introductions to heredocs <<END as string literals. But the lexer has to be extended to do the following:
When a heredoc introduction is encountered, it adds the terminator to the stack.
When a newline is encountered, the body of the heredoc is lexed, until the stack is empty. After that, normal parsing is resumed.
Take care to update the line number appropriately.
In a hand-written combined parser/lexer, this could be implemented like so:
use strict; use warnings; use 5.010;
my $s = <<'INPUT-END'; pos($s) = 0;
<<A <<B
body 1
A
body 2
B
<<C
body 3
C
INPUT-END
my @strs;
push @strs, parse_line() while pos($s) < length($s);
for my $i (0 .. $#strs) {
say "STRING $i:";
say $strs[$i];
}
sub parse_line {
my @strings;
my @heredocs;
$s =~ /\G\s+/gc;
# get the markers
while ($s =~ /\G<<(\w+)/gc) {
push @strings, '';
push @heredocs, [ \$strings[-1], $1 ];
$s =~ /\G[^\S\n]+/gc; # spaces that are no newlines
}
# lex the EOL
$s =~ /\G\n/gc or die "Newline expected";
# process the deferred heredocs:
while (my $heredoc = shift @heredocs) {
my ($placeholder, $marker) = @$heredoc;
$s =~ /\G(.*\n)$marker\n/sgc or die "Heredoc <<$marker expected";
$$placeholder = $1;
}
return @strings;
}
Output:
STRING 0:
body 1
STRING 1:
body 2
STRING 2:
body 3
The Marpa parser simplifies this a bit by allowing events to be triggered once a certain token is parsed. These are called pauses, because the built-in lexing pauses a moment for you to take over. Here is a high-level overview and a short blogpost describing this technique with the demo code on Github.
In case anyone was wondering how I decided to resolve this, here is what I did.
My main lexing routine accepts an iterator that pumps full lines of text (which can take it from a file, a string, whatever I want). The routine uses that to create another iterator, which examines the line for "comments" after column 72, which it will then return as a "mainline" token followed by a "col72" token. This iterator is then used to create yet another iterator, which passes the col72 tokens through unchanged, but takes the mainline tokens and lexes them into atomic tokens (things like STRING, NUMBER, COMMA, NEWLINE, etc).
But here's the crux... the lexing routine has the ORIGINAL ITERATOR still... so when it receives a token that indicates there is a "here" document, it continues processing tokens until it hits a NEWLINE token (meaning end of the actual line of text) and then uses the original iterator to pull off the here document data. Since that iterator feeds the atomic tokens iterator, pulling from it then prevents those lines from being atomized.
To illustrate, think of iterators like hoses. The first hose is the main iterator. To that I attach the col72 iterator hose, and to that I attach the atomic tokenizer hose. As streams of characters go in the first hose, atomized tokens come out the end of the third hose. But I can attach a 2-way nozzle to the first hose that will allow its output to come out the alternate nozzle, preventing that data from going into the second hose (and hence the third hose). When I'm done diverting the data through the alternate nozzle, I can turn that off and then data begins flowing through the second and third hoses again.
Easy-peasy.
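To make the hose picture concrete, here is a minimal sketch in the HOP closure-iterator style (the names and the simplified JCL handling are illustrative, not the poster's actual code):
use strict; use warnings; use 5.010;
# First hose: pumps raw 80-byte records one at a time.
sub line_iter {
    my @lines = @_;
    return sub { shift @lines };
}
# Second hose: splits each record into its cols 1-72 text and its col-73 comment.
sub col72_iter {
    my ($src) = @_;
    return sub {
        my $line = $src->() // return;
        return [ substr($line, 0, 72),
                 length($line) > 72 ? substr($line, 72) : '' ];
    };
}
# The two-way nozzle: pulls raw lines straight off the ORIGINAL iterator,
# so the inline data bypasses the downstream tokenizer hoses untouched.
sub grab_inline_data {
    my ($raw, $dlm) = @_;
    my @data;
    while (defined(my $line = $raw->())) {
        if (defined $dlm) {
            last if index($line, $dlm) == 0;   # DLM= delimiter ends the data
        } else {
            last if index($line, '/*') == 0    # default terminator...
                 or index($line, '//') == 0;   # ...or the next JCL record
        }
        push @data, $line;
    }
    return \@data;
}
# Usage sketch: build the chain, but keep a handle on the first hose.
my $raw   = line_iter("//DDNAME  DD *", "inline data", "/*");
my $lexer = col72_iter($raw);
my $stmt  = $lexer->();                 # tokenize the DD statement...
my $data  = grab_inline_data($raw);     # ...then divert the inline data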

lex - Removing "/*" also removes internal stars

I'm trying to pull comments out of a C file, but my code pulls out all the stars instead of just the /* and */. Can anyone help?
Input /**A**/ or /***/
Desired Output *A* and *
My Output *A and nothing
Code
"/*" /* comment */ BEGIN(Comment);
<Comment>{
[^*] /* not a '*' */ ECHO;
"*"+[^/] /* '*'s not followed by '/' */ ECHO;
"*"+"/" /* end of Comment */ BEGIN(INITIAL);
}
Change your last two patterns to
"*"+/[^/]
"*/"
Your last pattern explicitly takes every * at the end of the comment out of the comment. If you only change the last rule, then it will not recognize the end of the comment of, for example, /***/, because /* will start the comment, then ** is matched by the second-to-last pattern and the / is matched by [^*].
"*"+/[^/] matches all sequences of * followed by anything but a /, but not consuming the character that follows. This is necessary as this could be the * of the */ closing the comment.
This regex matches non-nesting C comments:
"/*"([^*]|[*]*[^*/])*"*"+"/"
Here is a complete Lex program which strips C comments from input, replacing each one with a space.
%%
"/*"([^*]|[*]*[^*/])*"*"+"/" putc(' ', yyout);
%%
However, this fails to provide helpful diagnostics. For instance, if something like /* /* */ occurs, it's nice to generate a warning about a suspicious-looking start of a comment within a comment. Also, if a comment is unterminated, it's useful to detect that and produce a diagnostic about where that comment started.
For these reasons, it may be best to handle C comments by recognizing just the /* sequence and then taking over with a custom piece of code that reads characters from the yyin stream and recognizes the rest of the comment.
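A minimal sketch of that approach, assuming flex (input() reads the next character from yyin, and %option yylineno tracks line numbers; the function name and message wording are illustrative):
%{
#include <stdio.h>
static void scan_comment(void);
%}
%option yylineno noyywrap
%%
"/*"    scan_comment();
%%
/* Reads the rest of a comment by hand so we can warn about a comment
   opener inside the comment and report unterminated comments. */
static void scan_comment(void)
{
    int start = yylineno;
    int prev = 0, c;
    while ((c = input()) != EOF && c != 0) {  /* flex returns EOF, classic lex 0 */
        if (prev == '*' && c == '/')
            return;                           /* comment properly closed */
        if (prev == '/' && c == '*')
            fprintf(stderr, "line %d: suspicious comment start inside comment\n", yylineno);
        prev = c;
    }
    fprintf(stderr, "unterminated comment starting on line %d\n", start);
}
int main(void) { yylex(); return 0; }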