Add tdemarkdown part - embeddable lightweight markdown viewing component.

TDEMarkdown is based on the md4c library and uses TDEHTML to render
its output. For enhanced safety, every feature of the HTML widget that
is not needed for viewing is turned off. It integrates nicely into
Konqueror and supports both CommonMark and GitHub markdown syntaxes.

Signed-off-by: Mavridis Philippe <mavridisf@gmail.com>

Prepare to merge tdemarkdown into tdelibs.

Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
pull/158/head
Mavridis Philippe 2 years ago committed by Slávek Banko
parent a291f3a0a3
commit 95279fbf6d
No known key found for this signature in database
GPG Key ID: 608F5293A04BE668

@ -120,6 +120,7 @@ tde_l10n_create_template(
"^tdecore/tdeconfig_compiler/example/"
"^tdeio/"
"^tdeioslave/"
"^tdemarkdown/md4c/"
"^tdeprint/"
"^tdeui/colors/"
"^tdestyles/.*/config/"

@ -1443,6 +1443,7 @@ add_subdirectory( kate )
add_subdirectory( tdecert )
tde_conditional_add_subdirectory( WITH_XRANDR tderandr )
add_subdirectory( tdehtml )
add_subdirectory( tdemarkdown )
add_subdirectory( tdecmshell )
add_subdirectory( tdeconf_update )
add_subdirectory( tdewidgets )

@ -21,6 +21,7 @@ tde_create_translated_desktop(
x-bibtex.desktop rdf.desktop rss.desktop calendar.desktop x-adasrc.desktop x-perl.desktop
x-csv.desktop x-latex.desktop x-xslfo.desktop x-xslt.desktop vnd.wap.wml.desktop x-katefilelist.desktop
docbook.desktop x-mswinurl.desktop x-hex.desktop vnd.abc.desktop javascript.desktop x-python.desktop
markdown.desktop
DESTINATION ${MIME_INSTALL_DIR}/text
PO_DIR mimetypes
)

@ -0,0 +1,11 @@
[Desktop Entry]
Comment=Markdown document
Type=MimeType
MimeType=text/markdown
Icon=text-x-generic
# Common Markdown file extensions (the same globs shared-mime-info lists for text/markdown).
Patterns=*.md;*.markdown;*.mkd;
DefaultApp=konqueror
[Property::X-TDE-text]
Type=bool
Value=true

@ -0,0 +1,8 @@
##### create translation templates ##############

# Translation template for the .desktop files of this directory.
# The bundled md4c tree is excluded from the scan.
tde_l10n_create_template(
  CATALOG     "desktop_files/tdemarkdown-desktops/"
  SOURCES     *.desktop
  EXCLUDES    "^md4c/"
  DESTINATION "${CMAKE_SOURCE_DIR}/translations"
)

@ -0,0 +1,67 @@
###########################################
# #
# Copyright (C) 2022 Mavridis Philippe #
# <mavridisf@gmail.com> #
# #
# Improvements and feedback are welcome #
# #
# Released under GNU GPL v2 or greater. #
# #
###########################################
### Header and library directories ########
#
# In-tree headers: the top-level source/binary trees, the tdelibs modules
# listed below, this directory, and the bundled md4c sources.
include_directories(
  ${CMAKE_SOURCE_DIR}
  ${CMAKE_BINARY_DIR}
  ${CMAKE_SOURCE_DIR}/tdecore
  ${CMAKE_BINARY_DIR}/tdecore
  ${CMAKE_SOURCE_DIR}/tdehtml
  ${CMAKE_SOURCE_DIR}/tdeui
  ${CMAKE_SOURCE_DIR}/tdeutils
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/md4c/src
)

# TQt and TDE headers are added as SYSTEM include directories.
include_directories( SYSTEM
  ${TQT_INCLUDE_DIRS}
  ${TDE_INCLUDE_DIR}
)

# Library search paths for TQt and TDE.
link_directories(
  ${TQT_LIBRARY_DIRS}
  ${TDE_LIB_DIR}
)
### Build libtdemarkdown (kpart) ###########
#
# The three md4c C sources are compiled directly into the part together with
# markdown_part.cpp; the part links against tdeparts and tdehtml.
tde_add_kpart( libtdemarkdown
  AUTOMOC

  SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/md4c/src/entity.c
    ${CMAKE_CURRENT_SOURCE_DIR}/md4c/src/md4c.c
    ${CMAKE_CURRENT_SOURCE_DIR}/md4c/src/md4c-html.c
    markdown_part.cpp

  LINK
    tdeparts-shared tdehtml-shared

  DESTINATION ${PLUGIN_INSTALL_DIR}
)
### Install part .desktop entry #############
#
# Service description of the part; PO_DIR names the translation catalog
# directory used for this file.
tde_create_translated_desktop(
  SOURCE      markdown_part.desktop
  DESTINATION ${SERVICES_INSTALL_DIR}
  PO_DIR      tdemarkdown-desktops
)
### Install XML-GUI #########################
#
# markdown_part.rc is installed into the "tdemarkdown" data directory.
install(
  FILES       markdown_part.rc
  DESTINATION ${DATA_INSTALL_DIR}/tdemarkdown
)

@ -0,0 +1,117 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too.
When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations.
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and modification follow.
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program.
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License.
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.
In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License.
7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.
10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
one line to give the program's name and an idea of what it does. Copyright (C) yyyy name of author
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker.
signature of Ty Coon, 1 April 1989 Ty Coon, President of Vice

@ -0,0 +1,14 @@
# TDEMarkdown
TDEMarkdown is a lightweight markdown viewer for Trinity. It is based on the [md4c](https://www.github.com/mity/md4c) library and uses TDEHTML to render its output.
It integrates nicely into Konqueror and supports both CommonMark and GitHub markdown syntaxes.
## Contributing
If you wish to contribute to TDEMarkdown:
- You can **report a bug, request a feature or contribute code** via the [TDE Gitea Workspace (TGW)](https://mirror.git.trinitydesktop.org/gitea)
- You can **contribute translations** via the [TDE Weblate Translation Workspace (TWTW)](https://mirror.git.trinitydesktop.org/weblate)

@ -0,0 +1,133 @@
/***************************************************************************
* Markdown Viewer part *
* Copyright (c) 2022 Mavridis Philippe <mavridisf@gmail.com> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
#include <tqbuffer.h>
#include <tqfile.h>
#include <tdeparts/genericfactory.h>
#include <kstandarddirs.h>
#include <tdehtmlview.h>
/* MD4C-HTML */
#include <md4c-html.h>
#include "markdown_part.h"
/* Factory type for MarkdownPart; the macro below exports it under the
 * library name "libtdemarkdown". */
typedef KParts::GenericFactory<MarkdownPart> Factory;
K_EXPORT_COMPONENT_FACTORY(libtdemarkdown, Factory)
/**
 * Creates the viewer part on top of TDEHTMLPart and switches off the
 * active-content features that are not needed for viewing.
 *
 * Only parentWidget is forwarded to the base class, together with the fixed
 * name "TDEMarkdown". widgetName, parent, name and args are not used.
 * NOTE(review): parent is not handed on to TDEHTMLPart -- confirm that this
 * is intended.
 */
MarkdownPart::MarkdownPart(TQWidget* parentWidget, const char* widgetName,
                           TQObject* parent, const char* name, const TQStringList& args)
  : TDEHTMLPart(parentWidget, "TDEMarkdown")
{
  setInstance(Factory::instance());

  /* Features: no scripts, applets, meta refresh or plugins; images stay on */
  setJScriptEnabled(false);
  setJavaEnabled(false);
  setMetaRefreshEnabled(false);
  setPluginsEnabled(false);
  setAutoloadImages(true);

  /* The build system installs markdown_part.rc into the "tdemarkdown" data
   * directory, so it has to be looked up there. */
  setXMLFile( locate("data", "tdemarkdown/markdown_part.rc") );
}
/* Nothing to release explicitly: the class itself only holds m_buffer. */
MarkdownPart::~MarkdownPart()
{
}
/**
 * Builds the about data describing this part.
 *
 * @return a TDEAboutData object allocated with new.
 */
TDEAboutData* MarkdownPart::createAboutData()
{
  TDEAboutData* about = new TDEAboutData(
    "tdemarkdown",
    I18N_NOOP("TDE Markdown Viewer"),
    "1.0",
    I18N_NOOP("TDEMarkdown is an embeddable viewer for Markdown documents."),
    TDEAboutData::License_GPL_V2,
    "© 2022 Mavridis Philippe");

  about->addAuthor("Mavridis Philippe (blu.256)", I18N_NOOP("Developer"), "mavridisf@gmail.com");

  return about;
}
bool MarkdownPart::openURL(const KURL& u)
{
if(u.isLocalFile())
{
TQFile local(u.path());
if(!local.open(IO_ReadOnly))
{
return false;
}
TQByteArray data = local.readAll();
local.close();
if(!data.isNull())
{
begin(u);
TQString parsed(parse((MD_CHAR*) data.data()));
write(parsed);
end();
}
}
emit started(0L);
return true;
}
/**
 * Converts a Markdown document into a complete HTML page.
 *
 * @param document  NUL-terminated Markdown text; a null pointer is treated
 *                  as an empty document.
 * @return reference to m_buffer, which holds the generated page and is
 *         overwritten by the next call. If md_html() reports a failure, the
 *         page body contains an error message instead of the rendering.
 */
TQString& MarkdownPart::parse(MD_CHAR* document)
{
  m_buffer  = "<!DOCTYPE html>\n";
  m_buffer += "<html>\n";
  m_buffer += " <head>\n";
  m_buffer += " <meta charset='utf-8'>\n";
  m_buffer += " <title>TODO</title>\n";
  m_buffer += " </head>\n";
  m_buffer += " <body>\n";

  /* Guard against strlen(NULL) */
  const MD_CHAR* text = document ? document : "";

  /* processHTML() collects the renderer output in this array */
  TQByteArray data;
  int success = md_html(text,
                        MD_SIZE(strlen(text)),
                        &MarkdownPart::processHTML,
                        &data,
                        MD_DIALECT_GITHUB | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS
                        | MD_FLAG_LATEXMATHSPANS | MD_FLAG_PERMISSIVEATXHEADERS | MD_FLAG_UNDERLINE | MD_FLAG_TASKLISTS,
                        0);

  /* Any non-zero result is a failure, not only -1 */
  if (success != 0)
  {
    m_buffer += TQString("<b>%1</b>").arg(i18n("Error: malformed document."));
  }
  else
  {
    /* The collected bytes are not NUL-terminated, so the length has to be
     * passed explicitly. */
    m_buffer += TQString::fromLocal8Bit(data.data(), data.size());
  }

  m_buffer += " </body>\n";
  m_buffer += "</html>\n";
  return m_buffer;
}
void MarkdownPart::processHTML(const MD_CHAR* data, MD_SIZE data_size, void* user_data)
{
TQByteArray *ud = static_cast<TQByteArray*>(user_data);
TQBuffer buff(*ud);
if (data_size > 0)
{
buff.open(IO_WriteOnly | IO_Append);
buff.writeBlock(data, (int)data_size);
buff.close();
}
}
#include "markdown_part.moc"

@ -0,0 +1,10 @@
[Desktop Entry]
Name=Markdown Viewer
Comment=Embeddable lightweight markdown viewing component
Type=Service
Icon=text-x-generic
MimeType=text/markdown
X-TDE-Library=libtdemarkdown
X-TDE-ServiceTypes=KParts/ReadOnlyPart

@ -0,0 +1,47 @@
/***************************************************************************
* Markdown Viewer part *
* Copyright (c) 2022 Mavridis Philippe <mavridisf@gmail.com> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
#ifndef __MARKDOWN_PART_H
#define __MARKDOWN_PART_H
#include <tqwidget.h>
#include <tdehtml_part.h>
class TDEHTMLPart;
/**
 * Markdown viewer part: a TDEHTMLPart that converts a Markdown document to
 * HTML with md4c and displays the result.
 */
class MarkdownPart : public TDEHTMLPart
{
  TQ_OBJECT

  public:
    MarkdownPart(TQWidget* parentWidget, const char* widgetName,
                 TQObject* parent, const char* name, const TQStringList& args);
    virtual ~MarkdownPart();

    /** Creates and returns the about data of the part. */
    static TDEAboutData* createAboutData();

    /** Reimplemented from TDEHTMLPart: loads and displays the document at u. */
    virtual bool openURL(const KURL& u);

    /**
     * Converts the Markdown text in document to an HTML page.
     * The returned reference points at m_buffer.
     */
    TQString& parse(MD_CHAR* document);

  private:
    /** Output callback passed to md_html(); userData is the collecting TQByteArray. */
    static void processHTML(const MD_CHAR* data, MD_SIZE data_size, void* userData);

    /** Holds the HTML page produced by the most recent parse() call. */
    TQString m_buffer;
};
#endif // __MARKDOWN_PART_H

@ -0,0 +1,4 @@
<!DOCTYPE kpartgui SYSTEM "kpartgui.dtd">
<kpartgui name="MarkdownPart" version="1">
<StatusBar/>
</kpartgui>

@ -0,0 +1,442 @@
# MD4C Change Log
## Next Version (Work in Progress)
Changes:
* Changes mandated by CommonMark specification 0.30.
Actually there are only very minor changes to recognition of HTML blocks:
- The tag `<textarea>` now triggers HTML block (of type 1 as per the
specification).
- HTML declaration (HTML block type 4) is not required to begin with an
upper-case ASCII character after the `<!`. Any ASCII character is now
allowed.
Other than that, the newest specification mainly improves test coverage and
clarifies its wording in some cases, without affecting the implementation.
Refer to [CommonMark
0.30 notes](https://github.com/commonmark/commonmark-spec/releases/tag/0.30)
for more info.
Fixes:
* [#163](https://github.com/mity/md4c/issues/163):
Make HTML renderer to emit `'\n'` after the root tag when in the XHTML mode.
* [#165](https://github.com/mity/md4c/issues/165):
Make HTML renderer not to percent-encode `'~'` in URLs. Although it does
work, it's not needed, and it can actually be confusing with URLs such as
`http://www.example.com/~johndoe/`.
* [#167](https://github.com/mity/md4c/issues/167),
[#168](https://github.com/mity/md4c/issues/168):
Fix multiple instances of various buffer overflow bugs, found mostly using
a fuzz testing. Contributed by [dtldarek](https://github.com/dtldarek) and
[Thierry Coppey](https://github.com/TCKnet).
* [#169](https://github.com/mity/md4c/issues/169):
Table underline now does not require 3 characters per table column anymore.
One dash (optionally with a leading or trailing `:` prepended or appended)
is now sufficient. This improves compatibility with the GFM.
* [#172](https://github.com/mity/md4c/issues/172):
Fix quadratic time behavior caused by unnecessary lookup for link reference
definition even if the potential label contains nested brackets.
* [#173](https://github.com/mity/md4c/issues/173),
[#174](https://github.com/mity/md4c/issues/174):
Multiple bugs identified with [OSS-Fuzz](https://github.com/google/oss-fuzz)
were fixed.
## Version 0.4.8
Fixes:
* [#149](https://github.com/mity/md4c/issues/149):
A HTML block started in a container block (and not explicitly finished in
the block) could eat 1 line of actual contents.
* [#150](https://github.com/mity/md4c/issues/150):
Fix md2html utility to output proper DOCTYPE and HTML tags when `--full-html`
command line option is used, according to the expected output format
(HTML or XHTML).
* [#152](https://github.com/mity/md4c/issues/152):
Suppress recognition of a permissive autolink if it would otherwise form a
complete body of an outer inline link.
* [#153](https://github.com/mity/md4c/issues/153),
[#154](https://github.com/mity/md4c/issues/154):
Set `MD_BLOCK_UL_DETAIL::mark` and `MD_BLOCK_OL_DETAIL::mark_delimiter`
correctly, even when the blocks are nested at the same line in complicated
ways.
* [#155](https://github.com/mity/md4c/issues/155):
Avoid reading 1 character beyond the input size in some complex cases.
## Version 0.4.7
Changes:
* Add `MD_TABLE_DETAIL` structure into the API. The structure describes column
count and row count of the table, and pointer to it is passed into the
application-provided block callback with the `MD_BLOCK_TABLE` block type.
Fixes:
* [#131](https://github.com/mity/md4c/issues/131):
Fix handling of a reference image nested in a reference link.
* [#135](https://github.com/mity/md4c/issues/135):
Handle unmatched parenthesis pairs inside a permissive URL and WWW auto-links
in a way more compatible with the GFM.
* [#138](https://github.com/mity/md4c/issues/138):
The tag `<tbody></tbody>` is now suppressed whenever the table has zero body
rows.
* [#139](https://github.com/mity/md4c/issues/139):
Recognize a list item mark even when EOF follows it.
* [#142](https://github.com/mity/md4c/issues/142):
Fix reference link definition label matching in a case when the label ends
with a Unicode character with non-trivial case folding mapping.
## Version 0.4.6
Fixes:
* [#130](https://github.com/mity/md4c/issues/130):
Fix `ISANYOF` macro, which could provide unexpected results when encountering
zero byte in the input text; in some cases leading to broken internal state
of the parser.
The bug could result in denial of service and possibly also to other security
implications. Applications are advised to update to 0.4.6.
## Version 0.4.5
Fixes:
* [#118](https://github.com/mity/md4c/issues/118):
Fix HTML renderer's `MD_HTML_FLAG_VERBATIM_ENTITIES` flag, exposed in the
`md2html` utility via `--fverbatim-entities`.
* [#124](https://github.com/mity/md4c/issues/124):
Fix handling of indentation of 16 or more spaces in the fenced code blocks.
## Version 0.4.4
Changes:
* Make Unicode-specific code compliant to Unicode 13.0.
New features:
* The HTML renderer, developed originally as the heart of the `md2html`
utility, is now built as a standalone library, in order to simplify its
reuse in applications.
* With `MD_HTML_FLAG_SKIP_UTF8_BOM`, the HTML renderer now skips UTF-8 byte
order mark (BOM) if the input begins with it, before passing to the Markdown
parser.
`md2html` utility automatically enables the flag (unless it is custom-built
with `-DMD4C_USE_ASCII`).
* With `MD_HTML_FLAG_XHTML`, The HTML renderer generates XHTML instead of
HTML.
This effectively means `<br />` instead of `<br>`, `<hr />` instead of
`<hr>`, and `<img ... />` instead of `<img ...>`.
`md2html` utility now understands the command line option `-x` or `--xhtml`
enabling the XHTML mode.
Fixes:
* [#113](https://github.com/mity/md4c/issues/113):
Add missing folding info data for the following Unicode characters:
`U+0184`, `U+018a`, `U+01b2`, `U+01b5`, `U+01f4`, `U+0372`, `U+038f`,
`U+1c84`, `U+1fb9`, `U+1fbb`, `U+1fd9`, `U+1fdb`, `U+1fe9`, `U+1feb`,
`U+1ff9`, `U+1ffb`, `U+2c7f`, `U+2ced`, `U+a77b`, `U+a792`, `U+a7c9`.
Due to the bug, the link definition label matching did not work in the case
insensitive way for these characters.
## Version 0.4.3
New features:
* With `MD_FLAG_UNDERLINE`, spans enclosed in underscore (`_foo_`) are seen
as underline (`MD_SPAN_UNDERLINE`) rather than an ordinary emphasis or
strong emphasis.
Changes:
* The implementation of wiki-links extension (with `MD_FLAG_WIKILINKS`) has
been simplified.
- A noticeable increase of MD4C's memory footprint introduced by the
extension implementation in 0.4.0 has been removed.
- The priority handling towards other inline elements has been unified.
(This affects an obscure case where syntax of an image was in place of
wiki-link destination made the wiki-link invalid. Now *all* inline spans
in the wiki-link destination, including the images, are suppressed.)
- The length limitation of 100 characters now always applies to wiki-link
destination.
* Recognition of strike-through spans (with the flag `MD_FLAG_STRIKETHROUGH`)
has become much stricter and, arguably, reasonable.
- Only single tildes (`~`) and double tildes (`~~`) are recognized as
strike-through marks. Longer ones are not anymore.
- The length of the opener and closer marks have to be the same.
- The tildes cannot open a strike-through span if a whitespace follows.
- The tildes cannot close a strike-through span if a whitespace precedes.
This change follows the changes of behavior in cmark-gfm some time ago, so
it is also beneficial from compatibility point of view.
* When building MD4C by hand instead of using its CMake-based build, the UTF-8
support was by default disabled, unless explicitly asked for by defining
a preprocessor macro `MD4C_USE_UTF8`.
This has been changed and the UTF-8 mode now becomes the default, no matter
how `md4c.c` is compiled. If you need to disable it and use the ASCII-only
mode, you have to explicitly define the macro `MD4C_USE_ASCII` when compiling it.
(The CMake-based build as provided in our repository explicitly asked for
the UTF-8 support with `-DMD4C_USE_UTF8`. I.e. if you are using MD4C library
built with our vanilla `CMakeLists.txt` files, this change should not affect
you.)
Fixes:
* Fixed some string length handling in the special `MD4C_USE_UTF16` build.
(This does not affect you unless you are on Windows and explicitly define
the macro when building MD4C.)
* [#100](https://github.com/mity/md4c/issues/100):
Fixed an off-by-one error in the maximal length limit of some segments
of e-mail addresses used in autolinks.
* [#107](https://github.com/mity/md4c/issues/107):
Fix mis-detection of asterisk-encoded emphasis in some corner cases when
length of the opener and closer differs, as in `***foo *bar baz***`.
## Version 0.4.2
Fixes:
* [#98](https://github.com/mity/md4c/issues/98):
Fix mis-detection of asterisk-encoded emphasis in some corner cases when
length of the opener and closer differs, as in `**a *b c** d*`.
## Version 0.4.1
Unfortunately, 0.4.0 has been released with badly updated ChangeLog. Fixing
this is the only change on 0.4.1.
## Version 0.4.0
New features:
* With `MD_FLAG_LATEXMATHSPANS`, LaTeX math spans (`$...$`) and LaTeX display
math spans (`$$...$$`) are now recognized. (Note though that the HTML
renderer outputs them verbatim in a custom `<x-equation>` tag.)
Contributed by [Tilman Roeder](https://github.com/dyedgreen).
* With `MD_FLAG_WIKILINKS`, Wiki-style links (`[[...]]`) are now recognized.
(Note though that the HTML renderer renders them as a custom `<x-wikilink>`
tag.)
Contributed by [Nils Blomqvist](https://github.com/niblo).
Changes:
* Parsing of tables (with `MD_FLAG_TABLES`) is now closer to the way how
cmark-gfm parses tables as we do not require every row of the table to
contain a pipe `|` anymore.
As a consequence, paragraphs now cannot interrupt tables. A paragraph which
follows the table has to be delimited with a blank line.
Fixes:
* [#94](https://github.com/mity/md4c/issues/94):
`md_build_ref_def_hashtable()`: Do not allocate more memory than strictly
needed.
* [#95](https://github.com/mity/md4c/issues/95):
`md_is_container_mark()`: Ordered list mark requires at least one digit.
* [#96](https://github.com/mity/md4c/issues/96):
Some fixes for link label comparison.
## Version 0.3.4
Changes:
* Make Unicode-specific code compliant to Unicode 12.1.
* Structure `MD_BLOCK_CODE_DETAIL` got new member `fenced_char`. Application
can use it to detect character used to form the block fences (`` ` `` or
`~`). In the case of indented code block, it is set to zero.
Fixes:
* [#77](https://github.com/mity/md4c/issues/77):
Fix maximal count of digits for numerical character references, as requested
by CommonMark specification 0.29.
* [#78](https://github.com/mity/md4c/issues/78):
Fix link reference definition label matching for Unicode characters where
the folding mapping leads to multiple codepoints, as e.g. in `ẞ` -> `SS`.
* [#83](https://github.com/mity/md4c/issues/83):
Fix recognition of an empty blockquote which interrupts a paragraph.
## Version 0.3.3
Changes:
* Make permissive URL autolink and permissive WWW autolink extensions stricter.
This brings the behavior closer to GFM and mitigates risk of false positives.
In particular, the domain has to contain at least one dot and parenthesis
can be part of the link destination only if `(` and `)` are balanced.
Fixes:
* [#73](https://github.com/mity/md4c/issues/73):
Some raw HTML inputs could lead to quadratic parsing times.
* [#74](https://github.com/mity/md4c/issues/74):
Fix input leading to a crash. Found by fuzzing.
* [#76](https://github.com/mity/md4c/issues/76):
Fix handling of parenthesis in some corner cases of permissive URL autolink
and permissive WWW autolink extensions.
## Version 0.3.2
Changes:
* Changes mandated by CommonMark specification 0.29.
Most importantly, the white-space trimming rules for code spans have changed.
At most one space/newline is trimmed from beginning/end of the code span
(if the code span contains some non-space contents, and if it begins and
ends with space at the same time). In all other cases the spaces in the code
span are now left intact.
Other changes in behavior are in corner cases only. Refer to [CommonMark
0.29 notes](https://github.com/commonmark/commonmark-spec/releases/tag/0.29)
for more info.
Fixes:
* [#68](https://github.com/mity/md4c/issues/68):
Some specific HTML blocks were not recognized when EOF follows without any
end-of-line character.
* [#69](https://github.com/mity/md4c/issues/69):
Strike-through span not working correctly when its opener mark is directly
followed by other opener mark; or when other closer mark directly precedes
its closer mark.
## Version 0.3.1
Fixes:
* [#58](https://github.com/mity/md4c/issues/58),
[#59](https://github.com/mity/md4c/issues/59),
[#60](https://github.com/mity/md4c/issues/60),
[#63](https://github.com/mity/md4c/issues/63),
[#66](https://github.com/mity/md4c/issues/66):
Some inputs could lead to quadratic parsing times. Thanks to Anders Kaseorg
for finding all those issues.
* [#61](https://github.com/mity/md4c/issues/61):
Flag `MD_FLAG_NOHTMLSPANS` erroneously affected also recognition of
CommonMark autolinks.
## Version 0.3.0
New features:
* Add extension for GitHub-style task lists:
```
* [x] foo
* [x] bar
* [ ] baz
```
(It has to be explicitly enabled with `MD_FLAG_TASKLISTS`.)
* Added support for building as a shared library. On non-Windows platforms,
this is now default behavior; on Windows static library is still the default.
The CMake option `BUILD_SHARED_LIBS` can be used to request one or the other
explicitly.
Contributed by Lisandro Damián Nicanor Pérez Meyer.
* Renamed structure `MD_RENDERER` to `MD_PARSER` and refactored its contents
a little bit. Note this is source-level incompatible and initialization code
in apps may need to be updated.
The aim of the change is to be more friendly for long-term ABI compatibility
we shall maintain, starting with this release.
* Added `CHANGELOG.md` (this file).
* Make sure `md_process_table_row()` reports the same count of table cells for
all table rows, no matter how broken the input is. The cell count is derived
from table underline line. Bogus cells in other rows are silently ignored.
Missing cells in other rows are reported as empty ones.
Fixes:
* CID 1475544:
Calling `md_free_attribute()` on uninitialized data.
* [#47](https://github.com/mity/md4c/issues/47):
Using bad offsets in `md_is_entity_str()`, in some cases leading to buffer
overflow.
* [#51](https://github.com/mity/md4c/issues/51):
Segfault in `md_process_table_cell()`.
* [#53](https://github.com/mity/md4c/issues/53):
With `MD_FLAG_PERMISSIVEURLAUTOLINKS` or `MD_FLAG_PERMISSIVEWWWAUTOLINKS`
we could generate bad output for ordinary Markdown links, if a non-space
character immediately follows like e.g. in `[link](http://github.com)X`.
## Version 0.2.7
This was the last version before the changelog was added.

@ -0,0 +1,59 @@
# Top-level build script of the MD4C project (C only).
cmake_minimum_required(VERSION 3.4)
project(MD4C C)
# Library version, kept as separate components so that each of them can also
# be handed to the compiler as a preprocessor definition.
set(MD_VERSION_MAJOR 0)
set(MD_VERSION_MINOR 4)
set(MD_VERSION_RELEASE 8)
set(MD_VERSION "${MD_VERSION_MAJOR}.${MD_VERSION_MINOR}.${MD_VERSION_RELEASE}")
# project() above is called without a VERSION argument, so PROJECT_VERSION is
# filled in by hand from the components.
set(PROJECT_VERSION "${MD_VERSION}")
set(PROJECT_URL "https://github.com/mity/md4c")
# Default library type. Only the default differs per platform; the user can
# still force either kind with -DBUILD_SHARED_LIBS=ON|OFF.
if(WIN32)
    # On Windows, given there is no standard lib install dir etc., we rather
    # by default build static lib.
    option(BUILD_SHARED_LIBS "Build MD4C as shared libraries instead of static ones" OFF)
else()
    # On Linux, MD4C is slowly being added into some distros which prefer
    # shared lib.
    option(BUILD_SHARED_LIBS "Build MD4C as shared libraries instead of static ones" ON)
endif()
# Pass the version components to the compiler as preprocessor definitions.
# add_definitions() is directory-scoped, so this applies to the targets of
# this directory and of the subdirectories added further below.
add_definitions(
-DMD_VERSION_MAJOR=${MD_VERSION_MAJOR}
-DMD_VERSION_MINOR=${MD_VERSION_MINOR}
-DMD_VERSION_RELEASE=${MD_VERSION_RELEASE}
)
# Build configurations offered by this project.
set(CMAKE_CONFIGURATION_TYPES Debug Release RelWithDebInfo MinSizeRel)
# When no build type was given on the command line, take it from the
# CMAKE_BUILD_TYPE environment variable; if that is empty as well, fall back
# to "Release".
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE})
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE "Release")
endif()
endif()
# Compiler-specific flags.
# Both operands are quoted so that the condition stays well-formed even when
# the compiler ID is empty (an unquoted empty expansion would leave if() with
# a missing left-hand operand).
if("${CMAKE_C_COMPILER_ID}" MATCHES "GNU|Clang")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
elseif(MSVC)
    # Disable warnings about the so-called unsecured functions:
    add_definitions(/D_CRT_SECURE_NO_WARNINGS /W3)

    # Specify proper C runtime library: first strip whatever /MD, /MDd, /MT
    # or /MTd selection is already present in each per-configuration flag
    # set ...
    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL}")

    # ... then select the static runtime. Every configuration extends its own
    # flag set, so RelWithDebInfo and MinSizeRel keep their specific options
    # instead of being overwritten with the Release ones.
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")
    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT")
endif()
# Provides the CMAKE_INSTALL_<dir> variables used by the install rules.
include(GNUInstallDirs)
# Descend into the sources in src/ and the md2html command line utility.
add_subdirectory(src)
add_subdirectory(md2html)

@ -0,0 +1,22 @@
# The MIT License (MIT)
Copyright © 2016-2020 Martin Mitáš
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the “Software”),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

@ -0,0 +1,297 @@
[![Linux Build Status (travis-ci.com)](https://img.shields.io/travis/mity/md4c/master.svg?logo=linux&label=linux%20build)](https://travis-ci.com/mity/md4c)
[![Windows Build Status (appveyor.com)](https://img.shields.io/appveyor/ci/mity/md4c/master.svg?logo=windows&label=windows%20build)](https://ci.appveyor.com/project/mity/md4c/branch/master)
[![Code Coverage Status (codecov.io)](https://img.shields.io/codecov/c/github/mity/md4c/master.svg?logo=codecov&label=code%20coverage)](https://codecov.io/github/mity/md4c)
[![Coverity Scan Status](https://img.shields.io/coverity/scan/mity-md4c.svg?label=coverity%20scan)](https://scan.coverity.com/projects/mity-md4c)
# MD4C Readme
* Home: http://github.com/mity/md4c
* Wiki: http://github.com/mity/md4c/wiki
* Issue tracker: http://github.com/mity/md4c/issues
MD4C stands for "Markdown for C" and that's exactly what this project is about.
## What is Markdown
In short, Markdown is the markup language this `README.md` file is written in.
The following resources can explain more if you are unfamiliar with it:
* [Wikipedia article](http://en.wikipedia.org/wiki/Markdown)
* [CommonMark site](http://commonmark.org)
## What is MD4C
MD4C is a Markdown parser implementation in C, with the following features:
* **Compliance:** Generally, MD4C aims to be compliant to the latest version of
[CommonMark specification](http://spec.commonmark.org/). Currently, we are
fully compliant to CommonMark 0.30.
* **Extensions:** MD4C supports some commonly requested and accepted extensions.
See below.
* **Performance:** MD4C is [very fast](https://talk.commonmark.org/t/2520).
* **Compactness:** MD4C parser is implemented in one source file and one header
file. There are no dependencies other than standard C library.
* **Embedding:** MD4C parser is easy to reuse in other projects, its API is
very straightforward: There is actually just one function, `md_parse()`.
* **Push model:** MD4C parses the complete document and calls a few callback
functions provided by the application to inform it about a start/end of
every block, a start/end of every span, and with any textual contents.
* **Portability:** MD4C builds and works on Windows and POSIX-compliant OSes.
(It should be simple to make it run also on most other platforms, at least as
long as the platform provides C standard library, including a heap memory
management.)
* **Encoding:** MD4C by default expects UTF-8 encoding of the input document.
But it can be compiled to recognize ASCII-only control characters (i.e. to
disable all Unicode-specific code), or (on Windows) to expect UTF-16 (i.e.
what is on Windows commonly called just "Unicode"). See more details below.
* **Permissive license:** MD4C is available under the [MIT license](LICENSE.md).
## Using MD4C
### Parsing Markdown
If you need just to parse a Markdown document, you need to include `md4c.h`
and link against MD4C library (`-lmd4c`); or alternatively add `md4c.[hc]`
directly to your code base as the parser is only implemented in the single C
source file.
The main provided function is `md_parse()`. It takes a text in the Markdown
syntax and a pointer to a structure which provides pointers to several callback
functions.
As `md_parse()` processes the input, it calls the callbacks (when entering or
leaving any Markdown block or span; and when outputting any textual content of
the document), allowing application to convert it into another format or render
it onto the screen.
### Converting to HTML
If you need to convert Markdown to HTML, include `md4c-html.h` and link against
MD4C-HTML library (`-lmd4c-html`); or alternatively add the sources `md4c.[hc]`,
`md4c-html.[hc]` and `entity.[hc]` into your code base.
To convert a Markdown input, call `md_html()` function. It takes the Markdown
input and calls the provided callback function. The callback is fed with
chunks of the HTML output. Typical callback implementation just appends the
chunks into a buffer or writes them to a file.
## Markdown Extensions
The default behavior is to recognize only Markdown syntax defined by the
[CommonMark specification](http://spec.commonmark.org/).
However, with appropriate flags, the behavior can be tuned to enable some
extensions:
* With the flag `MD_FLAG_COLLAPSEWHITESPACE`, a non-trivial whitespace is
collapsed into a single space.
* With the flag `MD_FLAG_TABLES`, GitHub-style tables are supported.
* With the flag `MD_FLAG_TASKLISTS`, GitHub-style task lists are supported.
* With the flag `MD_FLAG_STRIKETHROUGH`, strike-through spans are enabled
(text enclosed in tilde marks, e.g. `~foo bar~`).
* With the flag `MD_FLAG_PERMISSIVEURLAUTOLINKS` permissive URL autolinks
(not enclosed in `<` and `>`) are supported.
* With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, permissive e-mail
autolinks (not enclosed in `<` and `>`) are supported.
* With the flag `MD_FLAG_PERMISSIVEWWWAUTOLINKS` permissive WWW autolinks
without any scheme specified (e.g. `www.example.com`) are supported. MD4C
then assumes `http:` scheme.
* With the flag `MD_FLAG_LATEXMATHSPANS` LaTeX math spans (`$...$`) and
LaTeX display math spans (`$$...$$`) are supported. (Note though that the
HTML renderer outputs them verbatim in a custom tag `<x-equation>`.)
* With the flag `MD_FLAG_WIKILINKS`, wiki-style links (`[[link label]]` and
`[[target article|link label]]`) are supported. (Note that the HTML renderer
outputs them in a custom tag `<x-wikilink>`.)
* With the flag `MD_FLAG_UNDERLINE`, underscore (`_`) denotes an underline
instead of an ordinary emphasis or strong emphasis.
Few features of CommonMark (those some people see as mis-features) may be
disabled with the following flags:
* With the flag `MD_FLAG_NOHTMLSPANS` or `MD_FLAG_NOHTMLBLOCKS`, raw inline
HTML or raw HTML blocks respectively are disabled.
* With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are
disabled.
## Input/Output Encoding
The CommonMark specification declares that any sequence of Unicode code points
is a valid CommonMark document.
But, on closer inspection, Unicode plays a role in only a few very specific
situations when parsing Markdown documents:
1. For detection of word boundaries when processing emphasis and strong
emphasis, some classification of Unicode characters (whether it is
a whitespace or a punctuation) is needed.
2. For (case-insensitive) matching of a link reference label with the
corresponding link reference definition, Unicode case folding is used.
3. For translating HTML entities (e.g. `&amp;`) and numeric character
references (e.g. `&#35;` or `&#xcab;`) into their Unicode equivalents.
However note MD4C leaves this translation on the renderer/application; as
the renderer is supposed to really know output encoding and whether it
really needs to perform this kind of translation. (For example, when the
renderer outputs HTML, it may leave the entities untranslated and defer the
work to a web browser.)
MD4C relies on this property of the CommonMark and the implementation is, to
a large degree, encoding-agnostic. Most of MD4C code only assumes that the
encoding of your choice is compatible with ASCII. I.e. that the codepoints
below 128 have the same numeric values as ASCII.
Any input MD4C does not understand is simply seen as part of the document text
and sent to the renderer's callback functions unchanged.
The two situations (word boundary detection and link reference matching) where
MD4C has to understand Unicode are handled as specified by the following
preprocessor macros (as specified at the time MD4C is being built):
* If preprocessor macro `MD4C_USE_UTF8` is defined, MD4C assumes UTF-8 for the
word boundary detection and for the case-insensitive matching of link labels.
When none of these macros is explicitly used, this is the default behavior.
* On Windows, if preprocessor macro `MD4C_USE_UTF16` is defined, MD4C uses
`WCHAR` instead of `char` and assumes UTF-16 encoding in those situations.
(UTF-16 is what Windows developers usually call just "Unicode" and what
Win32API generally works with.)
Note that because this macro affects also the types in `md4c.h`, you have
to define the macro both when building MD4C as well as when including
`md4c.h`.
Also note this is only supported in the parser (`md4c.[hc]`). The HTML
renderer does not support this and you will have to write your own custom
renderer to use this feature.
* If preprocessor macro `MD4C_USE_ASCII` is defined, MD4C assumes nothing but
an ASCII input.
That effectively means that non-ASCII whitespace or punctuation characters
won't be recognized as such and that link reference matching will work in
a case-insensitive way only for ASCII letters (`[a-zA-Z]`).
## Documentation
The API of the parser is quite well documented in the comments in the `md4c.h`.
Similarly, the markdown-to-html API is described in its header `md4c-html.h`.
There is also [project wiki](http://github.com/mity/md4c/wiki) which provides
some more comprehensive documentation. However note it is incomplete and some
details may be somewhat outdated.
## FAQ
**Q: How does MD4C compare to a parser XY?**
**A:** Some other implementations combine Markdown parser and HTML generator
into a single entangled code hidden behind an interface which just allows the
conversion from Markdown to HTML. They are often unusable if you want to
process the input in any other way.
Even when the parsing is available as a standalone feature, most parsers (if
not all of them; at least within the scope of C/C++ language) are full DOM-like
parsers: They construct abstract syntax tree (AST) representation of the whole
Markdown document. That takes time and it leads to bigger memory footprint.
It's completely fine as long as you really need it. If you don't need the full
AST, there is a very high chance that using MD4C will be substantially faster
and less hungry in terms of memory consumption.
Last but not least, some Markdown parsers are implemented in a naive way. When
fed with a [smartly crafted input pattern](test/pathological_tests.py), they
may exhibit quadratic (or even worse) parsing times. What MD4C can still parse
in a fraction of second may turn into long minutes or possibly hours with them.
Hence, when such a naive parser is used to process an input from an untrusted
source, the possibility of denial-of-service attacks becomes a real danger.
A lot of our effort went into providing linear parsing times no matter what
kind of crazy input MD4C parser is fed with. (If you encounter an input pattern
which leads to super-linear parsing times, please do not hesitate and report it
as a bug.)
**Q: Does MD4C perform any input validation?**
**A:** No. And we are proud of it. :-)
CommonMark specification states that any sequence of Unicode characters is
a valid Markdown document. (In practice, this more or less always means UTF-8
encoding.)
In other words, according to the specification, it does not matter whether some
Markdown syntax construction is in some way broken or not. If it is broken, it
will simply not be recognized and the parser should see it just as a verbatim
text.
MD4C takes this a step further: It sees any sequence of bytes as a valid input,
following completely the GIGO philosophy (garbage in, garbage out). I.e. any
ill-formed UTF-8 byte sequence will propagate to the respective callback as
a part of the text.
If you need to validate that the input is, say, a well-formed UTF-8 document,
you have to do it on your own. The easiest way how to do this is to simply
validate the whole document before passing it to the MD4C parser.
## License
MD4C is covered with MIT license, see the file `LICENSE.md`.
## Links to Related Projects
Ports and bindings to other languages:
* [commonmark-d](https://github.com/AuburnSounds/commonmark-d):
Port of MD4C to D language.
* [markdown-wasm](https://github.com/rsms/markdown-wasm):
Port of MD4C to WebAssembly.
* [PyMD4C](https://github.com/dominickpastore/pymd4c):
Python bindings for MD4C
Software using MD4C:
* [QOwnNotes](https://www.qownnotes.org/):
A plain-text file notepad and todo-list manager with markdown support and
ownCloud / Nextcloud integration.
* [Qt](https://www.qt.io/):
Cross-platform C++ GUI framework.
* [Textosaurus](https://github.com/martinrotter/textosaurus):
Cross-platform text editor based on Qt and Scintilla.
* [8th](https://8th-dev.com/):
Cross-platform concatenative programming language.

@ -0,0 +1,22 @@
# Build rules for md2html command line utility
add_executable(md2html cmdline.c cmdline.h md2html.c)

# All build settings are attached to the target itself instead of being set
# at directory scope:
#  - DEBUG is defined for Debug builds only,
#  - the library headers are looked up in the project's src/ directory,
#  - the link dependency is private (nothing links against an executable).
target_compile_definitions(md2html PRIVATE $<$<CONFIG:Debug>:DEBUG>)
target_include_directories(md2html PRIVATE "${PROJECT_SOURCE_DIR}/src")
target_link_libraries(md2html PRIVATE md4c-html)

# Install rules
install(
    TARGETS md2html
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
# Manual page
install(FILES "md2html.1" DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")

@ -0,0 +1,205 @@
/*
* C Reusables
* <http://github.com/mity/c-reusables>
*
* Copyright (c) 2017-2020 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "cmdline.h"
#include <stdio.h>
#include <string.h>
#ifdef _WIN32
#define snprintf _snprintf
#endif
#define CMDLINE_AUXBUF_SIZE 32
/* Handle a group of short options, one character of `arggroup` at a time.
 *
 * Each character is looked up among the short names in `options` (the table
 * is terminated by an entry with id 0). A known option which takes no
 * required argument is reported as callback(id, NULL, userdata). Otherwise
 * the callback gets CMDLINE_OPTID_MISSINGARG (known option which requires an
 * argument) or CMDLINE_OPTID_UNKNOWN (no such option), together with the
 * offending option spelled as "-x".
 *
 * Processing stops at the first non-zero callback result, which is returned;
 * 0 is returned when the whole group has been handled.
 */
static int
cmdline_handle_short_opt_group(const CMDLINE_OPTION* options, const char* arggroup,
                    int (*callback)(int /*optval*/, const char* /*arg*/, void* /*userdata*/),
                    void* userdata)
{
    const char* ch;
    int status = 0;

    for(ch = arggroup; status == 0 && *ch != '\0'; ch++) {
        const CMDLINE_OPTION* match = options;

        /* Find the option with this short name; stop at the terminator. */
        while(match->id != 0 && match->shortname != *ch)
            match++;

        if(match->id == 0 || (match->flags & CMDLINE_OPTFLAG_REQUIREDARG)) {
            /* Unknown option, or one whose required argument cannot be
             * supplied from inside a group. */
            char badoptname[3];
            int errid;

            badoptname[0] = '-';
            badoptname[1] = *ch;
            badoptname[2] = '\0';

            if(match->id == 0)
                errid = CMDLINE_OPTID_UNKNOWN;
            else
                errid = CMDLINE_OPTID_MISSINGARG;

            status = callback(errid, badoptname, userdata);
        } else {
            status = callback(match->id, NULL, userdata);
        }
    }

    return status;
}
/* Walk argv[1] .. argv[argc-1] and report every token to the callback,
 * classified against the options[] table (terminated by a record with id 0).
 *
 * Returns zero when all tokens were processed, or the first non-zero value
 * returned by the callback (processing stops there). */
int
cmdline_read(const CMDLINE_OPTION* options, int argc, char** argv,
int (*callback)(int /*optval*/, const char* /*arg*/, void* /*userdata*/),
void* userdata)
{
const CMDLINE_OPTION* opt;
/* Scratch space for option names handed to the callback in error reports;
 * one extra byte keeps a terminator beyond CMDLINE_AUXBUF_SIZE characters. */
char auxbuf[CMDLINE_AUXBUF_SIZE+1];
int fast_optarg_decision = 1;
int after_doubledash = 0;
/* argv[0] is skipped. */
int i = 1;
int ret = 0;
auxbuf[CMDLINE_AUXBUF_SIZE] = '\0';
/* Check whether there is any CMDLINE_OPTFLAG_COMPILERLIKE option with
* a name not starting with '-'. That would imply we can check for
* non-option arguments only after refusing all such options. */
for(opt = options; opt->id != 0; opt++) {
if((opt->flags & CMDLINE_OPTFLAG_COMPILERLIKE) && opt->longname[0] != '-')
fast_optarg_decision = 0;
}
while(i < argc) {
if(after_doubledash || strcmp(argv[i], "-") == 0) {
/* Non-option argument.
* Standalone "-" usually means "read from stdin" or "write to
* stdout" so treat it always as a non-option. */
ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
} else if(strcmp(argv[i], "--") == 0) {
/* End of options. All the remaining tokens are non-options
* even if they start with a dash. */
after_doubledash = 1;
} else if(fast_optarg_decision && argv[i][0] != '-') {
/* Non-option argument. */
ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
} else {
/* Try the options in table order; the first one which accepts the
 * token handles it and leaves the loop via break. */
for(opt = options; opt->id != 0; opt++) {
if(opt->flags & CMDLINE_OPTFLAG_COMPILERLIKE) {
size_t len = strlen(opt->longname);
if(strncmp(argv[i], opt->longname, len) == 0) {
/* Compiler-like option. */
/* The argument either follows the name directly within the same
 * token, or it is the next token (which is then consumed). */
if(argv[i][len] != '\0')
ret = callback(opt->id, argv[i] + len, userdata);
else if(i+1 < argc)
ret = callback(opt->id, argv[++i], userdata);
else
ret = callback(CMDLINE_OPTID_MISSINGARG, opt->longname, userdata);
break;
}
} else if(opt->longname != NULL && strncmp(argv[i], "--", 2) == 0) {
size_t len = strlen(opt->longname);
if(strncmp(argv[i]+2, opt->longname, len) == 0) {
/* Regular long option. */
if(argv[i][2+len] == '\0') {
/* with no argument provided. */
if(!(opt->flags & CMDLINE_OPTFLAG_REQUIREDARG))
ret = callback(opt->id, NULL, userdata);
else
ret = callback(CMDLINE_OPTID_MISSINGARG, argv[i], userdata);
break;
} else if(argv[i][2+len] == '=') {
/* with an argument provided. */
if(opt->flags & (CMDLINE_OPTFLAG_OPTIONALARG | CMDLINE_OPTFLAG_REQUIREDARG)) {
ret = callback(opt->id, argv[i]+2+len+1, userdata);
} else {
snprintf(auxbuf, CMDLINE_AUXBUF_SIZE, "--%s", opt->longname);
ret = callback(CMDLINE_OPTID_BOGUSARG, auxbuf, userdata);
}
break;
} else {
/* The long name is only a proper prefix of the token (something other
 * than '\0' or '=' follows), so keep trying the remaining options. */
continue;
}
}
} else if(opt->shortname != '\0' && argv[i][0] == '-') {
if(argv[i][1] == opt->shortname) {
/* Regular short option. */
if(opt->flags & CMDLINE_OPTFLAG_REQUIREDARG) {
/* The argument is either the rest of this token ("-oFILE") or the
 * next token ("-o FILE"), which is then consumed. */
if(argv[i][2] != '\0')
ret = callback(opt->id, argv[i]+2, userdata);
else if(i+1 < argc)
ret = callback(opt->id, argv[++i], userdata);
else
ret = callback(CMDLINE_OPTID_MISSINGARG, argv[i], userdata);
break;
} else {
ret = callback(opt->id, NULL, userdata);
/* There might be more (argument-less) short options
* grouped together. */
if(ret == 0 && argv[i][2] != '\0')
ret = cmdline_handle_short_opt_group(options, argv[i]+2, callback, userdata);
break;
}
}
}
}
/* Reaching the terminating record means that no option claimed the token. */
if(opt->id == 0) { /* still not handled? */
if(argv[i][0] != '-') {
/* Non-option argument. */
ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
} else {
/* Unknown option. */
char* badoptname = argv[i];
if(strncmp(badoptname, "--", 2) == 0) {
/* Strip any argument from the long option. */
char* assignment = strchr(badoptname, '=');
if(assignment != NULL) {
size_t len = assignment - badoptname;
/* Names longer than the scratch buffer are reported truncated. */
if(len > CMDLINE_AUXBUF_SIZE)
len = CMDLINE_AUXBUF_SIZE;
strncpy(auxbuf, badoptname, len);
auxbuf[len] = '\0';
badoptname = auxbuf;
}
}
ret = callback(CMDLINE_OPTID_UNKNOWN, badoptname, userdata);
}
}
}
/* A non-zero callback result aborts the parsing and is propagated. */
if(ret != 0)
return ret;
i++;
}
return ret;
}

@ -0,0 +1,153 @@
/*
* C Reusables
* <http://github.com/mity/c-reusables>
*
* Copyright (c) 2017 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CRE_CMDLINE_H
#define CRE_CMDLINE_H
#ifdef __cplusplus
extern "C" {
#endif
/* The option may have an argument. (Affects only long option.) */
#define CMDLINE_OPTFLAG_OPTIONALARG 0x0001
/* The option must have an argument.
* Such short option cannot be grouped within single '-abc'. */
#define CMDLINE_OPTFLAG_REQUIREDARG 0x0002
/* Enable special compiler-like mode for the long option.
*
* Note CMDLINE_OPTION::shortname is not supported with this flag; it is
* silently ignored if the flag is used.
*
* With this flag, CMDLINE_OPTION::longname is treated differently as follows:
*
* 1. The option matches if the CMDLINE_OPTION::longname is the exact prefix
* of the argv[i] from commandline.
*
* 2. Double dash ("--") is not automatically prepended to
* CMDLINE_OPTION::longname. (If you desire any leading dash, include it
* explicitly in CMDLINE_OPTION initialization.)
*
* 3. An argument (optionally after a whitespace) is required (the flag
* CMDLINE_OPTFLAG_COMPILERLIKE implicitly implies also the flag
* CMDLINE_OPTFLAG_REQUIREDARG).
*
* But there is no delimiter expected (no "=" between the option and its
* argument). Whitespace is optional between the option and its argument.
*
* Intended use is for options similar to what many compilers accept.
* For example:
* -DDEBUG=0 (-D is the option, DEBUG=0 is the argument).
* -Isrc/include (-I is the option, src/include is the argument).
* -isystem /usr/include (-isystem is the option, /usr/include is the argument).
* -lmath (-l is the option, math is the argument).
*/
#define CMDLINE_OPTFLAG_COMPILERLIKE 0x0004
/* Special (reserved) option IDs. Do not use these for any CMDLINE_OPTION::id.
* See documentation of cmdline_read() to get info about their meaning.
*/
/* Non-option (positional) argument. Zero also terminates the options list,
 * which is why no real option can use it as its id. */
#define CMDLINE_OPTID_NONE 0
/* The token looks like an option but matches nothing in the options list. */
#define CMDLINE_OPTID_UNKNOWN (-0x7fffffff + 0)
/* A known option which requires an argument was given without one. */
#define CMDLINE_OPTID_MISSINGARG (-0x7fffffff + 1)
/* A known option which takes no argument was given one. */
#define CMDLINE_OPTID_BOGUSARG (-0x7fffffff + 2)
/* Description of a single supported command line option. An array of these
 * records, terminated by a record with id == 0, is passed to cmdline_read(). */
typedef struct CMDLINE_OPTION {
char shortname; /* Short (single char) option or 0. */
const char* longname; /* Long name (after "--") or NULL. */
int id; /* Non-zero ID to identify the option in the callback; or zero to denote end of options list. */
unsigned flags; /* Bitmask of CMDLINE_OPTFLAG_xxxx flags. */
} CMDLINE_OPTION;
/* Parses all options and their arguments as specified by argc, argv accordingly
* with the given options (except argv[0] which is ignored).
*
* The caller must specify the list of supported options in the 1st parameter
* of the function. The array must end with a record whose CMDLINE_OPTION::id
* is zero.
*
* The provided callback function is called for each option on the command
* line so that:
*
* -- the "id" refers to the id of the option as specified in options[].
*
* -- the "arg" specifies an argument of the option or NULL if none is
* provided.
*
* -- the "userdata" just allows to pass in some caller's context into
* the callback.
*
* Special cases (recognized via special "id" value) are reported to the
* callback as follows:
*
* -- If id is CMDLINE_OPTID_NONE, the callback informs about a non-option
* also known as a positional argument.
*
* All argv[] tokens which are not interpreted as an option or as an argument
* of any option fall into this category.
*
* Usually, programs interpret these as paths to files to process.
*
* -- If id is CMDLINE_OPTID_UNKNOWN, the corresponding argv[] looks like an
* option but it is not found in the options[] passed to cmdline_read().
*
* The callback's parameter arg specifies the guilty command line token.
* Usually, program writes down an error message and exits.
*
* -- If id is CMDLINE_OPTID_MISSINGARG, the given option is valid but its
* flag in options[] requires an argument; yet there is none on the
* command line.
*
* The callback's parameter arg specifies the guilty option name.
* Usually, program writes down an error message and exits.
*
* -- If id is CMDLINE_OPTID_BOGUSARG, the given option is valid but its
* flag in options[] does not expect an argument; yet the command line
* does provide one.
*
* The callback's parameter arg specifies the guilty option name.
* Usually, program writes down an error message and exits.
*
* On success, zero is returned.
*
* If the callback returns a non-zero, cmdline_read() aborts immediately and
* cmdline_read() propagates the same return value to the caller.
*/
int cmdline_read(const CMDLINE_OPTION* options, int argc, char** argv,
int (*callback)(int /*id*/, const char* /*arg*/, void* /*userdata*/),
void* userdata);
#ifdef __cplusplus
} /* extern "C" { */
#endif
#endif /* CRE_CMDLINE_H */

@ -0,0 +1,113 @@
.TH MD2HTML 1 "June 2019" "" "General Commands Manual"
.nh
.ad l
.
.SH NAME
.
md2html \- convert Markdown to HTML
.
.SH SYNOPSIS
.
.B md2html
.RI [ OPTION ]...\&
.RI [ FILE ]
.
.SH OPTIONS
.
.SS General options:
.
.TP
.BR -o ", " --output= \fIOUTFILE\fR
Write output to \fIOUTFILE\fR instead of \fBstdout\fR(3)
.
.TP
.BR -f ", " --full-html
Generate full HTML document, including header
.
.TP
.BR -x ", " --xhtml
Generate XHTML instead of HTML
.
.TP
.BR -s ", " --stat
Measure time of input parsing
.
.TP
.BR -h ", " --help
Display help and exit
.
.TP
.BR -v ", " --version
Display version and exit
.
.SS Markdown dialect options:
.
.TP
.B --commonmark
CommonMark (the default)
.
.TP
.B --github
Github Flavored Markdown
.
.PP
Note: dialect options are equivalent to some combination of flags below.
.
.SS Markdown extension options:
.
.TP
.B --fcollapse-whitespace
Collapse non-trivial whitespace
.
.TP
.B --fverbatim-entities
Do not translate entities
.
.TP
.B --fpermissive-atx-headers
Allow ATX headers without delimiting space
.
.TP
.B --fpermissive-url-autolinks
Allow URL autolinks without "<" and ">" delimiters
.
.TP
.B --fpermissive-www-autolinks
Allow WWW autolinks without any scheme (e.g. "www.example.com")
.
.TP
.B --fpermissive-email-autolinks
Allow e-mail autolinks without "<", ">" and "mailto:"
.
.TP
.B --fpermissive-autolinks
Enable all 3 of the above permissive autolinks options
.
.TP
.B --fno-indented-code
Disable indented code blocks
.
.TP
.B --fno-html-blocks
Disable raw HTML blocks
.
.TP
.B --fno-html-spans
Disable raw HTML spans
.
.TP
.B --fno-html
Same as \fB--fno-html-blocks --fno-html-spans\fR
.
.TP
.B --ftables
Enable tables
.
.TP
.B --fstrikethrough
Enable strikethrough spans
.
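.TP
.B --ftasklists
Enable task lists
.
.TP
.B --flatex-math
Enable LaTeX style mathematics spans
.
.TP
.B --funderline
Enable underline spans
.
.TP
.B --fwiki-links
Enable wiki links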
.
.SH SEE ALSO
.
https://github.com/mity/md4c
.

@ -0,0 +1,383 @@
/*
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
* Copyright (c) 2016-2020 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "md4c-html.h"
#include "cmdline.h"
/* Global options. */
/* Parser flags handed to md_html(); the command line callback ORs
 * MD_FLAG_xxxx / MD_DIALECT_xxxx values into it. */
static unsigned parser_flags = 0;
/* Renderer flags handed to md_html(). The UTF-8 BOM skipping is enabled by
 * default unless the build defines MD4C_USE_ASCII. */
#ifndef MD4C_USE_ASCII
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG | MD_HTML_FLAG_SKIP_UTF8_BOM;
#else
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
#endif
/* Set by -f/--full-html: wrap the output in a complete HTML document. */
static int want_fullhtml = 0;
/* Set by -x/--xhtml: emit the XHTML prolog and self-closing <meta />. */
static int want_xhtml = 0;
/* Set by -s/--stat: print the time spent in md_html() to stderr. */
static int want_stat = 0;
/*********************************
*** Simple grow-able buffer ***
*********************************/
/* We render to a memory buffer instead of directly outputting the rendered
* documents, as this allows using this utility for evaluating performance
* of MD4C (--stat option). This allows us to measure just time of the parser,
* without the I/O.
*/
/* Grow-able byte buffer managed by the membuf_xxxx() functions below. */
struct membuffer {
char* data; /* Heap block obtained from malloc()/realloc(). */
size_t asize; /* Allocated size of the block, in bytes. */
size_t size; /* Count of bytes currently stored in the block. */
};
/* Set up 'buf' as an empty buffer with room for new_asize bytes.
 * Prints a diagnostic and terminates the program if the allocation fails. */
static void
membuf_init(struct membuffer* buf, MD_SIZE new_asize)
{
    buf->size = 0;
    buf->asize = new_asize;
    buf->data = malloc(buf->asize);

    if(buf->data != NULL)
        return;

    fprintf(stderr, "membuf_init: malloc() failed.\n");
    exit(1);
}
/* Release the memory owned by 'buf'. Safe to call on a zero-initialized
 * buffer, because free(NULL) does nothing. */
static void
membuf_fini(struct membuffer* buf)
{
    free(buf->data);
}
/* Resize the storage of 'buf' to new_asize bytes, keeping the stored data.
 * Prints a diagnostic and terminates the program if realloc() fails. */
static void
membuf_grow(struct membuffer* buf, size_t new_asize)
{
    char* resized = realloc(buf->data, new_asize);

    if(resized == NULL) {
        fprintf(stderr, "membuf_grow: realloc() failed.\n");
        exit(1);
    }

    buf->data = resized;
    buf->asize = new_asize;
}
/* Append 'size' bytes from 'data' to the end of 'buf'. When the allocated
 * space is too small, the buffer is enlarged to the required size plus half
 * of the size currently in use. */
static void
membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
{
    size_t needed = buf->size + size;

    if(needed > buf->asize)
        membuf_grow(buf, needed + buf->size / 2);

    memcpy(buf->data + buf->size, data, size);
    buf->size = needed;
}
/**********************
*** Main program ***
**********************/
/* Output callback given to md_html(): collects each rendered chunk in the
 * struct membuffer passed through 'userdata'. */
static void
process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
{
    struct membuffer* sink = (struct membuffer*) userdata;

    membuf_append(sink, text, size);
}
/* Convert the Markdown document read from 'in' to HTML written to 'out'.
 *
 * The whole input is first loaded into memory and the rendered output is
 * collected in a second memory buffer, so that the time measured for the
 * --stat option covers only md_html() and no I/O.
 *
 * Honors the global options: parser_flags and renderer_flags are passed to
 * md_html(); want_fullhtml and want_xhtml select the document wrapper;
 * want_stat prints the parsing time to stderr.
 *
 * Returns 0 on success. Returns -1 if reading the input or writing the
 * output fails, and the non-zero result of md_html() if parsing fails. */
static int
process_file(FILE* in, FILE* out)
{
    size_t n;
    struct membuffer buf_in = {0};
    struct membuffer buf_out = {0};
    int ret = -1;
    clock_t t0, t1;

    membuf_init(&buf_in, 32 * 1024);

    /* Read the input file into a buffer. */
    while(1) {
        if(buf_in.size >= buf_in.asize)
            membuf_grow(&buf_in, buf_in.asize + buf_in.asize / 2);

        n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
        if(n == 0)
            break;
        buf_in.size += n;
    }

    /* fread() returns zero both at end of file and on error. Do not convert
     * a document which has been read only partially. (buf_out is still
     * zero-initialized here, which membuf_fini() handles.) */
    if(ferror(in)) {
        fprintf(stderr, "Reading the input failed.\n");
        goto out;
    }

    /* Input size is good estimation of output size. Add some more reserve to
     * deal with the HTML header/footer and tags. */
    membuf_init(&buf_out, (MD_SIZE)(buf_in.size + buf_in.size/8 + 64));

    /* Parse the document. The rendered HTML is delivered in chunks to
     * process_output(), which stores it in buf_out. */
    t0 = clock();
    ret = md_html(buf_in.data, (MD_SIZE)buf_in.size, process_output, (void*) &buf_out,
                  parser_flags, renderer_flags);
    t1 = clock();
    if(ret != 0) {
        fprintf(stderr, "Parsing failed.\n");
        goto out;
    }

    /* Write down the document in the HTML format. */
    if(want_fullhtml) {
        if(want_xhtml) {
            fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            fprintf(out, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" "
                         "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n");
            fprintf(out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
        } else {
            fprintf(out, "<!DOCTYPE html>\n");
            fprintf(out, "<html>\n");
        }
        fprintf(out, "<head>\n");
        fprintf(out, "<title></title>\n");
        fprintf(out, "<meta name=\"generator\" content=\"md2html\"%s>\n", want_xhtml ? " /" : "");
        fprintf(out, "</head>\n");
        fprintf(out, "<body>\n");
    }

    fwrite(buf_out.data, 1, buf_out.size, out);

    if(want_fullhtml) {
        fprintf(out, "</body>\n");
        fprintf(out, "</html>\n");
    }

    /* Flush so that a write failure (e.g. a full disk) shows up here and is
     * reported instead of being silently lost. */
    if(fflush(out) != 0 || ferror(out)) {
        fprintf(stderr, "Writing the output failed.\n");
        ret = -1;
        goto out;
    }

    if(want_stat) {
        /* clock() returns (clock_t)-1 when the processor time is unavailable. */
        if(t0 != (clock_t)-1 && t1 != (clock_t)-1) {
            double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
            if (elapsed < 1)
                fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
            else
                fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
        }
    }

    /* Success if we have reached here. */
    ret = 0;

out:
    membuf_fini(&buf_in);
    membuf_fini(&buf_out);

    return ret;
}
/* Options recognized by md2html. The columns are: short name (0 if none),
 * long name, id and CMDLINE_OPTFLAG_xxxx flags. The id is the value which
 * cmdline_callback() receives and dispatches on; for options without a short
 * name it is just an arbitrary unique character. */
static const CMDLINE_OPTION cmdline_options[] = {
{ 'o', "output", 'o', CMDLINE_OPTFLAG_REQUIREDARG },
{ 'f', "full-html", 'f', 0 },
{ 'x', "xhtml", 'x', 0 },
{ 's', "stat", 's', 0 },
{ 'h', "help", 'h', 0 },
{ 'v', "version", 'v', 0 },
{ 0, "commonmark", 'c', 0 },
{ 0, "github", 'g', 0 },
{ 0, "fcollapse-whitespace", 'W', 0 },
{ 0, "flatex-math", 'L', 0 },
{ 0, "fpermissive-atx-headers", 'A', 0 },
{ 0, "fpermissive-autolinks", 'V', 0 },
{ 0, "fpermissive-email-autolinks", '@', 0 },
{ 0, "fpermissive-url-autolinks", 'U', 0 },
{ 0, "fpermissive-www-autolinks", '.', 0 },
{ 0, "fstrikethrough", 'S', 0 },
{ 0, "ftables", 'T', 0 },
{ 0, "ftasklists", 'X', 0 },
{ 0, "funderline", '_', 0 },
{ 0, "fverbatim-entities", 'E', 0 },
{ 0, "fwiki-links", 'K', 0 },
{ 0, "fno-html-blocks", 'F', 0 },
{ 0, "fno-html-spans", 'G', 0 },
{ 0, "fno-html", 'H', 0 },
{ 0, "fno-indented-code", 'I', 0 },
/* Terminating record (id == 0). */
{ 0, NULL, 0, 0 }
};
/* Print the command line help to standard output. */
static void
usage(void)
{
    /* The text holds no conversion specifications, so it is written out
     * verbatim. */
    static const char help_text[] =
        "Usage: md2html [OPTION]... [FILE]\n"
        "Convert input FILE (or standard input) in Markdown format to HTML.\n"
        "\n"
        "General options:\n"
        " -o --output=FILE Output file (default is standard output)\n"
        " -f, --full-html Generate full HTML document, including header\n"
        " -x, --xhtml Generate XHTML instead of HTML\n"
        " -s, --stat Measure time of input parsing\n"
        " -h, --help Display this help and exit\n"
        " -v, --version Display version and exit\n"
        "\n"
        "Markdown dialect options:\n"
        "(note these are equivalent to some combinations of the flags below)\n"
        " --commonmark CommonMark (this is default)\n"
        " --github Github Flavored Markdown\n"
        "\n"
        "Markdown extension options:\n"
        " --fcollapse-whitespace\n"
        " Collapse non-trivial whitespace\n"
        " --flatex-math Enable LaTeX style mathematics spans\n"
        " --fpermissive-atx-headers\n"
        " Allow ATX headers without delimiting space\n"
        " --fpermissive-url-autolinks\n"
        " Allow URL autolinks without '<', '>'\n"
        " --fpermissive-www-autolinks\n"
        " Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n"
        " --fpermissive-email-autolinks \n"
        " Allow e-mail autolinks without '<', '>' and 'mailto:'\n"
        " --fpermissive-autolinks\n"
        " Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n"
        " --fpermissive-email-autolinks\n"
        " --fstrikethrough Enable strike-through spans\n"
        " --ftables Enable tables\n"
        " --ftasklists Enable task lists\n"
        " --funderline Enable underline spans\n"
        " --fwiki-links Enable wiki links\n"
        "\n"
        "Markdown suppression options:\n"
        " --fno-html-blocks\n"
        " Disable raw HTML blocks\n"
        " --fno-html-spans\n"
        " Disable raw HTML spans\n"
        " --fno-html Same as --fno-html-blocks --fno-html-spans\n"
        " --fno-indented-code\n"
        " Disable indented code blocks\n"
        "\n"
        "HTML generator options:\n"
        " --fverbatim-entities\n"
        " Do not translate entities\n"
        "\n";

    fputs(help_text, stdout);
}
/* Print the MD4C version as "MAJOR.MINOR.RELEASE" to standard output. */
static void
version(void)
{
    fprintf(stdout, "%d.%d.%d\n",
            MD_VERSION_MAJOR,
            MD_VERSION_MINOR,
            MD_VERSION_RELEASE);
}
/* Path of the input file given as the positional argument; NULL when none
 * was given (main() then reads the standard input). */
static const char* input_path = NULL;
/* Path given via -o/--output; NULL when none was given (main() then writes
 * to the standard output). */
static const char* output_path = NULL;
static int
cmdline_callback(int opt, char const* value, void* data)
{
switch(opt) {
case 0:
if(input_path) {
fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
fprintf(stderr, "Use --help for more info.\n");
exit(1);
}
input_path = value;
break;
case 'o': output_path = value; break;
case 'f': want_fullhtml = 1; break;
case 'x': want_xhtml = 1; renderer_flags |= MD_HTML_FLAG_XHTML; break;
case 's': want_stat = 1; break;
case 'h': usage(); exit(0); break;
case 'v': version(); exit(0); break;
case 'c': parser_flags |= MD_DIALECT_COMMONMARK; break;
case 'g': parser_flags |= MD_DIALECT_GITHUB; break;
case 'E': renderer_flags |= MD_HTML_FLAG_VERBATIM_ENTITIES; break;
case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
case 'I': parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break;
case 'F': parser_flags |= MD_FLAG_NOHTMLBLOCKS; break;
case 'G': parser_flags |= MD_FLAG_NOHTMLSPANS; break;
case 'H': parser_flags |= MD_FLAG_NOHTML; break;
case 'W': parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break;
case 'U': parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break;
case '.': parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break;
case '@': parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
case 'T': parser_flags |= MD_FLAG_TABLES; break;
case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break;
case 'L': parser_flags |= MD_FLAG_LATEXMATHSPANS; break;
case 'K': parser_flags |= MD_FLAG_WIKILINKS; break;
case 'X': parser_flags |= MD_FLAG_TASKLISTS; break;
case '_': parser_flags |= MD_FLAG_UNDERLINE; break;
default:
fprintf(stderr, "Illegal option: %s\n", value);
fprintf(stderr, "Use --help for more info.\n");
exit(1);
break;
}
return 0;
}
/* Open 'path' with the given fopen() mode. A NULL path or the path "-" stands
 * for the given standard stream, which is returned as is. On failure a
 * diagnostic is printed and the program exits with status 1. */
static FILE*
open_stream_or_exit(const char* path, const char* mode, FILE* std_stream)
{
    FILE* stream;

    if(path == NULL || strcmp(path, "-") == 0)
        return std_stream;

    stream = fopen(path, mode);
    if(stream == NULL) {
        fprintf(stderr, "Cannot open %s.\n", path);
        exit(1);
    }
    return stream;
}

/* Program entry point: parse the command line, open the input and output
 * streams (standard streams by default) and convert the document. Returns
 * the result of process_file(). */
int
main(int argc, char** argv)
{
    FILE* in;
    FILE* out;
    int status;

    if(cmdline_read(cmdline_options, argc, argv, cmdline_callback, NULL) != 0) {
        usage();
        exit(1);
    }

    /* The input is opened first, then the output. */
    in = open_stream_or_exit(input_path, "rb", stdin);
    out = open_stream_or_exit(output_path, "wt", stdout);

    status = process_file(in, out);

    /* Close only what has been opened here. */
    if(in != stdin)
        fclose(in);
    if(out != stdout)
        fclose(out);

    return status;
}

@ -0,0 +1,120 @@
#!/usr/bin/env python3
import os
import sys
import textwrap
self_path = os.path.dirname(os.path.realpath(__file__))

# Only the statuses "C" (common) and "F" (full) take part in full case folding.
status_list = [ "C", "F" ]

# folding_list[k-1] maps a codepoint to its folding made of k codepoints.
folding_list = [ dict(), dict(), dict() ]

# Filter the foldings for "full" folding.
#
# The file is opened in a "with" block so that it gets closed even if parsing
# raises, and it is decoded explicitly as UTF-8 so that the result does not
# depend on the locale of the machine running the script.
with open(os.path.join(self_path, "unicode", "CaseFolding.txt"), "r", encoding="utf-8") as f:
    for line in f:
        # Strip the trailing comment, if any.
        comment_off = line.find("#")
        if comment_off >= 0:
            line = line[:comment_off]
        line = line.strip()
        if not line:
            continue

        # Line format: <codepoint>; <status>; <mapping>; (everything else is ignored)
        raw_codepoint, status, raw_mapping, ignored_tail = line.split(";", 3)
        if status.strip() not in status_list:
            continue
        codepoint = int(raw_codepoint.strip(), 16)
        mapping = [int(it, 16) for it in raw_mapping.strip().split(" ")]
        mapping_len = len(mapping)

        if mapping_len in range(1, 4):
            folding_list[mapping_len-1][codepoint] = mapping
        else:
            # The generated tables can only hold foldings of 1 to 3 codepoints.
            # Raise instead of assert so that the check also works under "python -O".
            raise ValueError("Unsupported folding length {} for codepoint 0x{:04x}".format(
                        mapping_len, codepoint))
# If we assume that (index0 ... index-1) makes a range (as defined below),
# check that the newly provided index is compatible with the range too; i.e.
# verify that the range can be extended without breaking its properties.
#
# Currently, we can handle ranges which:
#
# (1) either form consecutive sequence of codepoints and which map that range
# to other consecutive range of codepoints (of the same length);
#
# (2) or a consecutive sequence of codepoints with step 2 where each codepoint
# CP is mapped to the codepoint CP+1
# (e.g. 0x1234 -> 0x1235; 0x1236 -> 0x1237; 0x1238 -> 0x1239; ...).
#
# Note: When the codepoints in the range are mapped to multiple codepoints,
# only the 1st mapped codepoint is considered. All the other ones have to be
# shared by all the mappings covered by the range.
def is_range_compatible(folding, codepoint_list, index0, index):
    """Tell whether the range starting at index0 can be extended up to index.

    folding maps a codepoint to its mapping (a list of codepoints) and
    codepoint_list holds the codepoints in the order being processed. The
    range kind is determined by the first two codepoints of the range
    (codepoint_list[index0] and codepoint_list[index0+1]):

    (1) consecutive codepoints whose first mapped codepoints are consecutive
        as well;
    (2) codepoints with step 2, each of them mapped to the codepoint + 1.

    In both cases all the mappings have to share everything after their first
    codepoint. Returns True if codepoint_list[index] fits, False otherwise.
    """
    span = index - index0

    first_cp = codepoint_list[index0]
    second_cp = codepoint_list[index0+1]
    last_cp = codepoint_list[index]

    first_map = folding[first_cp]
    second_map = folding[second_cp]
    last_map = folding[last_cp]

    # The part of the mapping which all members of the range have to share.
    shared_tail = first_map[1:]

    # The two range kinds are mutually exclusive, as they differ in the step.
    step = second_cp - first_cp

    if step == 1:
        # Range type (1)
        return (last_cp - first_cp == span
                and second_map[0] - first_map[0] == 1 and second_map[1:] == shared_tail
                and last_map[0] - first_map[0] == span and last_map[1:] == shared_tail)

    if step == 2:
        # Range type (2)
        return (last_cp - first_cp == 2 * span
                and first_map[0] - first_cp == 1
                and second_map[0] - second_cp == 1 and second_map[1:] == shared_tail
                and last_map[0] - last_cp == 1 and last_map[1:] == shared_tail)

    return False
def mapping_str(list, mapping):
    """Format the codepoints of mapping as comma-separated C hexadecimal literals.

    Each codepoint is rendered as "0x" followed by at least four lowercase
    hexadecimal digits. The first parameter is not used.
    """
    literals = []
    for codepoint in mapping:
        literals.append("0x" + format(codepoint, "04x"))
    return ",".join(literals)
def _write_c_array(declaration, items):
    # Write one C array definition: the declaration line, the items wrapped
    # at 110 columns, and the closing brace.
    wrapped = textwrap.wrap(", ".join(items), 110,
                        initial_indent = " ", subsequent_indent=" ")
    sys.stdout.write(declaration)
    sys.stdout.write("\n".join(wrapped))
    sys.stdout.write("\n};\n")


# Emit two C arrays per folding length (1 to 3 codepoints): FOLD_MAP_<len>
# holds the codepoints, either as single ones S(cp) or as ranges
# R(first,last), and FOLD_MAP_<len>_DATA holds the corresponding mappings
# (for a range, those of its first and of its last codepoint).
for mapping_len in range(1, 4):
    folding = folding_list[mapping_len-1]
    codepoint_list = list(folding)
    count = len(folding)

    records = []
    data_records = []

    start = 0
    while start < count:
        # Find the end (exclusive) of the longest range starting here.
        stop = start + 1
        while stop < count and is_range_compatible(folding, codepoint_list, start, stop):
            stop += 1

        first_cp = codepoint_list[start]
        if stop - start > 2:
            # Range of codepoints
            last_cp = codepoint_list[stop-1]
            records.append("R(0x{:04x},0x{:04x})".format(first_cp, last_cp))
            data_records.append(mapping_str(data_records, folding[first_cp]))
            data_records.append(mapping_str(data_records, folding[last_cp]))
            start = stop
        else:
            # Single codepoint (too short a run to be stored as a range)
            records.append("S(0x{:04x})".format(first_cp))
            data_records.append(mapping_str(data_records, folding[first_cp]))
            start += 1

    _write_c_array("static const unsigned FOLD_MAP_{}[] = {{\n".format(mapping_len), records)
    _write_c_array("static const unsigned FOLD_MAP_{}_DATA[] = {{\n".format(mapping_len), data_records)

@ -0,0 +1,66 @@
#!/usr/bin/env python3
import os
import sys
import textwrap
self_path = os.path.dirname(os.path.realpath(__file__))

codepoint_list = []

# General categories which make up Unicode punctuation.
category_list = [ "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" ]

# Filter codepoints falling in the right category.
#
# The file is opened in a "with" block so that it gets closed even if parsing
# raises, and it is decoded explicitly as UTF-8 so that the result does not
# depend on the locale of the machine running the script.
with open(os.path.join(self_path, "unicode", "DerivedGeneralCategory.txt"), "r", encoding="utf-8") as f:
    for line in f:
        # Strip the trailing comment, if any.
        comment_off = line.find("#")
        if comment_off >= 0:
            line = line[:comment_off]
        line = line.strip()
        if not line:
            continue

        # Line format: <codepoint or first..last> ; <category>
        char_range, category = line.split(";")
        char_range = char_range.strip()
        category = category.strip()

        if category not in category_list:
            continue

        delim_off = char_range.find("..")
        if delim_off >= 0:
            codepoint0 = int(char_range[:delim_off], 16)
            codepoint1 = int(char_range[delim_off+2:], 16)
            codepoint_list.extend(range(codepoint0, codepoint1 + 1))
        else:
            codepoint_list.append(int(char_range, 16))

codepoint_list.sort()

# Compress runs of consecutive codepoints into R(first,last) records; a
# codepoint without a consecutive neighbour becomes an S(codepoint) record.
index0 = 0
count = len(codepoint_list)

records = list()
while index0 < count:
    index1 = index0 + 1
    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
        index1 += 1

    if index1 - index0 > 1:
        # Range of codepoints
        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
    else:
        # Single codepoint
        records.append("S(0x{:04x})".format(codepoint_list[index0]))

    index0 = index1

sys.stdout.write("static const unsigned PUNCT_MAP[] = {\n")
sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
                    initial_indent = " ", subsequent_indent=" ")))
sys.stdout.write("\n};\n\n")

@ -0,0 +1,66 @@
#!/usr/bin/env python3
import os
import sys
import textwrap
self_path = os.path.dirname(os.path.realpath(__file__))

codepoint_list = []

# General category of the Unicode space separators.
category_list = [ "Zs" ]

# Filter codepoints falling in the right category.
#
# The file is opened in a "with" block so that it gets closed even if parsing
# raises, and it is decoded explicitly as UTF-8 so that the result does not
# depend on the locale of the machine running the script.
with open(os.path.join(self_path, "unicode", "DerivedGeneralCategory.txt"), "r", encoding="utf-8") as f:
    for line in f:
        # Strip the trailing comment, if any.
        comment_off = line.find("#")
        if comment_off >= 0:
            line = line[:comment_off]
        line = line.strip()
        if not line:
            continue

        # Line format: <codepoint or first..last> ; <category>
        char_range, category = line.split(";")
        char_range = char_range.strip()
        category = category.strip()

        if category not in category_list:
            continue

        delim_off = char_range.find("..")
        if delim_off >= 0:
            codepoint0 = int(char_range[:delim_off], 16)
            codepoint1 = int(char_range[delim_off+2:], 16)
            codepoint_list.extend(range(codepoint0, codepoint1 + 1))
        else:
            codepoint_list.append(int(char_range, 16))

codepoint_list.sort()

# Compress runs of consecutive codepoints into R(first,last) records; a
# codepoint without a consecutive neighbour becomes an S(codepoint) record.
index0 = 0
count = len(codepoint_list)

records = list()
while index0 < count:
    index1 = index0 + 1
    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
        index1 += 1

    if index1 - index0 > 1:
        # Range of codepoints
        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
    else:
        # Single codepoint
        records.append("S(0x{:04x})".format(codepoint_list[index0]))

    index0 = index1

sys.stdout.write("static const unsigned WHITESPACE_MAP[] = {\n")
sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
                    initial_indent = " ", subsequent_indent=" ")))
sys.stdout.write("\n};\n\n")

@ -0,0 +1,70 @@
#!/bin/sh
#
# This script attempts to build the project via the cov-build utility and to
# prepare a package for uploading to the Coverity Scan service.
#
# (See http://scan.coverity.com for more info.)

set -e

# Succeed if the given command can be found. ("command -v" is specified by
# POSIX, unlike "which", and prints nothing here.)
have() {
    command -v "$1" >/dev/null 2>&1
}

# Check presence of coverity static analyzer.
if ! have cov-build; then
    echo "Utility cov-build not found in PATH."
    exit 1
fi

# Choose a build system (ninja or GNU make).
if have ninja; then
    BUILD_TOOL=ninja
    GENERATOR=Ninja
elif have make; then
    BUILD_TOOL=make
    # "MSYS Makefiles" is the right generator only in an MSYS/MinGW shell;
    # everywhere else make is driven through "Unix Makefiles".
    case "$(uname -s)" in
        MSYS*|MINGW*)   GENERATOR="MSYS Makefiles" ;;
        *)              GENERATOR="Unix Makefiles" ;;
    esac
else
    echo "No suitable build system found."
    exit 1
fi

# Choose a zip tool.
if have 7za; then
    MKZIP="7za a -r -mx9"
elif have 7z; then
    MKZIP="7z a -r -mx9"
elif have zip; then
    MKZIP="zip -r"
else
    echo "No suitable zip utility found"
    exit 1
fi

# Change dir to project root.
# (Quoted so that a checkout path containing whitespace works too.)
cd "$(dirname "$0")/.."

CWD=$(pwd)
ROOT_DIR="$CWD"
BUILD_DIR="$CWD/coverity"
OUTPUT="$CWD/cov-int.zip"

# Sanity checks.
if [ ! -x "$ROOT_DIR/scripts/coverity.sh" ]; then
    echo "There is some path mismatch."
    exit 1
fi
if [ -e "$BUILD_DIR" ]; then
    echo "Path $BUILD_DIR already exists. Delete it and retry."
    exit 1
fi
if [ -e "$OUTPUT" ]; then
    echo "Path $OUTPUT already exists. Delete it and retry."
    exit 1
fi

# Build the project with the Coverity analysis enabled.
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"
cmake -G "$GENERATOR" "$ROOT_DIR"
cov-build --dir cov-int "$BUILD_TOOL"
# $MKZIP is intentionally unquoted: it holds the tool together with its options.
$MKZIP "$OUTPUT" "cov-int"
cd "$ROOT_DIR"
rm -rf "$BUILD_DIR"

@ -0,0 +1,75 @@
#!/bin/sh
#
# Run this script from build directory.

#set -e

# Quoted so that a checkout path containing whitespace works too.
SELF_DIR=$(dirname "$0")
PROJECT_DIR="$SELF_DIR/.."
TEST_DIR="$PROJECT_DIR/test"


PROGRAM="md2html/md2html"
if [ ! -x "$PROGRAM" ]; then
    echo "Cannot find the $PROGRAM." >&2
    echo "You have to run this script from the build directory." >&2
    exit 1
fi

# Find a Python 3 interpreter.
PYTHON=
if command -v py >/dev/null 2>&1; then
    PYTHON=py
elif command -v python3 >/dev/null 2>&1; then
    PYTHON=python3
elif command -v python >/dev/null 2>&1; then
    # Python 2 prints its version to stderr, hence the 2>&1. An unparsable
    # version is treated as 0.
    PYTHON_MAJOR=$(python --version 2>&1 | awk '{print $2}' | cut -d. -f1)
    if [ "${PYTHON_MAJOR:-0}" -ge 3 ] 2>/dev/null; then
        PYTHON=python
    fi
fi
if [ -z "$PYTHON" ]; then
    # Without this check the test scripts themselves would be executed in
    # place of the interpreter.
    echo "Cannot find a Python 3 interpreter." >&2
    exit 1
fi

# Run one specification test suite.
#   $1 ... heading to print
#   $2 ... file with the test cases (relative to $TEST_DIR)
#   $3 ... extra md2html options (optional)
run_spec() {
    echo
    echo "$1"
    "$PYTHON" "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/$2" -p "$PROGRAM${3:+ $3}"
}

run_spec "CommonMark specification:" "spec.txt"
run_spec "Code coverage & regressions:" "coverage.txt"
run_spec "Permissive e-mail autolinks extension:" "permissive-email-autolinks.txt" "--fpermissive-email-autolinks"
run_spec "Permissive URL autolinks extension:" "permissive-url-autolinks.txt" "--fpermissive-url-autolinks"
run_spec "WWW autolinks extension:" "permissive-www-autolinks.txt" "--fpermissive-www-autolinks"
run_spec "Tables extension:" "tables.txt" "--ftables"
run_spec "Strikethrough extension:" "strikethrough.txt" "--fstrikethrough"
run_spec "Task lists extension:" "tasklists.txt" "--ftasklists"
run_spec "LaTeX extension:" "latex-math.txt" "--flatex-math"
run_spec "Wiki links extension:" "wiki-links.txt" "--fwiki-links --ftables"
run_spec "Underline extension:" "underline.txt" "--funderline"

echo
echo "Pathological input:"
"$PYTHON" "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,56 @@
# Initial value of the WINDOWS_EXPORT_ALL_SYMBOLS property for targets created
# below, so shared libraries export their symbols on Windows without explicit
# export annotations.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1)
# Define DEBUG when compiling C sources with the Debug flags.
# NOTE(review): this edits a directory-scoped flags variable, so it applies to
# every C target declared in this directory and below, not just md4c.
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DDEBUG")
# Build rules for MD4C parser library
# Generate md4c.pc from its template; only @VAR@ references are substituted.
# The relative output name is resolved against the current binary directory.
configure_file(md4c.pc.in md4c.pc @ONLY)
add_library(md4c md4c.c md4c.h)
# Extra warnings, only for compilers whose ID contains "Clang" or "GNU".
if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU")
target_compile_options(md4c PRIVATE -Wall -Wextra)
endif()
# NOTE(review): MD_VERSION and MD_VERSION_MAJOR are not set in this file. They
# are expanded unquoted, so an empty value would shift the property/value
# pairs below -- confirm the including project defines them.
set_target_properties(md4c PROPERTIES
COMPILE_FLAGS "-DMD4C_USE_UTF8"
VERSION ${MD_VERSION}
SOVERSION ${MD_VERSION_MAJOR}
PUBLIC_HEADER md4c.h
)
# Build rules for HTML renderer library
# Generate md4c-html.pc the same way as md4c.pc above.
configure_file(md4c-html.pc.in md4c-html.pc @ONLY)
add_library(md4c-html md4c-html.c md4c-html.h entity.c entity.h)
set_target_properties(md4c-html PROPERTIES
VERSION ${MD_VERSION}
SOVERSION ${MD_VERSION_MAJOR}
PUBLIC_HEADER md4c-html.h
)
# The renderer calls into the parser library (md_parse), so link against it.
target_link_libraries(md4c-html md4c)
# Install rules
#
# Both libraries are installed into the standard lib/bin/include locations and
# recorded in the "md4cConfig" export set, which is installed at the end as a
# CMake package file with imported targets in the md4c:: namespace.
install(
  TARGETS md4c
  EXPORT md4cConfig
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
# The .pc files are generated by configure_file() with a relative output name,
# i.e. into the *current* binary directory. Refer to them through
# CMAKE_CURRENT_BINARY_DIR so the install step also works when this directory
# is not located at <top-level-build>/src (e.g. when vendored as a
# subdirectory of a larger project).
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/md4c.pc"
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
)
install(
  TARGETS md4c-html
  EXPORT md4cConfig
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/md4c-html.pc"
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
)
install(EXPORT md4cConfig DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/md4c/ NAMESPACE md4c::)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,42 @@
/*
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
* Copyright (c) 2016-2019 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef MD4C_ENTITY_H
#define MD4C_ENTITY_H
#include <stdlib.h>
/* One record of the named-entity table.
 *
 * Most entities are formed by single Unicode codepoint, few by two codepoints.
 * Single-codepoint entities have codepoints[1] set to zero. */
struct entity {
const char* name; /* Entity name. NOTE(review): presumably stored in the same form as the lookup key (with '&' and ';') -- confirm in entity.c. */
unsigned codepoints[2]; /* Unicode codepoint(s) the entity stands for; [1] is zero if unused. */
};
/* Find the record for a named entity.
 *
 * name/name_size describe the entity text; it need not be zero-terminated.
 * The HTML renderer passes the verbatim entity as it appears in the source
 * (e.g. "&nbsp;") and treats a NULL result as "unknown entity". */
const struct entity* entity_lookup(const char* name, size_t name_size);
#endif /* MD4C_ENTITY_H */

@ -0,0 +1,573 @@
/*
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
* Copyright (c) 2016-2019 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <string.h>
#include "md4c-html.h"
#include "entity.h"
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
/* C89/90 or old compilers in general may not understand "inline". */
#if defined __GNUC__
#define inline __inline__
#elif defined _MSC_VER
#define inline __inline
#else
#define inline
#endif
#endif
#ifdef _WIN32
#define snprintf _snprintf
#endif
/* Renderer state shared by all parser callbacks during one md_html() call. */
typedef struct MD_HTML_tag MD_HTML;
struct MD_HTML_tag {
void (*process_output)(const MD_CHAR*, MD_SIZE, void*); /* Caller's sink for chunks of generated HTML. */
void* userdata; /* Passed back unchanged as the last argument of process_output(). */
unsigned flags; /* Bitmask of MD_HTML_FLAG_xxxx. */
int image_nesting_level; /* > 0 while rendering the label of an image (i.e. inside alt="..."). */
char escape_map[256]; /* Per-byte bitmask of NEED_xxx_ESC_FLAG; filled in by md_html(). */
};
/* Bits stored in MD_HTML::escape_map[]. */
#define NEED_HTML_ESC_FLAG 0x1 /* Byte must be escaped in HTML text. */
#define NEED_URL_ESC_FLAG 0x2 /* Byte must be escaped in URL attributes. */
/*****************************************
 *** HTML rendering helper functions ***
 *****************************************/
/* ASCII-only character classification by plain range comparison.
 * Note each macro evaluates its argument more than once, so do not pass
 * expressions with side effects. */
#define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9')
#define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z')
#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
/* Hand a chunk of final output to the caller's sink without any escaping. */
static inline void
render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
{
    (*r->process_output)(text, size, r->userdata);
}

/* Output a zero-terminated string verbatim.
 *
 * Keep this as a macro. Most compilers should then be smart enough to replace
 * the strlen() call with a compile-time constant if the string is a C literal.
 * Note the second argument is evaluated twice. */
#define RENDER_VERBATIM(r, verbatim)                                    \
        render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
/* Output text, replacing '&', '<', '>' and '"' with the corresponding HTML
 * entities. Runs of bytes which need no escaping are forwarded in one piece.
 * A byte flagged in the escape map but not handled below is dropped. */
static void
render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    MD_OFFSET run_start = 0;
    MD_OFFSET pos = 0;

    /* Some characters need to be escaped in normal HTML text. */
    #define NEED_HTML_ESC(ch)   (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)

    for(;;) {
        /* Fast path: step over clean bytes four at a time. */
        while(pos + 3 < size) {
            if(NEED_HTML_ESC(data[pos+0]) || NEED_HTML_ESC(data[pos+1]) ||
               NEED_HTML_ESC(data[pos+2]) || NEED_HTML_ESC(data[pos+3]))
                break;
            pos += 4;
        }
        /* Slow path: the remaining clean bytes, one by one. */
        while(pos < size && !NEED_HTML_ESC(data[pos]))
            pos++;

        /* Flush the clean run collected so far. */
        if(pos > run_start)
            render_verbatim(r, data + run_start, pos - run_start);

        if(pos >= size)
            return;

        /* data[pos] is a byte flagged for escaping. */
        if(data[pos] == '&')
            RENDER_VERBATIM(r, "&amp;");
        else if(data[pos] == '<')
            RENDER_VERBATIM(r, "&lt;");
        else if(data[pos] == '>')
            RENDER_VERBATIM(r, "&gt;");
        else if(data[pos] == '"')
            RENDER_VERBATIM(r, "&quot;");

        pos++;
        run_start = pos;
    }
}
/* Output text intended for a URL attribute. '&' becomes "&amp;"; any other
 * byte flagged in the escape map is percent-encoded as %XX with upper-case
 * hexadecimal digits. Everything else is forwarded unchanged. */
static void
render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
    MD_OFFSET run_start = 0;
    MD_OFFSET pos = 0;

    /* Some characters need to be escaped in URL attributes. */
    #define NEED_URL_ESC(ch)    (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)

    for(;;) {
        /* Collect a run of bytes which can go out as they are. */
        while(pos < size && !NEED_URL_ESC(data[pos]))
            pos++;
        if(pos > run_start)
            render_verbatim(r, data + run_start, pos - run_start);

        if(pos >= size)
            return;

        if(data[pos] == '&') {
            RENDER_VERBATIM(r, "&amp;");
        } else {
            unsigned code = (unsigned) data[pos];
            char triplet[3];

            triplet[0] = '%';
            triplet[1] = hex_chars[(code >> 4) & 0xf];
            triplet[2] = hex_chars[(code >> 0) & 0xf];
            render_verbatim(r, triplet, 3);
        }

        pos++;
        run_start = pos;
    }
}
/* Numeric value of a single hexadecimal digit. The caller must pass a valid
 * digit ('0'-'9', 'a'-'f' or 'A'-'F'); no validation happens here. */
static unsigned
hex_val(char ch)
{
    if(ch >= '0' && ch <= '9')
        return ch - '0';

    /* Anything in the upper-case range is measured from 'A', everything else
     * is assumed to be a lower-case digit. */
    return ch - ((ch >= 'A' && ch <= 'Z') ? 'A' : 'a') + 10;
}
/* Encode a Unicode codepoint as UTF-8 and output it through fn_append().
 *
 * Codepoints which cannot be represented as valid UTF-8 text are replaced
 * with U+FFFD (REPLACEMENT CHARACTER): zero, anything above U+10FFFF and the
 * surrogate range U+D800..U+DFFF. (Surrogates may come e.g. from a numeric
 * entity like "&#xD800;"; encoding them as-is would produce a byte sequence
 * which is not well-formed UTF-8.)
 *
 * r          Renderer state, passed through to fn_append().
 * codepoint  The codepoint to output.
 * fn_append  Output function (e.g. render_verbatim or render_html_escaped).
 */
static void
render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
                      void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
{
    /* Explicit casts: the byte values do not fit into a signed char. */
    static const MD_CHAR utf8_replacement_char[] = {
        (MD_CHAR) 0xef, (MD_CHAR) 0xbf, (MD_CHAR) 0xbd
    };

    unsigned char utf8[4];
    size_t n;

    if(codepoint == 0 || codepoint > 0x10ffff ||
       (0xd800 <= codepoint && codepoint <= 0xdfff))
    {
        fn_append(r, utf8_replacement_char, 3);
        return;
    }

    if(codepoint <= 0x7f) {
        n = 1;
        utf8[0] = codepoint;
    } else if(codepoint <= 0x7ff) {
        n = 2;
        utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
        utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else if(codepoint <= 0xffff) {
        n = 3;
        utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
        utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else {
        n = 4;
        utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
        utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
    }

    fn_append(r, (char*)utf8, (MD_SIZE)n);
}
/* Output an entity (text..text+size holds it verbatim, e.g. "&amp;").
 *
 * With MD_HTML_FLAG_VERBATIM_ENTITIES the entity goes to the output untouched.
 * Otherwise numeric entities are decoded and known named entities are
 * translated to their UTF-8 equivalent; unknown named entities are passed to
 * fn_append() as they are. */
static void
render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
              void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
{
    const struct entity* ent;

    if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
        render_verbatim(r, text, size);
        return;
    }

    /* We assume UTF-8 output is what is desired. */
    if(size > 3 && text[1] == '#') {
        /* Numeric entity: hexadecimal (e.g. "&#x12ab;") if the '#' is
         * followed by 'x' or 'X', decimal (e.g. "&#1234;") otherwise.
         * The digits end just before the final ';'. */
        int is_hex = (text[2] == 'x' || text[2] == 'X');
        unsigned codepoint = 0;
        MD_SIZE i;

        for(i = (is_hex ? 3 : 2); i < size-1; i++) {
            if(is_hex)
                codepoint = 16 * codepoint + hex_val(text[i]);
            else
                codepoint = 10 * codepoint + (text[i] - '0');
        }

        render_utf8_codepoint(r, codepoint, fn_append);
        return;
    }

    /* Named entity (e.g. "&nbsp;"). */
    ent = entity_lookup(text, size);
    if(ent != NULL) {
        render_utf8_codepoint(r, ent->codepoints[0], fn_append);
        if(ent->codepoints[1])
            render_utf8_codepoint(r, ent->codepoints[1], fn_append);
        return;
    }

    /* Unknown name: leave it to the output function. */
    fn_append(r, text, size);
}
/* Output an MD_ATTRIBUTE substring by substring. Entities are resolved via
 * render_entity(), a NULL character becomes the replacement character, and
 * any other substring is handed to fn_append() as it is. */
static void
render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
                 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
{
    int idx = 0;

    /* The substring list ends where an offset reaches the attribute size. */
    while(attr->substr_offsets[idx] < attr->size) {
        MD_TEXTTYPE kind = attr->substr_types[idx];
        MD_OFFSET start = attr->substr_offsets[idx];
        MD_SIZE len = attr->substr_offsets[idx+1] - start;
        const MD_CHAR* piece = attr->text + start;

        if(kind == MD_TEXT_NULLCHAR)
            render_utf8_codepoint(r, 0x0000, render_verbatim);
        else if(kind == MD_TEXT_ENTITY)
            render_entity(r, piece, len, fn_append);
        else
            fn_append(r, piece, len);

        idx++;
    }
}
/* Output the opening <ol> tag. The start attribute is emitted only when the
 * list does not begin with 1. */
static void
render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
{
    if(det->start != 1) {
        char tag[64];

        snprintf(tag, sizeof(tag), "<ol start=\"%u\">\n", det->start);
        RENDER_VERBATIM(r, tag);
    } else {
        RENDER_VERBATIM(r, "<ol>\n");
    }
}
/* Output the opening <li> tag. Task list items additionally get a disabled
 * checkbox, which is checked when the task mark is 'x' or 'X'. */
static void
render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
{
    int is_done;

    if(!det->is_task) {
        RENDER_VERBATIM(r, "<li>");
        return;
    }

    is_done = (det->task_mark == 'x' || det->task_mark == 'X');

    RENDER_VERBATIM(r, "<li class=\"task-list-item\"><input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
    if(is_done)
        RENDER_VERBATIM(r, " checked");
    RENDER_VERBATIM(r, ">");
}
/* Output "<pre><code>" for a code block. If the language is known, the tag
 * carries the HTML 5 attribute class="language-LANGNAME". */
static void
render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
{
    int has_lang = (det->lang.text != NULL);

    RENDER_VERBATIM(r, "<pre><code");

    if(has_lang) {
        RENDER_VERBATIM(r, " class=\"language-");
        render_attribute(r, &det->lang, render_html_escaped);
        RENDER_VERBATIM(r, "\"");
    }

    RENDER_VERBATIM(r, ">");
}
/* Output the opening tag of a table cell. cell_type is the tag name ("th" or
 * "td"); an align attribute is added unless the alignment is the default. */
static void
render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
{
    const MD_CHAR* tag_end;

    if(det->align == MD_ALIGN_LEFT)
        tag_end = " align=\"left\">";
    else if(det->align == MD_ALIGN_CENTER)
        tag_end = " align=\"center\">";
    else if(det->align == MD_ALIGN_RIGHT)
        tag_end = " align=\"right\">";
    else
        tag_end = ">";

    RENDER_VERBATIM(r, "<");
    RENDER_VERBATIM(r, cell_type);
    RENDER_VERBATIM(r, tag_end);
}
/* Output the opening <a> tag: href is URL-escaped, the optional title is
 * HTML-escaped. */
static void
render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
{
    int has_title = (det->title.text != NULL);

    RENDER_VERBATIM(r, "<a href=\"");
    render_attribute(r, &det->href, render_url_escaped);

    if(has_title) {
        /* Close the href value and open the title value. */
        RENDER_VERBATIM(r, "\" title=\"");
        render_attribute(r, &det->title, render_html_escaped);
    }

    RENDER_VERBATIM(r, "\">");
}
/* Start an <img> tag: output the URL-escaped src and leave the alt attribute
 * open, so that the following text callbacks fill it. Entering the image
 * label is recorded in image_nesting_level. */
static void
render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
{
    RENDER_VERBATIM(r, "<img src=\"");
    render_attribute(r, &det->src, render_url_escaped);
    RENDER_VERBATIM(r, "\" alt=\"");

    r->image_nesting_level += 1;
}

/* Finish the <img> tag started by render_open_img_span(): close the alt
 * value, add the optional HTML-escaped title and end the tag (as " />" in
 * XHTML mode). Leaving the image label is recorded in image_nesting_level. */
static void
render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
{
    int has_title = (det->title.text != NULL);

    if(has_title) {
        RENDER_VERBATIM(r, "\" title=\"");
        render_attribute(r, &det->title, render_html_escaped);
    }

    if(r->flags & MD_HTML_FLAG_XHTML)
        RENDER_VERBATIM(r, "\" />");
    else
        RENDER_VERBATIM(r, "\">");

    r->image_nesting_level -= 1;
}
/* Output the opening <x-wikilink> tag; the link target goes HTML-escaped
 * into its data-target attribute. */
static void
render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
{
    const MD_ATTRIBUTE* target = &det->target;

    RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
    render_attribute(r, target, render_html_escaped);
    RENDER_VERBATIM(r, "\">");
}
/**************************************
*** HTML renderer implementation ***
**************************************/
/* Parser callback: a block is being entered. Outputs the opening markup of
 * the block, if it has any. Always returns 0 (continue parsing). */
static int
enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
    static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
    MD_HTML* r = (MD_HTML*) userdata;
    /* Fixed markup to output, or NULL if the block has none or if a helper
     * has already produced the output. */
    const MD_CHAR* markup = NULL;

    switch(type) {
        /* Blocks without any opening markup. */
        case MD_BLOCK_DOC:
        case MD_BLOCK_HTML:
            break;

        /* Blocks whose opening tag depends on the block detail. */
        case MD_BLOCK_OL:
            render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail);
            break;
        case MD_BLOCK_LI:
            render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail);
            break;
        case MD_BLOCK_CODE:
            render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail);
            break;
        case MD_BLOCK_TH:
            render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail);
            break;
        case MD_BLOCK_TD:
            render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail);
            break;

        /* Blocks with a fixed opening tag. */
        case MD_BLOCK_QUOTE:    markup = "<blockquote>\n"; break;
        case MD_BLOCK_UL:       markup = "<ul>\n"; break;
        case MD_BLOCK_HR:       markup = (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"; break;
        case MD_BLOCK_H:        markup = head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]; break;
        case MD_BLOCK_P:        markup = "<p>"; break;
        case MD_BLOCK_TABLE:    markup = "<table>\n"; break;
        case MD_BLOCK_THEAD:    markup = "<thead>\n"; break;
        case MD_BLOCK_TBODY:    markup = "<tbody>\n"; break;
        case MD_BLOCK_TR:       markup = "<tr>\n"; break;
    }

    if(markup != NULL)
        RENDER_VERBATIM(r, markup);

    return 0;
}
/* Parser callback: a block is being left. Outputs the closing markup of the
 * block, if it has any. Always returns 0 (continue parsing). */
static int
leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
    static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
    MD_HTML* r = (MD_HTML*) userdata;
    /* Closing markup to output, or NULL if the block has none. */
    const MD_CHAR* markup = NULL;

    switch(type) {
        /* Blocks without any closing markup. */
        case MD_BLOCK_DOC:
        case MD_BLOCK_HR:
        case MD_BLOCK_HTML:
            break;

        case MD_BLOCK_QUOTE:    markup = "</blockquote>\n"; break;
        case MD_BLOCK_UL:       markup = "</ul>\n"; break;
        case MD_BLOCK_OL:       markup = "</ol>\n"; break;
        case MD_BLOCK_LI:       markup = "</li>\n"; break;
        case MD_BLOCK_H:        markup = head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]; break;
        case MD_BLOCK_CODE:     markup = "</code></pre>\n"; break;
        case MD_BLOCK_P:        markup = "</p>\n"; break;
        case MD_BLOCK_TABLE:    markup = "</table>\n"; break;
        case MD_BLOCK_THEAD:    markup = "</thead>\n"; break;
        case MD_BLOCK_TBODY:    markup = "</tbody>\n"; break;
        case MD_BLOCK_TR:       markup = "</tr>\n"; break;
        case MD_BLOCK_TH:       markup = "</th>\n"; break;
        case MD_BLOCK_TD:       markup = "</td>\n"; break;
    }

    if(markup != NULL)
        RENDER_VERBATIM(r, markup);

    return 0;
}
/* Parser callback: a span is being entered. Outputs the opening tag of the
 * span, unless we are inside an image label. Always returns 0.
 *
 * Together with leave_span_callback() this maintains
 * MD_HTML::image_nesting_level as the depth of image labels we are in:
 * the outermost image is counted by render_open_img_span() and
 * render_close_img_span(), images nested in its label are counted here. */
static int
enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_HTML* r = (MD_HTML*) userdata;

    if(r->image_nesting_level > 0) {
        /* We are inside a Markdown image label. Markdown allows to use any
         * emphasis and other rich contents in that context similarly as in
         * any link label.
         *
         * However, unlike in the case of links (where that contents becomes
         * contents of the <a>...</a> tag), in the case of images the contents
         * is supposed to fall into the attribute alt: <img alt="...">.
         *
         * In that context we naturally cannot output nested HTML tags. So lets
         * suppress them and only output the plain text (i.e. what falls into
         * text() callback).
         *
         * This make-it-a-plain-text approach is the recommended practice by
         * CommonMark specification (for HTML output).
         */

        /* Still, a nested image has to be counted. Otherwise its leave event
         * would be mistaken for the end of the outer image and the <img> tag
         * would be closed too early (and with the title of the wrong image). */
        if(type == MD_SPAN_IMG)
            r->image_nesting_level++;
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_VERBATIM(r, "<em>"); break;
        case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "<strong>"); break;
        case MD_SPAN_U:                 RENDER_VERBATIM(r, "<u>"); break;
        case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
        case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
        case MD_SPAN_CODE:              RENDER_VERBATIM(r, "<code>"); break;
        case MD_SPAN_DEL:               RENDER_VERBATIM(r, "<del>"); break;
        case MD_SPAN_LATEXMATH:         RENDER_VERBATIM(r, "<x-equation>"); break;
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
        case MD_SPAN_WIKILINK:          render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
    }

    return 0;
}

/* Parser callback: a span is being left. Outputs the closing tag of the
 * span, unless we are inside an image label. Always returns 0. */
static int
leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_HTML* r = (MD_HTML*) userdata;

    if(r->image_nesting_level > 0) {
        /* Ditto as in enter_span_callback(): everything is suppressed, except
         * that we have to keep track of the images being left and to allow
         * the end of the outermost <img> tag. */
        if(type == MD_SPAN_IMG) {
            if(r->image_nesting_level == 1) {
                /* The outermost image; this also drops the level to zero. */
                render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
            } else {
                /* A nested image; we remain inside the outer alt attribute. */
                r->image_nesting_level--;
            }
        }
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_VERBATIM(r, "</em>"); break;
        case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "</strong>"); break;
        case MD_SPAN_U:                 RENDER_VERBATIM(r, "</u>"); break;
        case MD_SPAN_A:                 RENDER_VERBATIM(r, "</a>"); break;
        case MD_SPAN_IMG:               /*noop, handled above*/ break;
        case MD_SPAN_CODE:              RENDER_VERBATIM(r, "</code>"); break;
        case MD_SPAN_DEL:               RENDER_VERBATIM(r, "</del>"); break;
        case MD_SPAN_LATEXMATH:         /*fall through*/
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
        case MD_SPAN_WIKILINK:          RENDER_VERBATIM(r, "</x-wikilink>"); break;
    }

    return 0;
}
/* Parser callback: a piece of text. Outputs it in the way appropriate for
 * its type; inside an image label (the alt attribute) line breaks are
 * rendered as a single space. Always returns 0. */
static int
text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
{
    MD_HTML* r = (MD_HTML*) userdata;
    int in_img_label = (r->image_nesting_level != 0);

    if(type == MD_TEXT_NULLCHAR) {
        render_utf8_codepoint(r, 0x0000, render_verbatim);
    } else if(type == MD_TEXT_BR) {
        if(in_img_label)
            RENDER_VERBATIM(r, " ");
        else if(r->flags & MD_HTML_FLAG_XHTML)
            RENDER_VERBATIM(r, "<br />\n");
        else
            RENDER_VERBATIM(r, "<br>\n");
    } else if(type == MD_TEXT_SOFTBR) {
        if(in_img_label)
            RENDER_VERBATIM(r, " ");
        else
            RENDER_VERBATIM(r, "\n");
    } else if(type == MD_TEXT_HTML) {
        /* Raw HTML goes out untouched. */
        render_verbatim(r, text, size);
    } else if(type == MD_TEXT_ENTITY) {
        render_entity(r, text, size, render_html_escaped);
    } else {
        render_html_escaped(r, text, size);
    }

    return 0;
}
/* Parser callback: a debug message. It is printed to stderr with the prefix
 * "MD4C: ", but only if MD_HTML_FLAG_DEBUG is set. */
static void
debug_log_callback(const char* msg, void* userdata)
{
    const MD_HTML* r = (const MD_HTML*) userdata;

    if(!(r->flags & MD_HTML_FLAG_DEBUG))
        return;

    fprintf(stderr, "MD4C: %s\n", msg);
}
/* Render Markdown into HTML.
 *
 * Sets up the renderer state and the parser callbacks, builds the map of
 * bytes which need escaping, optionally skips a leading UTF-8 byte order
 * mark, and runs md_parse(). Returns whatever md_parse() returns. */
int
md_html(const MD_CHAR* input, MD_SIZE input_size,
        void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
        void* userdata, unsigned parser_flags, unsigned renderer_flags)
{
    /* Characters escaped in HTML text. */
    static const char html_special[] = "\"&<>";
    /* Non-alphanumeric characters allowed unescaped in URL attributes. */
    static const char url_verbatim[] = "~-_.+!*(),%#@?=;:/,+$";

    MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
    MD_PARSER parser = {
        0,
        parser_flags,
        enter_block_callback,
        leave_block_callback,
        enter_span_callback,
        leave_span_callback,
        text_callback,
        debug_log_callback,
        NULL
    };
    int code;

    /* Build map of characters which need escaping.
     * Note strchr() also matches the terminating zero of the searched
     * string, so byte 0 counts as present in both lists. */
    for(code = 0; code < 256; code++) {
        unsigned char ch = (unsigned char) code;
        int html_esc = (strchr(html_special, ch) != NULL);
        int url_esc = (!ISALNUM(ch) && strchr(url_verbatim, ch) == NULL);

        if(html_esc)
            render.escape_map[code] |= NEED_HTML_ESC_FLAG;
        if(url_esc)
            render.escape_map[code] |= NEED_URL_ESC_FLAG;
    }

    /* Consider skipping UTF-8 byte order mark (BOM). */
    if((renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM) && sizeof(MD_CHAR) == 1) {
        static const MD_CHAR bom[3] = { 0xef, 0xbb, 0xbf };

        if(input_size >= sizeof(bom)) {
            if(memcmp(input, bom, sizeof(bom)) == 0) {
                input += sizeof(bom);
                input_size -= sizeof(bom);
            }
        }
    }

    return md_parse(input, input_size, &parser, (void*) &render);
}

@ -0,0 +1,68 @@
/*
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
* Copyright (c) 2016-2017 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef MD4C_HTML_H
#define MD4C_HTML_H
#include "md4c.h"
#ifdef __cplusplus
extern "C" {
#endif
/* If set, debug output from md_parse() is sent to stderr. */
#define MD_HTML_FLAG_DEBUG 0x0001
/* If set, entities are output verbatim instead of being translated to their
 * UTF-8 equivalent. */
#define MD_HTML_FLAG_VERBATIM_ENTITIES 0x0002
/* If set, a UTF-8 byte order mark (bytes EF BB BF) at the very beginning of
 * the input is skipped. */
#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004
/* If set, tags without contents are output in the XHTML form, i.e. <br />,
 * <hr /> and <img ... /> instead of <br>, <hr> and <img ...>. */
#define MD_HTML_FLAG_XHTML 0x0008
/* Render Markdown into HTML.
 *
 * Note only contents of <body> tag is generated. Caller must generate
 * HTML header/footer manually before/after calling md_html().
 *
 * Params input and input_size specify the Markdown input.
 * Callback process_output() gets called with chunks of HTML output.
 * (Typical implementation may just output the bytes to a file or append to
 * some buffer).
 * Param userdata is just propagated back to process_output() callback.
 * Param parser_flags are flags from md4c.h propagated to md_parse().
 * Param renderer_flags is bitmask of MD_HTML_FLAG_xxxx.
 *
 * Returns -1 on error (if md_parse() fails.)
 * Returns 0 on success.
 */
int md_html(const MD_CHAR* input, MD_SIZE input_size,
void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
void* userdata, unsigned parser_flags, unsigned renderer_flags);
#ifdef __cplusplus
} /* extern "C" { */
#endif
#endif /* MD4C_HTML_H */

@ -0,0 +1,13 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: @PROJECT_NAME@ HTML renderer
Description: Markdown to HTML converter library.
Version: @PROJECT_VERSION@
URL: @PROJECT_URL@
Requires: md4c = @PROJECT_VERSION@
Libs: -L${libdir} -lmd4c-html
Cflags: -I${includedir}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,405 @@
/*
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
* Copyright (c) 2016-2020 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef MD4C_H
#define MD4C_H
#ifdef __cplusplus
extern "C" {
#endif
/* MD_CHAR is the character type of all text passed to and from MD4C:
 * WCHAR if MD4C_USE_UTF16 is defined (Windows only), plain char otherwise. */
#if defined MD4C_USE_UTF16
/* Magic to support UTF-16. Note that in order to use it, you have to define
 * the macro MD4C_USE_UTF16 both when building MD4C as well as when
 * including this header in your code. */
#ifdef _WIN32
#include <windows.h>
typedef WCHAR MD_CHAR;
#else
#error MD4C_USE_UTF16 is only supported on Windows.
#endif
#else
typedef char MD_CHAR;
#endif
/* Unsigned types used for text sizes and for offsets into text, both counted
 * in MD_CHAR units. */
typedef unsigned MD_SIZE;
typedef unsigned MD_OFFSET;
/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 */
typedef enum MD_BLOCKTYPE {
/* <body>...</body> */
MD_BLOCK_DOC = 0,
/* <blockquote>...</blockquote> */
MD_BLOCK_QUOTE,
/* <ul>...</ul>
 * Detail: Structure MD_BLOCK_UL_DETAIL. */
MD_BLOCK_UL,
/* <ol>...</ol>
 * Detail: Structure MD_BLOCK_OL_DETAIL. */
MD_BLOCK_OL,
/* <li>...</li>
 * Detail: Structure MD_BLOCK_LI_DETAIL. */
MD_BLOCK_LI,
/* <hr> */
MD_BLOCK_HR,
/* <h1>...</h1> (for levels up to 6)
 * Detail: Structure MD_BLOCK_H_DETAIL. */
MD_BLOCK_H,
/* <pre><code>...</code></pre>
 * Detail: Structure MD_BLOCK_CODE_DETAIL.
 * Note the text lines within code blocks are terminated with '\n'
 * instead of explicit MD_TEXT_BR. */
MD_BLOCK_CODE,
/* Raw HTML block. This itself does not correspond to any particular HTML
 * tag. The contents of it _is_ raw HTML source intended to be put
 * in verbatim form to the HTML output. */
MD_BLOCK_HTML,
/* <p>...</p> */
MD_BLOCK_P,
/* <table>...</table> and its contents.
 * Detail: Structure MD_BLOCK_TABLE_DETAIL (for MD_BLOCK_TABLE),
 * structure MD_BLOCK_TD_DETAIL (for MD_BLOCK_TH and MD_BLOCK_TD)
 * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
MD_BLOCK_TABLE,
MD_BLOCK_THEAD, /* <thead>...</thead> */
MD_BLOCK_TBODY, /* <tbody>...</tbody> */
MD_BLOCK_TR, /* <tr>...</tr> */
MD_BLOCK_TH, /* <th>...</th> */
MD_BLOCK_TD /* <td>...</td> */
} MD_BLOCKTYPE;
/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item. */
typedef enum MD_SPANTYPE {
/* <em>...</em> */
MD_SPAN_EM,
/* <strong>...</strong> */
MD_SPAN_STRONG,
/* <a href="xxx">...</a>
 * Detail: Structure MD_SPAN_A_DETAIL. */
MD_SPAN_A,
/* <img src="xxx">
 * Detail: Structure MD_SPAN_IMG_DETAIL.
 * Note: Image text can contain nested spans and even nested images.
 * If rendered into ALT attribute of HTML <IMG> tag, it is the responsibility
 * of the renderer (the code handling the callbacks) to deal with it.
 */
MD_SPAN_IMG,
/* <code>...</code> */
MD_SPAN_CODE,
/* <del>...</del>
 * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
 */
MD_SPAN_DEL,
/* For recognizing inline ($) and display ($$) equations
 * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
 */
MD_SPAN_LATEXMATH,
MD_SPAN_LATEXMATH_DISPLAY,
/* Wiki links
 * Detail: Structure MD_SPAN_WIKILINK_DETAIL.
 * Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
 */
MD_SPAN_WIKILINK,
/* <u>...</u>
 * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
MD_SPAN_U
} MD_SPANTYPE;
/* Text is the actual textual contents of span. */
typedef enum MD_TEXTTYPE {
/* Normal text. */
MD_TEXT_NORMAL = 0,
/* NULL character. CommonMark requires replacing NULL character with
 * the replacement char U+FFFD, so this allows caller to do that easily. */
MD_TEXT_NULLCHAR,
/* Line breaks.
 * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
 * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
MD_TEXT_BR, /* <br> (hard break) */
MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */
/* Entity.
 * (a) Named entity, e.g. &nbsp;
 * (Note MD4C does not have a list of known entities.
 * Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
 * treated as a named entity.)
 * (b) Numerical entity, e.g. &#1234;
 * (c) Hexadecimal entity, e.g. &#x12AB;
 *
 * As MD4C is mostly encoding agnostic, application gets the verbatim
 * entity text into the MD_PARSER::text_callback(). Translating it (or not)
 * is up to the application. */
MD_TEXT_ENTITY,
/* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
 * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
 * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
 * kind of text. */
MD_TEXT_CODE,
/* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
 * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
 * The text contains verbatim '\n' for the new lines. */
MD_TEXT_HTML,
/* Text is inside an equation. This is processed the same way as inlined code
 * spans (`code`). */
MD_TEXT_LATEXMATH
} MD_TEXTTYPE;
/* Alignment enumeration.
 * Used for table cells, see MD_BLOCK_TD_DETAIL::align. */
typedef enum MD_ALIGN {
MD_ALIGN_DEFAULT = 0, /* When unspecified. */
MD_ALIGN_LEFT,
MD_ALIGN_CENTER,
MD_ALIGN_RIGHT
} MD_ALIGN;
/* String attribute.
 *
 * This wraps strings which are outside of a normal text flow and which are
 * propagated within various detailed structures, but which still may contain
 * string portions of different types like e.g. entities.
 *
 * So, for example, lets consider this image:
 *
 * ![image alt text](http://example.org/image.png 'foo &quot; bar')
 *
 * The image alt text is propagated as a normal text via the MD_PARSER::text()
 * callback. However, the image title ('foo &quot; bar') is propagated as
 * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
 *
 * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
 * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
 * -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
 * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
 * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
 *
 * Note that these invariants are always guaranteed:
 * -- substr_offsets[0] == 0
 * -- substr_offsets[LAST+1] == size
 * -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
 * substrings can appear. This could change only if the specification
 * changes.
 */
typedef struct MD_ATTRIBUTE {
const MD_CHAR* text; /* The attribute text; NULL when the attribute is not present. Not zero-terminated, see size. */
MD_SIZE size; /* Length of the text. */
const MD_TEXTTYPE* substr_types; /* Type of each substring. */
const MD_OFFSET* substr_offsets; /* Start offset of each substring, plus one final entry equal to size. */
} MD_ATTRIBUTE;
/* The structures below are what the detail pointer of the block callbacks
 * points to for the respective block type. */
/* Detailed info for MD_BLOCK_UL. */
typedef struct MD_BLOCK_UL_DETAIL {
int is_tight; /* Non-zero if tight list, zero if loose. */
MD_CHAR mark; /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
} MD_BLOCK_UL_DETAIL;
/* Detailed info for MD_BLOCK_OL. */
typedef struct MD_BLOCK_OL_DETAIL {
unsigned start; /* Start index of the ordered list. */
int is_tight; /* Non-zero if tight list, zero if loose. */
MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
} MD_BLOCK_OL_DETAIL;
/* Detailed info for MD_BLOCK_LI. */
typedef struct MD_BLOCK_LI_DETAIL {
int is_task; /* Can be non-zero only with MD_FLAG_TASKLISTS */
MD_CHAR task_mark; /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
} MD_BLOCK_LI_DETAIL;
/* Detailed info for MD_BLOCK_H. */
typedef struct MD_BLOCK_H_DETAIL {
unsigned level; /* Header level (1 - 6) */
} MD_BLOCK_H_DETAIL;
/* Detailed info for MD_BLOCK_CODE. */
typedef struct MD_BLOCK_CODE_DETAIL {
MD_ATTRIBUTE info; /* NOTE(review): presumably the whole info string following the opening fence -- confirm in md4c.c. */
MD_ATTRIBUTE lang; /* Language of the code; lang.text is NULL if unknown. */
MD_CHAR fence_char; /* The character used for fenced code block; or zero for indented code block. */
} MD_BLOCK_CODE_DETAIL;
/* Detailed info for MD_BLOCK_TABLE. */
typedef struct MD_BLOCK_TABLE_DETAIL {
unsigned col_count; /* Count of columns in the table. */
unsigned head_row_count; /* Count of rows in the table header (currently always 1) */
unsigned body_row_count; /* Count of rows in the table body */
} MD_BLOCK_TABLE_DETAIL;
/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
typedef struct MD_BLOCK_TD_DETAIL {
MD_ALIGN align; /* Alignment of the cell contents; MD_ALIGN_DEFAULT when unspecified. */
} MD_BLOCK_TD_DETAIL;
/* The structures below are what the detail pointer of the span callbacks
 * points to for the respective span type. */
/* Detailed info for MD_SPAN_A. */
typedef struct MD_SPAN_A_DETAIL {
MD_ATTRIBUTE href; /* Link destination. */
MD_ATTRIBUTE title; /* Link title; title.text is NULL if there is none. */
} MD_SPAN_A_DETAIL;
/* Detailed info for MD_SPAN_IMG. */
typedef struct MD_SPAN_IMG_DETAIL {
MD_ATTRIBUTE src; /* Image source. */
MD_ATTRIBUTE title; /* Image title; title.text is NULL if there is none. */
} MD_SPAN_IMG_DETAIL;
/* Detailed info for MD_SPAN_WIKILINK.
 * Note the struct tag lacks the _DETAIL suffix used by the other structures;
 * refer to the type by its typedef name MD_SPAN_WIKILINK_DETAIL. */
typedef struct MD_SPAN_WIKILINK {
MD_ATTRIBUTE target; /* Target of the wiki link. */
} MD_SPAN_WIKILINK_DETAIL;
/* Flags specifying extensions/deviations from CommonMark specification.
 *
 * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
 * The following flags may allow some extensions or deviations from it.
 *
 * Each flag is a single bit so they can be combined with bitwise OR.
 * (Bit 0x0080 is not assigned in this list.)
 */
#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */
#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */
#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */
#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */
#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
/* Shortcuts combining several of the flags above. */
#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
/* Convenient sets of flags corresponding to well-known Markdown dialects.
 *
 * Note we may only support subset of features of the referred dialect.
 * The constant just enables those extensions which bring us as close as
 * possible given what features we implement.
 *
 * ABI compatibility note: Meaning of these can change in time as new
 * extensions, bringing the dialect closer to the original, are implemented.
 */
#define MD_DIALECT_COMMONMARK 0
#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
/* Parser structure.
*/
typedef struct MD_PARSER {
/* Reserved. Set to zero.
*/
unsigned abi_version;
/* Dialect options. Bitmask of MD_FLAG_xxxx values.
*/
unsigned flags;
/* Caller-provided rendering callbacks.
*
* For some block/span types, more detailed information is provided in a
* type-specific structure pointed by the argument 'detail'.
*
* The last argument of all callbacks, 'userdata', is just propagated from
* md_parse() and is available for any use by the application.
*
* Note any strings provided to the callbacks as their arguments or as
* members of any detail structure are generally not zero-terminated.
* Application has to take the respective size information into account.
*
* Any rendering callback may abort further parsing of the document by
* returning non-zero.
*/
int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
/* Debug callback. Optional (may be NULL).
*
* If provided and something goes wrong, this function gets called.
* This is intended for debugging and problem diagnosis for developers;
* it is not intended to provide any errors suitable for displaying to an
* end user.
*/
void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
/* Reserved. Set to NULL.
*/
void (*syntax)(void);
} MD_PARSER;
/* For backward compatibility. Do not use in new code.
*/
typedef MD_PARSER MD_RENDERER;
/* Parse the Markdown document stored in the string 'text' of size 'size'.
* The parser provides callbacks to be called during the parsing so the
* caller can render the document on the screen or convert the Markdown
* to another format.
*
* Zero is returned on success. If a runtime error occurs (e.g. a memory
* allocation fails), -1 is returned. If the processing is aborted due to any
* callback returning non-zero, the return value of the callback is returned.
*/
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
#ifdef __cplusplus
} /* extern "C" { */
#endif
#endif /* MD4C_H */

@ -0,0 +1,13 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Markdown parser library with a SAX-like callback-based interface.
Version: @PROJECT_VERSION@
URL: @PROJECT_URL@
Requires:
Libs: -L${libdir} -lmd4c
Cflags: -I${includedir}

@ -0,0 +1,64 @@
The CommonMark spec (spec.txt) and DTD (CommonMark.dtd) are
Copyright (C) 2014-16 John MacFarlane
Released under the Creative Commons CC-BY-SA 4.0 license:
<http://creativecommons.org/licenses/by-sa/4.0/>.
---
The test software in test/ and the programs in tools/ are
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
The normalization code in runtests.py was derived from the
markdowntest project, Copyright 2013 Karl Dubost:
The MIT License (MIT)
Copyright (c) 2013 Karl Dubost
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ctypes import CDLL, c_char_p, c_long
from subprocess import *
import platform
import os
def pipe_through_prog(prog, text):
    """Run an external converter and capture what it produces.

    ``prog`` is a command line that is split on whitespace; ``text`` is
    encoded as UTF-8 and written to the child's standard input.

    Returns a three-element list: the child's return code, its standard
    output decoded as UTF-8, and its standard error as raw bytes.
    """
    child = Popen(prog.split(), stdin=PIPE, stdout=PIPE, stderr=PIPE)
    stdout_bytes, stderr_bytes = child.communicate(input=text.encode('utf-8'))
    return [child.returncode, stdout_bytes.decode('utf-8'), stderr_bytes]
def use_library(lib, text):
    """Convert ``text`` by calling ``lib`` in-process.

    ``lib`` is called as ``lib(utf8_bytes, byte_length, 0)`` and must return
    UTF-8 encoded bytes.

    Returns ``[0, html, '']`` so the result has the same shape as the one
    produced by pipe_through_prog().
    """
    encoded = text.encode('utf-8')
    html_bytes = lib(encoded, len(encoded), 0)
    return [0, html_bytes.decode('utf-8'), '']
class CMark:
    """Uniform front end to the Markdown-to-HTML converter under test.

    After construction, ``self.to_html(text)`` converts ``text`` and returns
    a three-element list ``[returncode, html, stderr]``.
    """

    def __init__(self, prog=None, library_dir=None):
        """Select the conversion back end.

        prog        -- command line of an external converter; when given,
                       text is piped through it (see pipe_through_prog()).
        library_dir -- directory holding the cmark shared library; used only
                       when ``prog`` is not given.  Defaults to "build/src".
        """
        self.prog = prog
        if prog:
            self.to_html = lambda x: pipe_through_prog(prog, x)
        else:
            # Imported locally so this class does not depend on the
            # module-level ctypes import list being extended.
            from ctypes import c_int, c_size_t

            sysname = platform.system()
            if sysname == 'Darwin':
                libname = "libcmark.dylib"
            elif sysname == 'Windows':
                libname = "cmark.dll"
            else:
                libname = "libcmark.so"
            if library_dir:
                libpath = os.path.join(library_dir, libname)
            else:
                libpath = os.path.join("build", "src", libname)
            cmark = CDLL(libpath)
            markdown = cmark.cmark_markdown_to_html
            markdown.restype = c_char_p
            # use_library() calls this with three arguments
            # (bytes, length, options), so all three must be declared.
            # The length is declared as size_t because c_long is narrower
            # than size_t on 64-bit Windows.
            markdown.argtypes = [c_char_p, c_size_t, c_int]
            self.to_html = lambda x: use_library(markdown, x)

@ -0,0 +1,522 @@
# Coverage
This file is just a collection of unit tests not covered elsewhere.
Most notably, regression tests, tests improving code coverage and other useful
things may be dropped here.
(However any tests requiring any additional command line option, like enabling
an extension, must be included in their respective files.)
## GitHub Issues
### [Issue 2](https://github.com/mity/md4c/issues/2)
Raw HTML block:
```````````````````````````````` example
<gi att1=tok1 att2=tok2>
.
<gi att1=tok1 att2=tok2>
````````````````````````````````
Inline:
```````````````````````````````` example
foo <gi att1=tok1 att2=tok2> bar
.
<p>foo <gi att1=tok1 att2=tok2> bar</p>
````````````````````````````````
Inline with a line break:
```````````````````````````````` example
foo <gi att1=tok1
att2=tok2> bar
.
<p>foo <gi att1=tok1
att2=tok2> bar</p>
````````````````````````````````
### [Issue 4](https://github.com/mity/md4c/issues/4)
```````````````````````````````` example
![alt text with *entity* &copy;](img.png 'title')
.
<p><img src="img.png" alt="alt text with entity ©" title="title"></p>
````````````````````````````````
### [Issue 9](https://github.com/mity/md4c/issues/9)
```````````````````````````````` example
> [foo
> bar]: /url
>
> [foo bar]
.
<blockquote>
<p><a href="/url">foo
bar</a></p>
</blockquote>
````````````````````````````````
### [Issue 10](https://github.com/mity/md4c/issues/10)
```````````````````````````````` example
[x]:
x
- <?
x
.
<ul>
<li><?
x
</li>
</ul>
````````````````````````````````
### [Issue 11](https://github.com/mity/md4c/issues/11)
```````````````````````````````` example
x [link](/url "foo &ndash; bar") x
.
<p>x <a href="/url" title="foo bar">link</a> x</p>
````````````````````````````````
### [Issue 14](https://github.com/mity/md4c/issues/14)
```````````````````````````````` example
a***b* c*
.
<p>a*<em><em>b</em> c</em></p>
````````````````````````````````
### [Issue 15](https://github.com/mity/md4c/issues/15)
```````````````````````````````` example
***b* c*
.
<p>*<em><em>b</em> c</em></p>
````````````````````````````````
### [Issue 21](https://github.com/mity/md4c/issues/21)
```````````````````````````````` example
a*b**c*
.
<p>a<em>b**c</em></p>
````````````````````````````````
### [Issue 33](https://github.com/mity/md4c/issues/33)
```````````````````````````````` example
```&amp;&amp;&amp;&amp;&amp;&amp;&amp;&amp;
.
<pre><code class="language-&amp;&amp;&amp;&amp;&amp;&amp;&amp;&amp;"></code></pre>
````````````````````````````````
### [Issue 36](https://github.com/mity/md4c/issues/36)
```````````````````````````````` example
__x_ _x___
.
<p><em><em>x</em> <em>x</em></em>_</p>
````````````````````````````````
### [Issue 39](https://github.com/mity/md4c/issues/39)
```````````````````````````````` example
[\\]: x
.
````````````````````````````````
### [Issue 40](https://github.com/mity/md4c/issues/40)
```````````````````````````````` example
[x](url
'title'
)x
.
<p><a href="url" title="title">x</a>x</p>
````````````````````````````````
### [Issue 65](https://github.com/mity/md4c/issues/65)
```````````````````````````````` example
`
.
<p>`</p>
````````````````````````````````
### [Issue 74](https://github.com/mity/md4c/issues/74)
```````````````````````````````` example
[f]:
-
xx
-
.
<pre><code>xx
</code></pre>
<ul>
<li></li>
</ul>
````````````````````````````````
### [Issue 78](https://github.com/mity/md4c/issues/78)
```````````````````````````````` example
[SS ẞ]: /url
[ẞ SS]
.
<p><a href="/url">ẞ SS</a></p>
````````````````````````````````
### [Issue 83](https://github.com/mity/md4c/issues/83)
```````````````````````````````` example
foo
>
.
<p>foo</p>
<blockquote>
</blockquote>
````````````````````````````````
### [Issue 95](https://github.com/mity/md4c/issues/95)
```````````````````````````````` example
. foo
.
<p>. foo</p>
````````````````````````````````
### [Issue 96](https://github.com/mity/md4c/issues/96)
```````````````````````````````` example
[ab]: /foo
[a] [ab] [abc]
.
<p>[a] <a href="/foo">ab</a> [abc]</p>
````````````````````````````````
```````````````````````````````` example
[a b]: /foo
[a b]
.
<p><a href="/foo">a b</a></p>
````````````````````````````````
### [Issue 97](https://github.com/mity/md4c/issues/97)
```````````````````````````````` example
*a **b c* d**
.
<p><em>a <em><em>b c</em> d</em></em></p>
````````````````````````````````
### [Issue 100](https://github.com/mity/md4c/issues/100)
```````````````````````````````` example
<foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123>
.
<p><a href="mailto:foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123">foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123</a></p>
````````````````````````````````
```````````````````````````````` example
<foo@123456789012345678901234567890123456789012345678901234567890123x.123456789012345678901234567890123456789012345678901234567890123>
.
<p>&lt;foo@123456789012345678901234567890123456789012345678901234567890123x.123456789012345678901234567890123456789012345678901234567890123&gt;</p>
````````````````````````````````
(Note the `x` here which turns it over the max. allowed length limit.)
### [Issue 107](https://github.com/mity/md4c/issues/107)
```````````````````````````````` example
***foo *bar baz***
.
<p>*<strong>foo <em>bar baz</em></strong></p>
````````````````````````````````
### [Issue 124](https://github.com/mity/md4c/issues/124)
```````````````````````````````` example
~~~
x
~~~
~~~
x
~~~
.
<pre><code> x
</code></pre>
<pre><code> x
</code></pre>
````````````````````````````````
### [Issue 131](https://github.com/mity/md4c/issues/131)
```````````````````````````````` example
[![alt][img]][link]
[img]: img_url
[link]: link_url
.
<p><a href="link_url"><img src="img_url" alt="alt"></a></p>
````````````````````````````````
### [Issue 142](https://github.com/mity/md4c/issues/142)
```````````````````````````````` example
[fooﬗ]: /url
[fooﬕ]
.
<p>[fooﬕ]</p>
````````````````````````````````
### [Issue 149](https://github.com/mity/md4c/issues/149)
```````````````````````````````` example
- <script>
- foo
bar
</script>
.
<ul>
<li><script>
</li>
<li>foo
bar
</script></li>
</ul>
````````````````````````````````
## Code coverage
### `md_is_unicode_whitespace__()`
Unicode whitespace (here U+2000) forms a word boundary so these cannot be
resolved as emphasis span because there is no closer mark.
```````````````````````````````` example
*foo *bar
.
<p>*foo *bar</p>
````````````````````````````````
### `md_is_unicode_punct__()`
Ditto for Unicode punctuation (here U+00A1).
```````````````````````````````` example
*foo¡*bar
.
<p>*foo¡*bar</p>
````````````````````````````````
### `md_get_unicode_fold_info()`
```````````````````````````````` example
[Příliš žluťoučký kůň úpěl ďábelské ódy.]
[PŘÍLIŠ ŽLUŤOUČKÝ KŮŇ ÚPĚL ĎÁBELSKÉ ÓDY.]: /url
.
<p><a href="/url">Příliš žluťoučký kůň úpěl ďábelské ódy.</a></p>
````````````````````````````````
### `md_decode_utf8__()` and `md_decode_utf8_before__()`
```````````````````````````````` example
á*Á (U+00E1, i.e. two byte UTF-8 sequence)
 *  (U+2000, i.e. three byte UTF-8 sequence)
.
<p>á*Á (U+00E1, i.e. two byte UTF-8 sequence)
* (U+2000, i.e. three byte UTF-8 sequence)</p>
````````````````````````````````
### `md_is_link_destination_A()`
```````````````````````````````` example
[link](</url\.with\.escape>)
.
<p><a href="/url.with.escape">link</a></p>
````````````````````````````````
### `md_link_label_eq()`
```````````````````````````````` example
[foo bar]
[foo bar]: /url
.
<p><a href="/url">foo bar</a></p>
````````````````````````````````
### `md_is_inline_link_spec()`
```````````````````````````````` example
> [link](/url 'foo
> bar')
.
<blockquote>
<p><a href="/url" title="foo
bar">link</a></p>
</blockquote>
````````````````````````````````
### `md_build_ref_def_hashtable()`
All link labels in the following example have the same FNV1a hash (after
normalization of the label, which means after converting to a vector of Unicode
codepoints and lowercase folding).
So the example triggers quite complex code paths which are not otherwise easily
tested.
```````````````````````````````` example
[foo]: /foo
[qnptgbh]: /qnptgbh
[abgbrwcv]: /abgbrwcv
[abgbrwcv]: /abgbrwcv2
[abgbrwcv]: /abgbrwcv3
[abgbrwcv]: /abgbrwcv4
[alqadfgn]: /alqadfgn
[foo]
[qnptgbh]
[abgbrwcv]
[alqadfgn]
[axgydtdu]
.
<p><a href="/foo">foo</a>
<a href="/qnptgbh">qnptgbh</a>
<a href="/abgbrwcv">abgbrwcv</a>
<a href="/alqadfgn">alqadfgn</a>
[axgydtdu]</p>
````````````````````````````````
For the sake of completeness, the following C program was used to find the hash
collisions by brute force:
~~~
#include <stdio.h>
#include <string.h>
static unsigned etalon;
#define MD_FNV1A_BASE 2166136261
#define MD_FNV1A_PRIME 16777619
static inline unsigned
fnv1a(unsigned base, const void* data, size_t n)
{
const unsigned char* buf = (const unsigned char*) data;
unsigned hash = base;
size_t i;
for(i = 0; i < n; i++) {
hash ^= buf[i];
hash *= MD_FNV1A_PRIME;
}
return hash;
}
static unsigned
unicode_hash(const char* data, size_t n)
{
unsigned value;
unsigned hash = MD_FNV1A_BASE;
int i;
for(i = 0; i < n; i++) {
value = data[i];
hash = fnv1a(hash, &value, sizeof(unsigned));
}
return hash;
}
static void
recurse(char* buffer, size_t off, size_t len)
{
int ch;
if(off < len - 1) {
for(ch = 'a'; ch <= 'z'; ch++) {
buffer[off] = ch;
recurse(buffer, off+1, len);
}
} else {
for(ch = 'a'; ch <= 'z'; ch++) {
buffer[off] = ch;
if(unicode_hash(buffer, len) == etalon) {
printf("Dup: %.*s\n", (int)len, buffer);
}
}
}
}
int
main(int argc, char** argv)
{
char buffer[32];
int len;
if(argc < 2)
etalon = unicode_hash("foo", 3);
else
etalon = unicode_hash(argv[1], strlen(argv[1]));
for(len = 1; len <= sizeof(buffer); len++)
recurse(buffer, 0, len);
return 0;
}
~~~

@ -0,0 +1,40 @@
# h1
## h2
### h3
#### h4
##### h5
###### h6
h1
==
h2
--
--------------------
indented code
```
fenced code
```
<tag attr='val' attr2="val2">
> quote
* list item
1. list item
[ref]: /url
paragraph
&copy; &#1234; &#xabcd;
`code`
*emph* **strong** ***strong emph***
_emph_ __strong__ ___strong emph___
[ref] [ref][] [link](/url)
![ref] ![ref][] ![img](/url)
<http://example.com> <doe@example.com>
\\ \* \. \` \

@ -0,0 +1,10 @@
* [ ] unchecked
* [x] checked
A | B | C
---|--:|:-:
aaa|bbb|ccc
~del~ ~~del~~
http://example.com www.example.com doe@example.com

@ -0,0 +1 @@
$a^2+b^2=c^2$ $$a^2+b^2=c^2$$

@ -0,0 +1 @@
[[wiki]] [[wiki|label]]

@ -0,0 +1,35 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "md4c-html.h"
/* Output sink passed to md_html() by the fuzz target. The rendered HTML is
 * of no interest when fuzzing, so every chunk is simply discarded. */
static void
process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
{
    (void) text;
    (void) size;
    (void) userdata;
}
int
LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
unsigned parser_flags, renderer_flags;
if(size < 2 * sizeof(unsigned)) {
/* We interpret the 1st 8 bytes as parser flags and renderer flags. */
return 0;
}
parser_flags = *(unsigned*)data;
data += sizeof(unsigned); size -= sizeof(unsigned);
renderer_flags = *(unsigned*)data;
data += sizeof(unsigned); size -= sizeof(unsigned);
/* Allocate enough space */
md_html(data, size, process_output, NULL, parser_flags, renderer_flags);
return 0;
}

@ -0,0 +1,39 @@
# LaTeX Math
With the flag `MD_FLAG_LATEXMATHSPANS`, MD4C enables extension for recognition
of LaTeX style math spans.
A math span is any text wrapped in dollars or double dollars (`$...$` or
`$$...$$`).
```````````````````````````````` example
$a+b=c$ Hello, world!
.
<p><x-equation>a+b=c</x-equation> Hello, world!</p>
````````````````````````````````
If the double dollar sign is used, the math span is a display math span.
```````````````````````````````` example
This is a display equation: $$\int_a^b x dx$$.
.
<p>This is a display equation: <x-equation type="display">\int_a^b x dx</x-equation>.</p>
````````````````````````````````
Math spans may span multiple lines as they are normal spans:
```````````````````````````````` example
$$
\int_a^b
f(x) dx
$$
.
<p><x-equation type="display">\int_a^b f(x) dx </x-equation></p>
````````````````````````````````
Note though that many (simple) renderers may output the math spans just as a
verbatim text. (This includes the HTML renderer used by the `md2html` utility.)
Only advanced renderers which implement LaTeX math syntax can be expected to
provide better results.

@ -0,0 +1,194 @@
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
import urllib
try:
from html.parser import HTMLParseError
except ImportError:
# HTMLParseError was removed in Python 3.5. It could never be
# thrown, so we define a placeholder instead.
class HTMLParseError(Exception):
pass
from html.entities import name2codepoint
import sys
import re
import html
# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
# Attribute names regarded as significant. Currently only referenced by the
# commented-out filter in MyHTMLParser.handle_starttag().
significant_attrs = ["alt", "href", "src", "title"]
# Raw string: '\s' is not a valid escape in an ordinary string literal and
# triggers a warning on current Python versions.
whitespace_re = re.compile(r'\s+')
class MyHTMLParser(HTMLParser):
    """HTML parser that re-serialises what it reads in a normalized form.

    The normalized text accumulates in ``self.output``.  State kept between
    events:

    * ``last``     -- kind of the previous event ("starttag", "endtag",
                      "data", "comment", "decl", "pi" or "ref").
    * ``last_tag`` -- name of the most recent start or end tag.
    * ``in_pre``   -- True while inside a ``<pre>`` element, where
                      whitespace is left untouched.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Character references are handled by handle_entityref() and
        # handle_charref() rather than being converted automatically.
        self.convert_charrefs = False
        self.last = "starttag"
        self.in_pre = False
        self.output = ""
        self.last_tag = ""

    def handle_data(self, data):
        """Append text, collapsing and trimming whitespace outside <pre>."""
        after_tag = self.last == "endtag" or self.last == "starttag"
        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
        if after_tag and self.last_tag == "br":
            data = data.lstrip('\n')
        if not self.in_pre:
            data = whitespace_re.sub(' ', data)
        if after_block_tag and not self.in_pre:
            if self.last == "starttag":
                data = data.lstrip()
            elif self.last == "endtag":
                data = data.strip()
        self.output += data
        self.last = "data"

    def handle_endtag(self, tag):
        """Append a closing tag; trailing whitespace before a block tag is dropped."""
        if tag == "pre":
            self.in_pre = False
        elif self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "</" + tag + ">"
        self.last_tag = tag
        self.last = "endtag"

    def handle_starttag(self, tag, attrs):
        """Append an opening tag with its attributes sorted.

        ``href`` and ``src`` values are normalized by percent-decoding and
        re-encoding them; all other values are HTML-escaped.  Attributes
        without a value are emitted as a bare name.
        """
        # Local import: a bare "import urllib" does not guarantee that the
        # urllib.parse submodule is loaded.
        from urllib.parse import quote, unquote

        if tag == "pre":
            self.in_pre = True
        if self.is_block_tag(tag):
            self.output = self.output.rstrip()
        self.output += "<" + tag
        # For now we don't strip out 'extra' attributes, because of
        # raw HTML test cases.
        # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
        if attrs:
            attrs.sort()
            for (k, v) in attrs:
                self.output += " " + k
                # The decision is made on the attribute NAME. Testing the
                # value here meant URLs were never normalized, and a value
                # of "href" or "src" crashed on the Python 2 urllib API.
                if k in ['href', 'src'] and v is not None:
                    self.output += ("=" + '"' +
                            quote(unquote(v), safe='/') + '"')
                elif v is not None:
                    self.output += ("=" + '"' + html.escape(v, quote=True) + '"')
        self.output += ">"
        self.last_tag = tag
        self.last = "starttag"

    def handle_startendtag(self, tag, attrs):
        """Ignore closing tag for self-closing """
        self.handle_starttag(tag, attrs)
        self.last_tag = tag
        self.last = "endtag"

    def handle_comment(self, data):
        """Pass a comment through unchanged."""
        self.output += '<!--' + data + '-->'
        self.last = "comment"

    def handle_decl(self, data):
        """Pass a declaration through unchanged."""
        self.output += '<!' + data + '>'
        self.last = "decl"

    def unknown_decl(self, data):
        """Pass an unrecognized declaration through unchanged."""
        self.output += '<!' + data + '>'
        self.last = "decl"

    def handle_pi(self, data):
        """Pass a processing instruction through unchanged."""
        self.output += '<?' + data + '>'
        self.last = "pi"

    def handle_entityref(self, name):
        """Resolve a named reference; unknown names are emitted verbatim."""
        try:
            c = chr(name2codepoint[name])
        except KeyError:
            c = None
        self.output_char(c, '&' + name + ';')
        self.last = "ref"

    def handle_charref(self, name):
        """Resolve a numeric reference (decimal, or hex with 'x'/'X' prefix).

        A reference that cannot be turned into a character is emitted
        verbatim, including the leading '&#'.
        """
        try:
            if name.startswith(("x", "X")):
                c = chr(int(name[1:], 16))
            else:
                c = chr(int(name))
        except (ValueError, OverflowError):
            # Not a number, or outside the range chr() accepts.
            c = None
        self.output_char(c, '&#' + name + ';')
        self.last = "ref"

    # Helpers.
    def output_char(self, c, fallback):
        """Append ``c``, entity-encoding <, >, & and "; use ``fallback`` if ``c`` is None."""
        if c == '<':
            self.output += "&lt;"
        elif c == '>':
            self.output += "&gt;"
        elif c == '&':
            self.output += "&amp;"
        elif c == '"':
            self.output += "&quot;"
        elif c is None:
            self.output += fallback
        else:
            self.output += c

    def is_block_tag(self, tag):
        """Return True if ``tag`` is one of the tags treated as block-level here."""
        return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
            'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
            'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
            'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
            'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
            'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
def normalize_html(html):
    r"""
    Return normalized form of HTML which ignores insignificant output
    differences.

    ``html`` is the HTML text to normalize.  The normalized text is
    returned; if the parser raises HTMLParseError, a message is written to
    stderr and the input is returned unchanged.

    Multiple inner whitespaces are collapsed to a single space (except
    in pre tags):

        >>> normalize_html("<p>a \t b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a \t\nb</p>")
        '<p>a b</p>'

    * Whitespace surrounding block-level tags is removed.

        >>> normalize_html("<p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html(" <p>a b</p>")
        '<p>a b</p>'

        >>> normalize_html("<p>a b</p> ")
        '<p>a b</p>'

        >>> normalize_html("\n\t<p>\n\t\ta b\t\t</p>\n\t")
        '<p>a b</p>'

        >>> normalize_html("<i>a b</i> ")
        '<i>a b</i> '

    * Self-closing tags are converted to open tags.

        >>> normalize_html("<br />")
        '<br>'

    * Attributes are sorted and lowercased.

        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
        '<a href="foo" title="bar">x</a>'

    * References are converted to unicode, except that '<', '>', '&', and
      '"' are rendered using entities.

        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
        '\u2200&amp;&gt;&lt;&quot;'

    """
    # Raw string: same pattern as before, without invalid string escapes.
    html_chunk_re = re.compile(r"(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
    try:
        parser = MyHTMLParser()
        # We work around HTMLParser's limitations parsing CDATA
        # by breaking the input into chunks and passing CDATA chunks
        # through verbatim.
        for chunk in re.finditer(html_chunk_re, html):
            piece = chunk.group(0)
            if piece[:8] == "<![CDATA":
                parser.output += piece
            else:
                parser.feed(piece)
        parser.close()
        return parser.output
    except HTMLParseError as e:
        # The placeholder HTMLParseError defined in this file has no 'msg'
        # attribute, so fall back to the exception's string form.
        sys.stderr.write("Normalization error: " + getattr(e, "msg", str(e)) + "\n")
        return html  # on error, return unnormalized HTML

@ -0,0 +1,128 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import argparse
import sys
import platform
from cmark import CMark
from timeit import default_timer as timer
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])
cmark = CMark(prog=args.program, library_dir=args.library_dir)
# list of pairs consisting of input and a regex that must match the output.
pathological = {
# note - some pythons have limit of 65535 for {num-matches} in re.
"U+0000":
("abc\u0000de\u0000",
re.compile("abc\ufffd?de\ufffd?")),
"U+FEFF (Unicode BOM)":
("\ufefffoo",
re.compile("<p>foo</p>")),
"nested strong emph":
(("*a **a " * 65000) + "b" + (" a** a*" * 65000),
re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
"many emph closers with no openers":
(("a_ " * 65000),
re.compile("(a[_] ){64999}a_")),
"many emph openers with no closers":
(("_a " * 65000),
re.compile("(_a ){64999}_a")),
"many 3-emph openers with no closers":
(("a***" * 65000),
re.compile("(a<em><strong>a</strong></em>){32500}")),
"many link closers with no openers":
(("a]" * 65000),
re.compile("(a\]){65000}")),
"many link openers with no closers":
(("[a" * 65000),
re.compile("(\[a){65000}")),
"mismatched openers and closers":
(("*a_ " * 50000),
re.compile("([*]a[_] ){49999}[*]a_")),
"openers and closers multiple of 3":
(("a**b" + ("c* " * 50000)),
re.compile("a[*][*]b(c[*] ){49999}c[*]")),
"link openers and emph closers":
(("[ a_" * 50000),
re.compile("(\[ a_){50000}")),
"hard link/emph case":
("**x [a*b**c*](d)",
re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
"nested brackets":
(("[" * 50000) + "a" + ("]" * 50000),
re.compile("\[{50000}a\]{50000}")),
"nested block quotes":
((("> " * 50000) + "a"),
re.compile("(<blockquote>\r?\n){50000}")),
"backticks":
("".join(map(lambda x: ("e" + "`" * x), range(1,1000))),
re.compile("^<p>[e`]*</p>\r?\n$")),
"many links":
("[t](/u) " * 50000,
re.compile("(<a href=\"/u\">t</a> ?){50000}")),
"many references":
("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,20000 * 16))) + "[0] " * 20000,
re.compile("(\[0\] ){19999}")),
"deeply nested lists":
("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))),
re.compile("<ul>\r?\n(<li>a<ul>\r?\n){999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){999}")),
"many html openers and closers":
(("<>" * 50000),
re.compile("(&lt;&gt;){50000}")),
"many html proc. inst. openers":
(("x" + "<?" * 50000),
re.compile("x(&lt;\\?){50000}")),
"many html CDATA openers":
(("x" + "<![CDATA[" * 50000),
re.compile("x(&lt;!\\[CDATA\\[){50000}")),
"many backticks and escapes":
(("\\``" * 50000),
re.compile("(``){50000}")),
"many broken link titles":
(("[ (](" * 50000),
re.compile("(\[ \(\]\(){50000}")),
"broken thematic break":
(("* " * 50000 + "a"),
re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
"nested invalid link references":
(("[" * 50000 + "]" * 50000 + "\n\n[a]: /b"),
re.compile("\[{50000}\]{50000}"))
}
# Counters for the summary line printed after all cases have run.
passed = 0
errored = 0
failed = 0

#print("Testing pathological cases:")
# Run every pathological input through the converter. A case passes when
# the converter returns 0 and the output matches the case's regex.
for description, (inp, regex) in pathological.items():
    start = timer()
    [rc, actual, err] = cmark.to_html(inp)
    end = timer()
    if rc != 0:
        errored += 1
        # str.format() does not interpret '%d'; use a format field so the
        # return code is actually printed.
        print('{:35} [ERRORED (return code {:d})]'.format(description, rc))
        print(err)
    elif regex.search(actual):
        print('{:35} [PASSED] {:.3f} secs'.format(description, end-start))
        passed += 1
    else:
        print('{:35} [FAILED]'.format(description))
        print(repr(actual))
        failed += 1

print("%d passed, %d failed, %d errored" % (passed, failed, errored))
# Exit status 0 only if every case passed. sys.exit is used because the
# bare exit() helper is only installed by the site module.
if failed == 0 and errored == 0:
    sys.exit(0)
else:
    sys.exit(1)

@ -0,0 +1,50 @@
# Permissive E-mail Autolinks
With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, MD4C enables more permissive
recognition of e-mail addresses and transforms them to autolinks, even if they
do not exactly follow the syntax of autolink as specified in CommonMark
specification.
This is standard CommonMark e-mail autolink:
```````````````````````````````` example
E-mail: <mailto:john.doe@gmail.com>
.
<p>E-mail: <a href="mailto:john.doe@gmail.com">mailto:john.doe@gmail.com</a></p>
````````````````````````````````
With the permissive autolinks enabled, this is sufficient:
```````````````````````````````` example
E-mail: john.doe@gmail.com
.
<p>E-mail: <a href="mailto:john.doe@gmail.com">john.doe@gmail.com</a></p>
````````````````````````````````
`+` can occur before the `@`, but not after.
```````````````````````````````` example
hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
.
<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
````````````````````````````````
`.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at
the end of the email address, in which case it will not be considered part of
the address:
```````````````````````````````` example
a.b-c_d@a.b
a.b-c_d@a.b.
a.b-c_d@a.b-
a.b-c_d@a.b_
.
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
<p>a.b-c_d@a.b-</p>
<p>a.b-c_d@a.b_</p>
````````````````````````````````

@ -0,0 +1,99 @@
# Permissive URL Autolinks
With the flag `MD_FLAG_PERMISSIVEURLAUTOLINKS`, MD4C enables more permissive recognition
of URLs and transforms them to autolinks, even if they do not exactly follow the syntax
of autolink as specified in CommonMark specification.
This is a standard CommonMark autolink:
```````````````````````````````` example
Homepage: <https://github.com/mity/md4c>
.
<p>Homepage: <a href="https://github.com/mity/md4c">https://github.com/mity/md4c</a></p>
````````````````````````````````
With the permissive autolinks enabled, this is sufficient:
```````````````````````````````` example
Homepage: https://github.com/mity/md4c
.
<p>Homepage: <a href="https://github.com/mity/md4c">https://github.com/mity/md4c</a></p>
````````````````````````````````
But this permissive autolink feature can work only for very widely used URL
schemes, in alphabetical order `ftp:`, `http:`, `https:`.
That's why this is not a permissive autolink:
```````````````````````````````` example
ssh://root@example.com
.
<p>ssh://root@example.com</p>
````````````````````````````````
The same rules for path validation as for permissive WWW autolinks apply.
Therefore the final question mark here is not part of the autolink:
```````````````````````````````` example
Have you ever visited http://www.zombo.com?
.
<p>Have you ever visited <a href="http://www.zombo.com">http://www.zombo.com</a>?</p>
````````````````````````````````
But in contrast, in this example it is:
```````````````````````````````` example
http://www.bing.com/search?q=md4c
.
<p><a href="http://www.bing.com/search?q=md4c">http://www.bing.com/search?q=md4c</a></p>
````````````````````````````````
And finally one complex example:
```````````````````````````````` example
http://commonmark.org
(Visit https://encrypted.google.com/search?q=Markup+(business))
Anonymous FTP is available at ftp://foo.bar.baz.
.
<p><a href="http://commonmark.org">http://commonmark.org</a></p>
<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>
````````````````````````````````
## GitHub Issues
### [Issue 53](https://github.com/mity/md4c/issues/53)
```````````````````````````````` example
This is [link](http://github.com/).
.
<p>This is <a href="http://github.com/">link</a>.</p>
````````````````````````````````
```````````````````````````````` example
This is [link](http://github.com/)X
.
<p>This is <a href="http://github.com/">link</a>X</p>
````````````````````````````````
### [Issue 76](https://github.com/mity/md4c/issues/76)
```````````````````````````````` example
*(http://example.com)*
.
<p><em>(<a href="http://example.com">http://example.com</a>)</em></p>
````````````````````````````````
### [Issue 152](https://github.com/mity/md4c/issues/152)
```````````````````````````````` example
[http://example.com](http://example.com)
.
<p><a href="http://example.com">http://example.com</a></p>
````````````````````````````````

@ -0,0 +1,107 @@
# Permissive WWW Autolinks
With the flag `MD_FLAG_PERMISSIVEWWWAUTOLINKS`, MD4C enables recognition of
autolinks starting with `www.`, even if they do not exactly follow the syntax
of autolink as specified in CommonMark specification.
These do not have to be enclosed in `<` and `>`, and they do not even need
any preceding scheme specification.
The WWW autolink will be recognized when the text `www.` is found followed by a
valid domain. A valid domain consists of segments of alphanumeric characters,
underscores (`_`) and hyphens (`-`) separated by periods (`.`). There must be
at least one period, and no underscores may be present in the last two segments
of the domain.
The scheme `http` will be inserted automatically:
```````````````````````````````` example
www.commonmark.org
.
<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
````````````````````````````````
After a valid domain, zero or more non-space non-`<` characters may follow:
```````````````````````````````` example
Visit www.commonmark.org/help for more information.
.
<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>
````````````````````````````````
We then apply extended autolink path validation as follows:
Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`)
will not be considered part of the autolink, though they may be included in the
interior of the link:
```````````````````````````````` example
Visit www.commonmark.org.
Visit www.commonmark.org/a.b.
.
<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
````````````````````````````````
When an autolink ends in `)`, we scan the entire autolink for the total number
of parentheses. If there is a greater number of closing parentheses than
opening ones, we don't consider the last character part of the autolink, in
order to facilitate including an autolink inside a parenthesis:
```````````````````````````````` example
www.google.com/search?q=Markup+(business)
(www.google.com/search?q=Markup+(business))
.
<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>
````````````````````````````````
This check is only done when the link ends in a closing parenthesis `)`, so if
the only parentheses are in the interior of the autolink, no special rules are
applied:
```````````````````````````````` example
www.google.com/search?q=(business))+ok
.
<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>
````````````````````````````````
If an autolink ends in a semicolon (`;`), we check to see if it appears to
resemble an [entity reference][entity references]; if the preceding text is `&`
followed by one or more alphanumeric characters. If so, it is excluded from
the autolink:
```````````````````````````````` example
www.google.com/search?q=commonmark&hl=en
www.google.com/search?q=commonmark&hl;
.
<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p>
````````````````````````````````
`<` immediately ends an autolink.
```````````````````````````````` example
www.commonmark.org/he<lp
.
<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>
````````````````````````````````
## GitHub Issues
### [Issue 53](https://github.com/mity/md4c/issues/53)
```````````````````````````````` example
This is [link](www.github.com/).
.
<p>This is <a href="www.github.com/">link</a>.</p>
````````````````````````````````
```````````````````````````````` example
This is [link](www.github.com/)X
.
<p>This is <a href="www.github.com/">link</a>X</p>
````````````````````````````````

File diff suppressed because it is too large Load Diff

@ -0,0 +1,144 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from difflib import unified_diff
import argparse
import re
import json
from cmark import CMark
from normalize import normalize_html
if __name__ == "__main__":
    # Command-line interface of the spec test runner.  The parsed result is
    # kept in the module-level name `args`, which the driver code at the
    # bottom of the file reads.
    parser = argparse.ArgumentParser(description='Run cmark tests.')

    # What to test and which spec file to read the examples from.
    parser.add_argument(
        '-p', '--program',
        dest='program', nargs='?', default=None,
        help='program to test')
    parser.add_argument(
        '-s', '--spec',
        dest='spec', nargs='?', default='spec.txt',
        help='path to spec')

    # Selection of a subset of the examples.
    parser.add_argument(
        '-P', '--pattern',
        dest='pattern', nargs='?', default=None,
        help='limit to sections matching regex pattern')
    parser.add_argument(
        '--library-dir',
        dest='library_dir', nargs='?', default=None,
        help='directory containing dynamic library')

    # `normalize` defaults to True; passing --no-normalize stores False.
    parser.add_argument(
        '--no-normalize',
        dest='normalize', action='store_const', const=False, default=True,
        help='do not normalize HTML')

    # Alternative modes of operation (both default to off).
    parser.add_argument(
        '-d', '--dump-tests',
        dest='dump_tests', action='store_const', const=True, default=False,
        help='dump tests in JSON format')
    parser.add_argument(
        '--debug-normalization',
        dest='debug_normalization', action='store_const', const=True,
        default=False,
        help='filter stdin through normalizer for testing')

    parser.add_argument(
        '-n', '--number',
        type=int, default=None,
        help='only consider the test with the given number')

    args = parser.parse_args(sys.argv[1:])
def out(str):
    """Write the given text to standard output encoded as UTF-8.

    The bytes go straight to ``sys.stdout.buffer``, so the encoding that the
    text layer of ``sys.stdout`` would otherwise apply is not involved.
    """
    encoded = str.encode('utf-8')
    sys.stdout.buffer.write(encoded)
def print_test_header(headertext, example_number, start_line, end_line):
    """Print the one-line banner that identifies an example in the report.

    The banner has the form ``Example N (lines A-B) <section heading>`` and
    is emitted through ``out``.
    """
    banner = "Example %d (lines %d-%d) %s\n" % (
        example_number, start_line, end_line, headertext)
    out(banner)
def do_test(test, normalize, result_counts):
    """Run a single spec example and record its outcome.

    Parameters:
        test: dictionary describing the example; the keys read here are
            'markdown', 'html', 'section', 'example', 'start_line' and
            'end_line'.
        normalize: when true, both the actual and the expected HTML are
            passed through ``normalize_html`` before being compared;
            otherwise they are compared verbatim.
        result_counts: dictionary whose 'pass', 'fail' or 'error' entry is
            incremented in place.

    The converter is reached through the module-level name ``cmark``; its
    ``to_html`` result is unpacked as (return code, HTML, error output).
    Nothing is printed for a passing example.
    """
    retcode, actual_html, err = cmark.to_html(test['markdown'])

    # The converter itself failed: report its error output and stop.
    if retcode != 0:
        print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
        out("program returned error code %d\n" % retcode)
        # Written without re-encoding -- presumably `err` is already bytes.
        sys.stdout.buffer.write(err)
        result_counts['error'] += 1
        return

    expected_html = test['html']
    unicode_error = None
    if not normalize:
        passed = actual_html == expected_html
    else:
        try:
            passed = normalize_html(actual_html) == normalize_html(expected_html)
        except UnicodeDecodeError as exc:
            # A decoding problem during normalization counts as a failure
            # and is reported in place of the diff below.
            unicode_error = exc
            passed = False

    if passed:
        result_counts['pass'] += 1
        return

    # Failure report: banner, the Markdown input, then either the Unicode
    # error details or a unified diff of expected vs. actual HTML.
    print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
    out(test['markdown'] + '\n')
    if unicode_error:
        out("Unicode error: " + str(unicode_error) + '\n')
        out("Expected: " + repr(expected_html) + '\n')
        out("Got: " + repr(actual_html) + '\n')
    else:
        expected_html_lines = expected_html.splitlines(True)
        actual_html_lines = actual_html.splitlines(True)
        difference = unified_diff(expected_html_lines, actual_html_lines,
                                  "expected HTML", "actual HTML")
        for diffline in difference:
            out(diffline)
    out('\n')
    result_counts['fail'] += 1
def get_tests(specfile):
    """Extract the examples from a spec file.

    An example starts with a line of 32 backticks followed by `` example``
    (optionally with one lowercase word after it), continues with the
    Markdown input, a line holding a single ``.``, the expected HTML, and
    ends with a line of exactly 32 backticks.

    Parameters:
        specfile: path of the spec file; it is read as UTF-8.

    Returns:
        A list of dictionaries, one per example, with the keys 'markdown',
        'html', 'example' (1-based running number), 'start_line',
        'end_line' and 'section' (text of the most recent heading seen
        outside an example).

    In both the Markdown and the HTML part the character U+2192 (rightwards
    arrow) stands for a tab and is replaced by one.
    """
    # Written as an escape so the marker cannot be lost when the file is
    # copied through tools that drop non-ASCII characters; replacing the
    # empty string instead would insert a tab between every character.
    tab_marker = '\u2192'

    line_number = 0
    start_line = 0
    example_number = 0
    markdown_lines = []
    html_lines = []
    state = 0  # 0 regular text, 1 markdown example, 2 html output
    headertext = ''
    tests = []

    header_re = re.compile('#+ ')
    closing_fence = "`" * 32

    with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
        for line in specf:
            line_number = line_number + 1
            stripped = line.strip()
            if re.match("`{32} example( [a-z]{1,})?", stripped):
                state = 1
            elif state == 2 and stripped == closing_fence:
                # End of the HTML part: the example is complete.
                state = 0
                example_number = example_number + 1
                tests.append({
                    "markdown": ''.join(markdown_lines).replace(tab_marker, "\t"),
                    "html": ''.join(html_lines).replace(tab_marker, "\t"),
                    "example": example_number,
                    "start_line": start_line,
                    "end_line": line_number,
                    "section": headertext})
                start_line = 0
                markdown_lines = []
                html_lines = []
            elif stripped == ".":
                # Separator between the Markdown input and the expected HTML.
                state = 2
            elif state == 1:
                if start_line == 0:
                    # Report the line of the opening fence, i.e. the line
                    # before the first line of Markdown.
                    start_line = line_number - 1
                markdown_lines.append(line)
            elif state == 2:
                html_lines.append(line)
            elif state == 0 and re.match(header_re, line):
                headertext = header_re.sub('', line).strip()
    return tests
if __name__ == "__main__":
    # Mode 1: only run stdin through the HTML normalizer and print the result.
    if args.debug_normalization:
        out(normalize_html(sys.stdin.read()))
        sys.exit(0)

    all_tests = get_tests(args.spec)

    # Without --pattern every section matches.
    if args.pattern:
        pattern_re = re.compile(args.pattern, re.IGNORECASE)
    else:
        pattern_re = re.compile('.')
    tests = [
        test for test in all_tests
        if pattern_re.search(test['section'])
        and (not args.number or test['example'] == args.number)
    ]

    # Mode 2: dump the selected examples as JSON instead of running them.
    if args.dump_tests:
        out(json.dumps(tests, ensure_ascii=False, indent=2))
        sys.exit(0)
    else:
        skipped = len(all_tests) - len(tests)
        # Module-level on purpose: do_test() looks the converter up by the
        # global name `cmark`.
        cmark = CMark(prog=args.program, library_dir=args.library_dir)
        result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
        for test in tests:
            do_test(test, args.normalize, result_counts)
        out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts))
        # The exit status is the number of unsuccessful examples.  Only the
        # low 8 bits of a status reach the parent process, so without the
        # clamp a multiple of 256 failures would be reported as success.
        sys.exit(min(result_counts['fail'] + result_counts['error'], 255))

@ -0,0 +1,75 @@
# Strike-Through
With the flag `MD_FLAG_STRIKETHROUGH`, MD4C enables extension for recognition
of strike-through spans.
Strike-through text is any text wrapped in one or two tildes (`~`).
```````````````````````````````` example
~Hi~ Hello, world!
.
<p><del>Hi</del> Hello, world!</p>
````````````````````````````````
If the length of the opener and closer doesn't match, the strike-through is
not recognized.
```````````````````````````````` example
This ~text~~ is curious.
.
<p>This ~text~~ is curious.</p>
````````````````````````````````
A tilde sequence that is too long won't be recognized:
```````````````````````````````` example
foo ~~~bar~~~
.
<p>foo ~~~bar~~~</p>
````````````````````````````````
Also note the markers cannot open a strike-through span if they are followed
with a whitespace; and similarly, they cannot close the span if they are
preceded with a whitespace:
```````````````````````````````` example
~foo ~bar
.
<p>~foo ~bar</p>
````````````````````````````````
As with regular emphasis delimiters, a new paragraph will cause the cessation
of parsing a strike-through:
```````````````````````````````` example
This ~~has a
new paragraph~~.
.
<p>This ~~has a</p>
<p>new paragraph~~.</p>
````````````````````````````````
## GitHub Issues
### [Issue 69](https://github.com/mity/md4c/issues/69)
```````````````````````````````` example
~`foo`~
.
<p><del><code>foo</code></del></p>
````````````````````````````````
```````````````````````````````` example
~*foo*~
.
<p><del><em>foo</em></del></p>
````````````````````````````````
```````````````````````````````` example
*~foo~*
.
<p><em><del>foo</del></em></p>
````````````````````````````````

@ -0,0 +1,357 @@
# Tables
With the flag `MD_FLAG_TABLES`, MD4C enables extension for recognition of
tables.
A basic example of a table with two columns and three rows (not counting
the header) is as follows:
```````````````````````````````` example
| Column 1 | Column 2 |
|----------|----------|
| foo | bar |
| baz | qux |
| quux | quuz |
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
The leading and succeeding pipe characters (`|`) on each line are optional:
```````````````````````````````` example
Column 1 | Column 2 |
---------|--------- |
foo | bar |
baz | qux |
quux | quuz |
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
```````````````````````````````` example
| Column 1 | Column 2
|----------|---------
| foo | bar
| baz | qux
| quux | quuz
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
```````````````````````````````` example
Column 1 | Column 2
---------|---------
foo | bar
baz | qux
quux | quuz
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
However, for a one-column table, at least one pipe has to be used in the table
header underline, otherwise it would be parsed as a Setext title followed by
a paragraph.
```````````````````````````````` example
Column 1
--------
foo
baz
quux
.
<h2>Column 1</h2>
<p>foo
baz
quux</p>
````````````````````````````````
Leading and trailing whitespace in a table cell is ignored and the columns do
not need to be aligned.
```````````````````````````````` example
Column 1 |Column 2
---|---
foo | bar
baz| qux
quux|quuz
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
The table cannot interrupt a paragraph.
```````````````````````````````` example
Lorem ipsum dolor sit amet.
| Column 1 | Column 2
| ---------|---------
| foo | bar
| baz | qux
| quux | quuz
.
<p>Lorem ipsum dolor sit amet.
| Column 1 | Column 2
| ---------|---------
| foo | bar
| baz | qux
| quux | quuz</p>
````````````````````````````````
Similarly, a paragraph cannot interrupt a table:
```````````````````````````````` example
Column 1 | Column 2
---------|---------
foo | bar
baz | qux
quux | quuz
Lorem ipsum dolor sit amet.
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
<tr><td>Lorem ipsum dolor sit amet.</td><td></td></tr>
</tbody>
</table>
````````````````````````````````
The first, the last or both the first and the last dash in each column
underline can be replaced with a colon (`:`) to request left, right or middle
alignment of the respective column:
```````````````````````````````` example
| Column 1 | Column 2 | Column 3 | Column 4 |
|----------|:---------|:--------:|---------:|
| default | left | center | right |
.
<table>
<thead>
<tr><th>Column 1</th><th align="left">Column 2</th><th align="center">Column 3</th><th align="right">Column 4</th></tr>
</thead>
<tbody>
<tr><td>default</td><td align="left">left</td><td align="center">center</td><td align="right">right</td></tr>
</tbody>
</table>
````````````````````````````````
To include a literal pipe character in any cell, it has to be escaped.
```````````````````````````````` example
Column 1 | Column 2
---------|---------
foo | bar
baz | qux \| xyzzy
quux | quuz
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux | xyzzy</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
The content of each cell is parsed as inline text which may contain any
inline Markdown spans like emphasis, strong emphasis, links etc.
```````````````````````````````` example
Column 1 | Column 2
---------|---------
*foo* | bar
**baz** | [qux]
quux | [quuz](/url2)
[qux]: /url
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td><em>foo</em></td><td>bar</td></tr>
<tr><td><strong>baz</strong></td><td><a href="/url">qux</a></td></tr>
<tr><td>quux</td><td><a href="/url2">quuz</a></td></tr>
</tbody>
</table>
````````````````````````````````
However pipes which are inside a code span are not recognized as cell
boundaries.
```````````````````````````````` example
Column 1 | Column 2
---------|---------
`foo | bar`
baz | qux
quux | quuz
.
<table>
<thead>
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
<tr><td><code>foo | bar</code></td><td></td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
````````````````````````````````
## GitHub Issues
### [Issue 41](https://github.com/mity/md4c/issues/41)
```````````````````````````````` example
* x|x
---|---
.
<ul>
<li>x|x
---|---</li>
</ul>
````````````````````````````````
(Not a table, because the underline has wrong indentation and is not part of the
list item.)
```````````````````````````````` example
* x|x
---|---
x|x
.
<ul>
<li><table>
<thead>
<tr>
<th>x</th>
<th>x</th>
</tr>
</thead>
</table>
</li>
</ul>
<p>x|x</p>
````````````````````````````````
(Here the underline has the right indentation so the table is detected.
But the last line is not part of it due to its indentation.)
### [Issue 42](https://github.com/mity/md4c/issues/42)
```````````````````````````````` example
] http://x.x *x*
|x|x|
|---|---|
|x|
.
<p>] http://x.x <em>x</em></p>
<table>
<thead>
<tr>
<th>x</th>
<th>x</th>
</tr>
</thead>
<tbody>
<tr>
<td>x</td>
<td></td>
</tr>
</tbody>
</table>
````````````````````````````````
### [Issue 104](https://github.com/mity/md4c/issues/104)
```````````````````````````````` example
A | B
--- | ---
[x](url)
.
<table>
<thead>
<tr>
<th>A</th>
<th>B</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="url">x</a></td>
<td></td>
</tr>
</tbody>
</table>
````````````````````````````````
### [Issue 138](https://github.com/mity/md4c/issues/138)
```````````````````````````````` example
| abc | def |
| --- | --- |
.
<table>
<thead>
<tr>
<th>abc</th>
<th>def</th>
</tr>
</thead>
</table>
````````````````````````````````

@ -0,0 +1,117 @@
# Tasklists
With the flag `MD_FLAG_TASKLISTS`, MD4C enables extension for recognition of
task lists.
Basic task list may look as follows:
```````````````````````````````` example
* [x] foo
* [X] bar
* [ ] baz
.
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
</ul>
````````````````````````````````
Task lists can also be in ordered lists:
```````````````````````````````` example
1. [x] foo
2. [X] bar
3. [ ] baz
.
<ol>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
</ol>
````````````````````````````````
Task lists can also be nested in ordinary lists:
```````````````````````````````` example
* xxx:
* [x] foo
* [x] bar
* [ ] baz
* yyy:
* [ ] qux
* [x] quux
* [ ] quuz
.
<ul>
<li>xxx:
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
</ul></li>
<li>yyy:
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>qux</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>quux</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>quuz</li>
</ul></li>
</ul>
````````````````````````````````
Or in a parent task list:
```````````````````````````````` example
1. [x] xxx:
* [x] foo
* [x] bar
* [ ] baz
2. [ ] yyy:
* [ ] qux
* [x] quux
* [ ] quuz
.
<ol>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>xxx:
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
</ul></li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>yyy:
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>qux</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>quux</li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>quuz</li>
</ul></li>
</ol>
````````````````````````````````
Also, ordinary lists can be nested in the task lists.
```````````````````````````````` example
* [x] xxx:
* foo
* bar
* baz
* [ ] yyy:
* qux
* quux
* quuz
.
<ul>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>xxx:
<ul>
<li>foo</li>
<li>bar</li>
<li>baz</li>
</ul></li>
<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>yyy:
<ul>
<li>qux</li>
<li>quux</li>
<li>quuz</li>
</ul></li>
</ul>
````````````````````````````````

@ -0,0 +1,39 @@
# Underline
With the flag `MD_FLAG_UNDERLINE`, MD4C sees underscore `_` as a mark
denoting an underlined span rather than an ordinary emphasis (or a strong
emphasis).
```````````````````````````````` example
_foo_
.
<p><u>foo</u></p>
````````````````````````````````
In sequences of multiple underscores, each single one translates into an
underline span mark.
```````````````````````````````` example
___foo___
.
<p><u><u><u>foo</u></u></u></p>
````````````````````````````````
Intra-word underscores are not recognized as underline marks:
```````````````````````````````` example
foo_bar_baz
.
<p>foo_bar_baz</p>
````````````````````````````````
Also the parser follows the standard understanding when the underscore can
or cannot open or close a span. Therefore there is no underline in the following
example because no underscore can be seen as a closing mark.
```````````````````````````````` example
_foo _bar
.
<p>_foo _bar</p>
````````````````````````````````

@ -0,0 +1,232 @@
# Wiki Links
With the flag `MD_FLAG_WIKILINKS`, MD4C recognizes wiki links.
The simple wiki-link is a wiki-link destination enclosed in `[[` followed with
`]]`.
```````````````````````````````` example
[[foo]]
.
<p><x-wikilink data-target="foo">foo</x-wikilink></p>
````````````````````````````````
However wiki-link may contain an explicit label, delimited from the destination
with `|`.
```````````````````````````````` example
[[foo|bar]]
.
<p><x-wikilink data-target="foo">bar</x-wikilink></p>
````````````````````````````````
A wiki-link destination cannot be empty.
```````````````````````````````` example
[[]]
.
<p>[[]]</p>
````````````````````````````````
```````````````````````````````` example
[[|foo]]
.
<p>[[|foo]]</p>
````````````````````````````````
The wiki-link destination cannot contain a new line.
```````````````````````````````` example
[[foo
bar]]
.
<p>[[foo
bar]]</p>
````````````````````````````````
```````````````````````````````` example
[[foo
bar|baz]]
.
<p>[[foo
bar|baz]]</p>
````````````````````````````````
The wiki-link destination is rendered verbatim; inline markup in it is not
recognized.
```````````````````````````````` example
[[*foo*]]
.
<p><x-wikilink data-target="*foo*">*foo*</x-wikilink></p>
````````````````````````````````
```````````````````````````````` example
[[foo|![bar](bar.jpg)]]
.
<p><x-wikilink data-target="foo"><img src="bar.jpg" alt="bar"></x-wikilink></p>
````````````````````````````````
With multiple `|` delimiters, only the first one is recognized and the other
ones are part of the label.
```````````````````````````````` example
[[foo|bar|baz]]
.
<p><x-wikilink data-target="foo">bar|baz</x-wikilink></p>
````````````````````````````````
However the delimiter `|` can be escaped with `\`.
```````````````````````````````` example
[[foo\|bar|baz]]
.
<p><x-wikilink data-target="foo|bar">baz</x-wikilink></p>
````````````````````````````````
The label can contain inline elements.
```````````````````````````````` example
[[foo|*bar*]]
.
<p><x-wikilink data-target="foo"><em>bar</em></x-wikilink></p>
````````````````````````````````
Empty explicit label is the same as using the implicit label; i.e. the verbatim
destination string is used as the label.
```````````````````````````````` example
[[foo|]]
.
<p><x-wikilink data-target="foo">foo</x-wikilink></p>
````````````````````````````````
The label can span multiple lines.
```````````````````````````````` example
[[foo|foo
bar
baz]]
.
<p><x-wikilink data-target="foo">foo
bar
baz</x-wikilink></p>
````````````````````````````````
Wiki-links have higher priority than links.
```````````````````````````````` example
[[foo]](foo.jpg)
.
<p><x-wikilink data-target="foo">foo</x-wikilink>(foo.jpg)</p>
````````````````````````````````
```````````````````````````````` example
[foo]: /url
[[foo]]
.
<p><x-wikilink data-target="foo">foo</x-wikilink></p>
````````````````````````````````
Wiki links can be inlined in tables.
```````````````````````````````` example
| A | B |
|------------------|-----|
| [[foo|*bar*]] | baz |
.
<table>
<thead>
<tr>
<th>A</th>
<th>B</th>
</tr>
</thead>
<tbody>
<tr>
<td><x-wikilink data-target="foo"><em>bar</em></x-wikilink></td>
<td>baz</td>
</tr>
</tbody>
</table>
````````````````````````````````
Wiki-links are not prioritized over images.
```````````````````````````````` example
![[foo]](foo.jpg)
.
<p><img src="foo.jpg" alt="[foo]"></p>
````````````````````````````````
Something that may look like a wiki-link at first, but turns out not to be,
is recognized as a normal link.
```````````````````````````````` example
[[foo]
[foo]: /url
.
<p>[<a href="/url">foo</a></p>
````````````````````````````````
Escaping the opening `[` escapes only that one character, not the whole `[[`
opener:
```````````````````````````````` example
\[[foo]]
[foo]: /url
.
<p>[<a href="/url">foo</a>]</p>
````````````````````````````````
Like with other inline links, the innermost wiki-link is preferred.
```````````````````````````````` example
[[foo[[bar]]]]
.
<p>[[foo<x-wikilink data-target="bar">bar</x-wikilink>]]</p>
````````````````````````````````
There is a limit of 100 characters for the wiki-link destination.
```````````````````````````````` example
[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]
.
<p>[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]</p>
````````````````````````````````
100 characters inside a wiki link target works.
```````````````````````````````` example
[[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890]]
[[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890|foo]]
.
<p><x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890</x-wikilink>
<x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">foo</x-wikilink></p>
````````````````````````````````
The limit on link content does not include any characters belonging to a block
quote, if the label spans multiple lines contained in a block quote.
```````````````````````````````` example
> [[12345678901234567890123456789012345678901234567890|1234567890
> 1234567890
> 1234567890
> 1234567890
> 123456789]]
.
<blockquote>
<p><x-wikilink data-target="12345678901234567890123456789012345678901234567890">1234567890
1234567890
1234567890
1234567890
123456789</x-wikilink></p>
</blockquote>
````````````````````````````````

@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-07-07 18:16+0000\n"
"POT-Creation-Date: 2022-04-15 19:59+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -1395,6 +1395,11 @@ msgstr ""
msgid "HTML Document"
msgstr ""
#. Comment
#: text/markdown.desktop:2
msgid "Markdown document"
msgstr ""
#. Comment
#: text/plain.desktop:3
msgid "Plain Text Document"

@ -0,0 +1,27 @@
# SOME DESCRIPTIVE TITLE.
# This file is put in the public domain.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-04-15 14:26+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#. Name
#: markdown_part.desktop:2
msgid "Markdown Viewer"
msgstr ""
#. Comment
#: markdown_part.desktop:4
msgid "Embeddable lightweight markdown viewing component"
msgstr ""
Loading…
Cancel
Save