Skip to content

Commit

Permalink
Update cmark-upstream to github/cmark-gfm@c8dcdc7
Browse files Browse the repository at this point in the history
  • Loading branch information
anticomputer committed Apr 6, 2023
1 parent bbb49db commit 2eb8ca8
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 183 deletions.
131 changes: 23 additions & 108 deletions ext/commonmarker/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
#define CODE_INDENT 4
#define TAB_STOP 4

/**
* Very deeply nested lists can cause quadratic performance issues.
* This constant is used in open_new_blocks() to limit the nesting
* depth. It is unlikely that a non-contrived markdown document will
* be nested this deeply.
*/
#define MAX_LIST_DEPTH 100

#ifndef MIN
#define MIN(x, y) ((x < y) ? x : y)
#endif
Expand Down Expand Up @@ -70,22 +78,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
bufsize_t bytes);

static void subtract_open_block_counts(cmark_parser *parser, cmark_node *node) {
do {
decr_open_block_count(parser, S_type(node));
node->flags &= ~CMARK_NODE__OPEN_BLOCK;
node = node->last_child;
} while (node);
}

static void add_open_block_counts(cmark_parser *parser, cmark_node *node) {
do {
incr_open_block_count(parser, S_type(node));
node->flags |= CMARK_NODE__OPEN_BLOCK;
node = node->last_child;
} while (node);
}

static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
int start_line, int start_column) {
cmark_node *e;
Expand Down Expand Up @@ -145,7 +137,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
parser->refmap = cmark_reference_map_new(parser->mem);
parser->root = document;
parser->current = document;
add_open_block_counts(parser, document);

parser->syntax_extensions = saved_exts;
parser->inline_syntax_extensions = saved_inline_exts;
Expand Down Expand Up @@ -259,18 +250,15 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
// Check to see if a node ends with a blank line, descending
// if needed into lists and sublists.
static bool S_ends_with_blank_line(cmark_node *node) {
while (true) {
if (S_last_line_checked(node)) {
return(S_last_line_blank(node));
} else if ((S_type(node) == CMARK_NODE_LIST ||
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
S_set_last_line_checked(node);
node = node->last_child;
continue;
} else {
S_set_last_line_checked(node);
return (S_last_line_blank(node));
}
if (S_last_line_checked(node)) {
return(S_last_line_blank(node));
} else if ((S_type(node) == CMARK_NODE_LIST ||
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
S_set_last_line_checked(node);
return(S_ends_with_blank_line(node->last_child));
} else {
S_set_last_line_checked(node);
return (S_last_line_blank(node));
}
}

Expand Down Expand Up @@ -330,12 +318,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
has_content = resolve_reference_link_definitions(parser, b);
if (!has_content) {
// remove blank node (former reference def)
if (b->flags & CMARK_NODE__OPEN_BLOCK) {
decr_open_block_count(parser, S_type(b));
if (b->prev) {
add_open_block_counts(parser, b->prev);
}
}
cmark_node_free(b);
}
break;
Expand Down Expand Up @@ -408,17 +390,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
return parent;
}

// Recalculates the number of open blocks. Returns true if it matches what's currently stored
// in parser. (Used to check that the counts in parser, which are updated incrementally, are
// correct.)
bool check_open_block_counts(cmark_parser *parser) {
cmark_parser tmp_parser = {0}; // Only used for its open_block_counts and total_open_blocks fields.
add_open_block_counts(&tmp_parser, parser->root);
return
tmp_parser.total_open_blocks == parser->total_open_blocks &&
memcmp(tmp_parser.open_block_counts, parser->open_block_counts, sizeof(parser->open_block_counts)) == 0;
}

// Add a node as child of another. Return pointer to child.
static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
cmark_node_type block_type, int start_column) {
Expand All @@ -437,14 +408,11 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
if (parent->last_child) {
parent->last_child->next = child;
child->prev = parent->last_child;
subtract_open_block_counts(parser, parent->last_child);
} else {
parent->first_child = child;
child->prev = NULL;
}
parent->last_child = child;
add_open_block_counts(parser, child);

return child;
}

Expand Down Expand Up @@ -1087,14 +1055,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
*all_matched = false;
cmark_node *container = parser->root;
cmark_node_type cont_type;
cmark_parser tmp_parser; // Only used for its open_block_counts and total_open_blocks fields.
memcpy(tmp_parser.open_block_counts, parser->open_block_counts, sizeof(parser->open_block_counts));
tmp_parser.total_open_blocks = parser->total_open_blocks;

assert(check_open_block_counts(parser));

while (S_last_child_is_open(container)) {
decr_open_block_count(&tmp_parser, S_type(container));
container = container->last_child;
cont_type = S_type(container);

Expand All @@ -1106,53 +1068,6 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
continue;
}

// This block of code is a workaround for the quadratic performance
// issue described here (issue 2):
//
// https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh
//
// If the current line is empty then we might be able to skip directly
// to the end of the list of open blocks. To determine whether this is
// possible, we have been maintaining a count of the number of
// different types of open blocks. The main criterium is that every
// remaining block, except the last element of the list, is a LIST or
// ITEM. The code below checks the conditions, and if they're ok, skips
// forward to parser->current.
if (parser->blank && parser->indent == 0) { // Current line is empty
// Make sure that parser->current doesn't point to a closed block.
if (parser->current->flags & CMARK_NODE__OPEN_BLOCK) {
if (parser->current->flags & CMARK_NODE__OPEN) {
const size_t n_list = read_open_block_count(&tmp_parser, CMARK_NODE_LIST);
const size_t n_item = read_open_block_count(&tmp_parser, CMARK_NODE_ITEM);
// At most one block can be something other than a LIST or ITEM.
if (n_list + n_item + 1 >= tmp_parser.total_open_blocks) {
// Check that parser->current is suitable for jumping to.
switch (S_type(parser->current)) {
case CMARK_NODE_LIST:
case CMARK_NODE_ITEM:
if (n_list + n_item != tmp_parser.total_open_blocks) {
if (parser->current->last_child == NULL) {
// There's another node type somewhere in the middle of
// the list, so don't attempt the optimization.
break;
}
}
// fall through
case CMARK_NODE_CODE_BLOCK:
case CMARK_NODE_PARAGRAPH:
case CMARK_NODE_HTML_BLOCK:
// Jump to parser->current
container = parser->current;
cont_type = S_type(container);
break;
default:
break;
}
}
}
}
}

switch (cont_type) {
case CMARK_NODE_BLOCK_QUOTE:
if (!parse_block_quote_prefix(parser, input))
Expand Down Expand Up @@ -1212,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
bool has_content;
int save_offset;
int save_column;
size_t depth = 0;

while (cont_type != CMARK_NODE_CODE_BLOCK &&
cont_type != CMARK_NODE_HTML_BLOCK) {

depth++;
S_find_first_nonspace(parser, input);
indented = parser->indent >= CODE_INDENT;

Expand Down Expand Up @@ -1286,9 +1202,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
has_content = resolve_reference_link_definitions(parser, *container);

if (has_content) {
cmark_node_set_type(*container, CMARK_NODE_HEADING);
decr_open_block_count(parser, CMARK_NODE_PARAGRAPH);
incr_open_block_count(parser, CMARK_NODE_HEADING);

(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
Expand Down Expand Up @@ -1318,6 +1233,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->internal_offset = matched;
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
parser->indent < 4 &&
depth < MAX_LIST_DEPTH &&
(matched = parse_list_marker(
parser->mem, input, parser->first_nonspace,
(*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
Expand Down Expand Up @@ -1443,7 +1359,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
S_set_last_line_blank(container, last_line_blank);

tmp = container;
while (tmp->parent && S_last_line_blank(tmp->parent)) {
while (tmp->parent) {
S_set_last_line_blank(tmp->parent, false);
tmp = tmp->parent;
}
Expand Down Expand Up @@ -1572,7 +1488,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,

parser->line_number++;

assert(parser->current->next == NULL);
last_matched_container = check_open_blocks(parser, &input, &all_matched);

if (!last_matched_container)
Expand Down
21 changes: 11 additions & 10 deletions ext/commonmarker/cmark-gfm.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,6 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options);
#define CMARK_NODE_TYPE_MASK (0xc000)
#define CMARK_NODE_VALUE_MASK (0x3fff)

/**
* This is the maximum number of block types (CMARK_NODE_DOCUMENT,
* CMARK_NODE_HEADING, ...). It needs to be bigger than the number of
* hardcoded block types (below) to allow for extensions (see
* cmark_syntax_extension_add_node). But it also determines the size of the
* open_block_counts array in the cmark_parser struct, so we don't want it
* to be excessively large.
*/
#define CMARK_NODE_TYPE_BLOCK_LIMIT 0x20

typedef enum {
/* Error status */
CMARK_NODE_NONE = 0x0000,
Expand Down Expand Up @@ -423,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
*/
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);

/**
* Returns item index of 'node'. This is only used when rendering output
* formats such as commonmark, which need to output the index. It is not
* required for formats such as html or latex.
*/
CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);

/** Sets item index of 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);

/** Returns the info string from a fenced code block.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
Expand Down
3 changes: 1 addition & 2 deletions ext/commonmarker/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,14 +216,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
LIT("<!-- end list -->");
BLANKLINE();
}
renderer->list_number = cmark_node_get_list_start(node);
break;

case CMARK_NODE_ITEM:
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
marker_width = 4;
} else {
list_number = renderer->list_number++;
list_number = cmark_node_get_item_index(node);
list_delim = cmark_node_get_list_delim(node->parent);
// we ensure a width of at least 4 so
// we get nice transition from single digits
Expand Down
3 changes: 1 addition & 2 deletions ext/commonmarker/man.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
break;

case CMARK_NODE_LIST:
renderer->list_number = cmark_node_get_list_start(node);
break;

case CMARK_NODE_ITEM:
Expand All @@ -123,7 +122,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
LIT("\\[bu] 2");
} else {
list_number = renderer->list_number++;
list_number = cmark_node_get_item_index(node);
char list_number_s[LIST_NUMBER_SIZE];
snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
LIT(list_number_s);
Expand Down
27 changes: 26 additions & 1 deletion ext/commonmarker/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags) {
nextflag <<= 1;
}

void cmark_init_standard_node_flags() {}
void cmark_init_standard_node_flags(void) {}

bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) {
if (child_type == CMARK_NODE_DOCUMENT) {
Expand Down Expand Up @@ -564,6 +564,31 @@ int cmark_node_set_list_tight(cmark_node *node, int tight) {
}
}

int cmark_node_get_item_index(cmark_node *node) {
if (node == NULL) {
return 0;
}

if (node->type == CMARK_NODE_ITEM) {
return node->as.list.start;
} else {
return 0;
}
}

int cmark_node_set_item_index(cmark_node *node, int idx) {
if (node == NULL || idx < 0) {
return 0;
}

if (node->type == CMARK_NODE_ITEM) {
node->as.list.start = idx;
return 1;
} else {
return 0;
}
}

const char *cmark_node_get_fence_info(cmark_node *node) {
if (node == NULL) {
return NULL;
Expand Down
9 changes: 4 additions & 5 deletions ext/commonmarker/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,12 @@ typedef struct {

enum cmark_node__internal_flags {
CMARK_NODE__OPEN = (1 << 0),
CMARK_NODE__OPEN_BLOCK = (1 << 1),
CMARK_NODE__LAST_LINE_BLANK = (1 << 2),
CMARK_NODE__LAST_LINE_CHECKED = (1 << 3),
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),

// Extensions can register custom flags by calling `cmark_register_node_flag`.
// This is the starting value for the custom flags.
CMARK_NODE__REGISTER_FIRST = (1 << 4),
CMARK_NODE__REGISTER_FIRST = (1 << 3),
};

typedef uint16_t cmark_node_internal_flags;
Expand Down Expand Up @@ -128,7 +127,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags);
* library. It is now a no-op.
*/
CMARK_GFM_EXPORT
void cmark_init_standard_node_flags();
void cmark_init_standard_node_flags(void);

static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
return node->content.mem;
Expand Down
Loading

0 comments on commit 2eb8ca8

Please sign in to comment.