Skip to content

Commit

Permalink
Initial support for the GitHub table syntax.
Browse files Browse the repository at this point in the history
For now, leading and trailing pipes are always required.
  • Loading branch information
mkende committed Mar 30, 2024
1 parent 391a9db commit 1b9d7e4
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 35 deletions.
3 changes: 2 additions & 1 deletion .aspelldict
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
personal_ws-1.1 en 168
personal_ws-1.1 en 169
CDATA
CommonMark
CounterClockwiseContourIntegral
Expand Down Expand Up @@ -77,6 +77,7 @@ figcaption
fj
frameset
gc
gcx
gfm
github
gx
Expand Down
17 changes: 17 additions & 0 deletions lib/Markdown/Perl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,23 @@ sub _emit_html {
.join("</li>\n<li>",
map { $this->_emit_html(!$loose, 'list', $linkrefs, @{$_->{content}}) } @{$b->{items}})
."</li>\n</${type}>\n";
} elsif ($b->{type} eq 'table') {
$out .= '<table><thead><tr><th>';
$out .= join('</th><th>',
map { $this->_render_inlines($linkrefs, $_) } @{$b->{content}{headers}});
$out .= '</th></tr></thead>';
if (@{$b->{content}{table}}) {
$out .= '<tbody>';
for my $l (@{$b->{content}{table}}) {
$out .= '<tr><td>';
$out .= join('</td><td>', map { $this->_render_inlines($linkrefs, $_) } @{$l});
$out .= '</td></tr>';
}
$out .= '</tbody>';
}
$out .= '</table>';
} else {
confess 'Unexpected block type when rendering HTML output: '.$b->{type};
}
}
# Note: a final new line should always be appended to $out. This is not
Expand Down
105 changes: 72 additions & 33 deletions lib/Markdown/Perl/BlockParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ sub AUTOLOAD { ## no critic (ProhibitAutoloading, RequireArgUnpacking)
return $this->{pmarkdown}->$AUTOLOAD(@_);
}

my $eol_re = qr/ \r\n | \n | \r /x;

sub next_line {
my ($this) = @_;
# When we are forcing a line, we don’t recompute the line_ending, but it
Expand All @@ -71,7 +73,7 @@ sub next_line {
return delete $this->{forced_line} if exists $this->{forced_line};
return if pos($this->{md}) == length($this->{md});
$this->{last_pos} = pos($this->{md});
$this->{md} =~ m/\G([^\n\r]*)(\r\n|\n|\r)?/g or confess 'Should not happen';
$this->{md} =~ m/\G([^\n\r]*)(${eol_re})?/g or confess 'Should not happen';
my ($t, $e) = ($1, $2);
if ($1 =~ /^[ \t]+$/) {
$this->{line_ending} = $t.($e // '') if $this->get_preserve_white_lines;
Expand Down Expand Up @@ -649,23 +651,18 @@ sub _do_link_reference_definition {
$this->redo_line();
my $start_pos = $this->get_pos();

# We consume the prefix of enclosing blocks until we find the marker that we
# know is there. This won’t work if we accept task list markers in the
# future.
# This also won’t work to consume markers of subsequent lines of the link
# reference definition.
# TODO: fix these two bugs above (hard! — although in practice the only
# prefix character that can exist are '>' at the beginning of the line, so
# we could try to count them, we don’t even need to count spaces for the lists
# because the link definition is considered to be paragraph continuation text
# by cmark, the spec seems to accept any number of additional spaces too).
# We consume the continuation prefix of enclosing blocks. Note that in the big
# regex we allow any number of space after the continuation because it’s what
# cmark does.
my $cont = $this->{continuation_re};
$this->{md} =~ m/\G${cont}/g;
confess 'Unexpected regex match failure' unless $this->{md} =~ m/\G${cont}/g;

# TODO:
# - Support for escaped or balanced parenthesis in naked destination
# - break this up in smaller pieces and test them independently.
# - The need to disable ProhibitUnusedCapture seems to be buggy...
# - most of the regex parses only \n and not other eol sequence. The regex
# should either be fixed or the entry be normalized.
## no critic (ProhibitComplexRegexes, ProhibitUnusedCapture)
if (
$this->{md} =~ m/\G
Expand Down Expand Up @@ -722,28 +719,70 @@ sub _do_link_reference_definition {
# https://github.github.com/gfm/#tables-extension-
sub _do_table_block {
my ($this) = @_;
return;

# # TODO: add an option to prevent interrupting a paragraph with a table (and
# # make it be true for pmarkdown, but not for github where tables can interrupt
# # a paragraph).
# return unless $l =~ m/^ {0,3}\|/;
# my $init_pos = $this->get_pos();
# $this->redo_line();
# my $start_pos = $this->get_pos();

# # See the note in the link_reference parsing for this approach. Note that,
# # as opposed to what happens for links, subsequent lines can have at most
# # 3 more spaces than the initial one with the GitHub implementation (but not
# # some other GFM implementations).
# $this->{md} =~ m/\G.*?\|/g;

# # TODO:
# # - break this up in smaller pieces and test them independently.
# ## no critic (ProhibitComplexRegexes)
# if ($this->{md} =~ m/\G/x) { }

# return;
# TODO: add an option to prevent interrupting a paragraph with a table (and
# make it be true for pmarkdown, but not for github where tables can interrupt
# a paragraph).
# TODO: github supports omitting the first | even on the first line when we
# are not interrupting a paragraph and when subsequent the delimiter line has
# more than one dash per cell.
return unless $l =~ m/^ {0,3}\|/;
my $init_pos = $this->get_pos();
$this->redo_line();

my $table = $this->_parse_table_structure();
if (!$table) {
$this->set_pos($init_pos);
return;
}

$this->_add_block({type => 'table', content => $table});

return 1;
}

sub _parse_table_structure {
my ($this) = @_;

# A regexp that matches no back-slashes or an even number of them, so that the
# next character cannot be escaped.
my $e = qr/(?<! \\) (?:\\\\)*/x;

# We consume the continuation prefix of enclosing blocks. Note that,
# as opposed to what happens for links, subsequent lines can have at most
# 3 more spaces than the initial one with the GitHub implementation (but not
# some other GFM implementations).
my $cont = $this->{continuation_re};
confess 'Unexpected regex match failure' unless $this->{md} =~ m/\G${cont}/g;

# Now we consume the initial | marking the beginning of the table that we know
# is here because of the initial match against $l in _do_table_block.
confess 'Unexpected missing table markers' unless $this->{md} =~ m/\G {0,3}\|/g;

# We parse the header row
my @headers = $this->{md} =~ m/\G [ \t]* (.*? [ \t]* $e) \| /gcx;
return unless @headers;

# We consume the end of line that must happen after the headers.
return unless $this->{md} =~ m/\G [ \t]* ${eol_re} ${cont} \ {0,3} \|? /gx;

my @separators = $this->{md} =~ m/\G [ \t]* ( :? -+ :? [ \t]* $e) \| /gcx;
return unless @separators == @headers;

# We consume the end of line that must happen after the headers.
return unless $this->{md} =~ m/\G [ \t]* (:? ${eol_re} | $ ) /gx;

# And now we try to read as many lines as possible
my @table;
while (1) {
last unless $this->{md} =~ m/\G ${cont} \ {0,3} \| /gcx;
my @cells = $this->{md} =~ m/\G [ \t]* (.*? [ \t]* $e) \| /gcx;
# We consume the end of line that must happen after the cells.
return unless $this->{md} =~ m/\G [ \t]* (:? ${eol_re} | $ ) /gx;
push @table, \@cells;
}

return {headers => \@headers, separators => \@separators, table => \@table};
}

# https://spec.commonmark.org/0.30/#paragraphs
Expand Down
2 changes: 1 addition & 1 deletion t/901-github-test-suite.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use CmarkTest;
use Test2::V0;

# TODO: remove these todos.
my %opt = (todo => [198 .. 202, 204, 205],
my %opt = (todo => [199 .. 202, 204, 205],
# These are bugs in the GitHub spec, not in our implementation. All
# of these have been tested to be buggy in the real cmark-gfm
# implementation.
Expand Down

0 comments on commit 1b9d7e4

Please sign in to comment.