From 2b22d239ad594ff6fbe0d49ad5ba27a6e97115fb Mon Sep 17 00:00:00 2001 From: chrispy Date: Mon, 15 Jan 2024 07:59:39 -0500 Subject: [PATCH 01/13] avoid text normalization/escaping in any preformatted/code context Signed-off-by: chrispy --- markdownify/__init__.py | 9 ++++----- tests/test_conversions.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index e15ecd4..bf765ec 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -152,13 +152,12 @@ def is_nested_node(el): def process_text(self, el): text = six.text_type(el) or '' - # dont remove any whitespace when handling pre or code in pre - if not (el.parent.name == 'pre' - or (el.parent.name == 'code' - and el.parent.parent.name == 'pre')): + # normalize whitespace if we're not inside a preformatted element + if not el.find_parent('pre'): text = whitespace_re.sub(' ', text) - if el.parent.name != 'code' and el.parent.name != 'pre': + # escape special characters if we're not inside a preformatted or code element + if not el.find_parent(['pre', 'code', 'kbd', 'samp']): text = self.escape(text) # remove trailing whitespaces if any of the following condition is true: diff --git a/tests/test_conversions.py b/tests/test_conversions.py index da78649..1ecaf3c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -70,7 +70,12 @@ def test_br(): def test_code(): inline_tests('code', '`') - assert md('this_should_not_escape') == '`this_should_not_escape`' + assert md('*this_should_not_escape*') == '`*this_should_not_escape*`' + assert md('*this_should_not_escape*') == '`*this_should_not_escape*`' + assert md('*this_should_not_escape*') == '`*this_should_not_escape*`' + assert md('*this_should_not_escape*') == '`*this_should_not_escape*`' + assert md('this should\t\tnormalize') == '`this should normalize`' + assert md('this should\t\tnormalize') == '`this should normalize`' def test_del(): @@ -187,7 +192,10 @@ def test_p(): def test_pre(): assert md('
test\n    foo\nbar
') == '\n```\ntest\n foo\nbar\n```\n' assert md('
test\n    foo\nbar
') == '\n```\ntest\n foo\nbar\n```\n' - assert md('
this_should_not_escape
') == '\n```\nthis_should_not_escape\n```\n' + assert md('
*this_should_not_escape*
') == '\n```\n*this_should_not_escape*\n```\n' + assert md('
*this_should_not_escape*
') == '\n```\n*this_should_not_escape*\n```\n' + assert md('
\t\tthis  should\t\tnot  normalize
') == '\n```\n\t\tthis should\t\tnot normalize\n```\n' + assert md('
\t\tthis  should\t\tnot  normalize
') == '\n```\n\t\tthis should\t\tnot normalize\n```\n' def test_s(): From 60967c1c9563405a06ebae2735a049e37258b689 Mon Sep 17 00:00:00 2001 From: "Thomas L. Kjeldsen" Date: Mon, 11 Mar 2024 21:07:24 +0100 Subject: [PATCH 02/13] ignore script and style content (such as css and javascript) (#112) --- markdownify/__init__.py | 6 ++++++ tests/test_conversions.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index bf765ec..151a222 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -350,6 +350,12 @@ def convert_pre(self, el, text, convert_as_inline): return '\n```%s\n%s\n```\n' % (code_language, text) + def convert_script(self, el, text, convert_as_inline): + return '' + + def convert_style(self, el, text, convert_as_inline): + return '' + convert_s = convert_del convert_strong = convert_b diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 1ecaf3c..5c6ec06 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -198,6 +198,14 @@ def test_pre(): assert md('
\t\tthis  should\t\tnot  normalize
') == '\n```\n\t\tthis should\t\tnot normalize\n```\n' +def test_script(): + assert md('foo bar') == 'foo bar' + + +def test_style(): + assert md('foo bar') == 'foo bar' + + def test_s(): inline_tests('s', '~~') From a2f82678f762061527473692b6a0ac7287152959 Mon Sep 17 00:00:00 2001 From: G <17325189+GeeCastro@users.noreply.github.com> Date: Mon, 11 Mar 2024 20:10:08 +0000 Subject: [PATCH 03/13] Add no css example to readme (#111) * Add no css example --------- Co-authored-by: G <17325189+Chichilele@users.noreply.github.com> --- README.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.rst b/README.rst index 06f319b..21a5972 100644 --- a/README.rst +++ b/README.rst @@ -173,6 +173,22 @@ change: def md(html, **options): return ImageBlockConverter(**options).convert(html) +.. code:: python + + from markdownify import MarkdownConverter + + class NoCssConverter(MarkdownConverter): + """ + Create a custom MarkdownConverter that removes the CSS code by ignoring the `style` tag + """ + def convert_style(self, el, text, convert_as_inline): + return '' + + # Create shorthand method for conversion + def md(html, **options): + return NoCssConverter(**options).convert(html) + + Command Line Interface ====================== From f33ccd7c1aef77dcfc4751715f1008fa8d519b46 Mon Sep 17 00:00:00 2001 From: Veronika Butkevich <13301101+5yato4ok@users.noreply.github.com> Date: Tue, 26 Mar 2024 20:46:30 +0100 Subject: [PATCH 04/13] Fix newline start in header tags (#89) * Fix newline start in header tags --- markdownify/__init__.py | 2 +- tests/test_conversions.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 151a222..a60400c 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -265,7 +265,7 @@ def convert_hn(self, n, el, text, convert_as_inline): return text style = self.options['heading_style'].lower() - text = text.rstrip() + text = text.strip() if style == UNDERLINED and n <= 2: line = '=' if n == 1 else '-' return self.underline(text, line) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 5c6ec06..b03c874 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -90,6 +90,14 @@ def test_em(): inline_tests('em', '*') +def test_header_with_space(): + assert md('

\n\nHello

') == '### Hello\n\n' + assert md('

\n\nHello

') == '#### Hello\n\n' + assert md('
\n\nHello
') == '##### Hello\n\n' + assert md('
\n\nHello\n\n
') == '##### Hello\n\n' + assert md('
\n\nHello \n\n
') == '##### Hello\n\n' + + def test_h1(): assert md('

Hello

') == 'Hello\n=====\n\n' From 0477a0c8a08b0f9b0ada8d22090a8bb6915085f7 Mon Sep 17 00:00:00 2001 From: Carina de Oliveira Antunes Date: Tue, 26 Mar 2024 20:49:50 +0100 Subject: [PATCH 05/13] convert_td: strip text (#91) --- markdownify/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index a60400c..eaa58c1 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -370,7 +370,7 @@ def convert_table(self, el, text, convert_as_inline): return '\n\n' + text + '\n' def convert_td(self, el, text, convert_as_inline): - return ' ' + text + ' |' + return ' ' + text.strip() + ' |' def convert_th(self, el, text, convert_as_inline): return ' ' + text + ' |' From 96a25cfbf3c4b00d226d20ee97d9d7f39c2c60e0 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Tue, 26 Mar 2024 21:05:31 +0100 Subject: [PATCH 06/13] added tests for linebreaks in table cells --- markdownify/__init__.py | 2 +- tests/test_tables.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index eaa58c1..f9b5f9b 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -370,7 +370,7 @@ def convert_table(self, el, text, convert_as_inline): return '\n\n' + text + '\n' def convert_td(self, el, text, convert_as_inline): - return ' ' + text.strip() + ' |' + return ' ' + text.strip().replace("\n", " ") + ' |' def convert_th(self, el, text, convert_as_inline): return ' ' + text + ' |' diff --git a/tests/test_tables.py b/tests/test_tables.py index e0c07ea..334bfb7 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -57,6 +57,26 @@ """ +table_with_linebreaks = """ + + + + + + + + + + + + + + + +
FirstnameLastnameAge
JillSmith + Jackson50
EveJackson + Smith94
""" + table_with_header_column = """ @@ -164,6 +184,7 @@ def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' From 2f9a42d3b8873e9ac4ce91379c8f36fc610e7ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Tue, 26 Mar 2024 21:07:28 +0100 Subject: [PATCH 07/13] Strip text before adding blockquote markers (#76) --- markdownify/__init__.py | 2 +- tests/test_conversions.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index f9b5f9b..1e7b787 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -237,7 +237,7 @@ def convert_blockquote(self, el, text, convert_as_inline): if convert_as_inline: return text - return '\n' + (line_beginning_re.sub('> ', text) + '\n\n') if text else '' + return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else '' def convert_br(self, el, text, convert_as_inline): if convert_as_inline: diff --git a/tests/test_conversions.py b/tests/test_conversions.py index b03c874..ae56837 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -52,6 +52,12 @@ def test_b_spaces(): def test_blockquote(): assert md('
Hello
') == '\n> Hello\n\n' + assert md('
\nHello\n
') == '\n> Hello\n\n' + + +def test_blockquote_with_nested_paragraph(): + assert md('

Hello

') == '\n> Hello\n\n' + assert md('

Hello

Hello again

') == '\n> Hello\n> \n> Hello again\n\n' def test_blockquote_with_paragraph(): @@ -60,7 +66,7 @@ def test_blockquote_with_paragraph(): def test_blockquote_nested(): text = md('
And she was like
Hello
') - assert text == '\n> And she was like \n> > Hello\n> \n> \n\n' + assert text == '\n> And she was like \n> > Hello\n\n' def test_br(): From 7d0bf46057558508bd122b2fcfe5419276d59e71 Mon Sep 17 00:00:00 2001 From: Chris Papademetrious Date: Tue, 26 Mar 2024 16:15:22 -0400 Subject: [PATCH 08/13] revert workaround example in README.rst for