Special-case use of HTML tags for converting <sub> / <sup> (#119)

Allow different strings before / after `<sub>` / `<sup>` content. In particular, this allows setting `sub_symbol='<sub>'`, `sup_symbol='<sup>'`, to use raw HTML in the output when converting subscripts and superscripts.
matthewwithanm · Jun 23, 2024 · 7861b33 · 7861b33
1 parent 2ec3338
commit 7861b33
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 3 deletions.
diff --git a/README.rst b/README.rst
@@ -87,7 +87,11 @@ strong_em_symbol
 sub_symbol, sup_symbol
   Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an
   empty string, because this is non-standard behavior. Could be something like
-  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.
+  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.  If the value starts
+  with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is
+  inserted after the ``<`` in the string used after the text; this allows
+  specifying ``<sub>`` to use raw HTML in the output for subscripts, for
+  example.
 
 newline_style
   Defines the style of marking linebreaks (``<br>``) in markdown. The default

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
@@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn):
     """
     This abstracts all simple inline tags like b, em, del, ...
     Returns a function that wraps the chomped text in a pair of the string
-    that is returned by markup_fn. markup_fn is necessary to allow for
+    that is returned by markup_fn, with '/' inserted in the string used after
+    the text if it looks like an HTML tag. markup_fn is necessary to allow for
     references to self.strong_em_symbol etc.
     """
     def implementation(self, el, text, convert_as_inline):
         markup = markup_fn(self)
+        if markup.startswith('<') and markup.endswith('>'):
+            markup_after = '</' + markup[1:]
+        else:
+            markup_after = markup
         if el.find_parent(['pre', 'code', 'kbd', 'samp']):
             return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
-        return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix)
+        return '%s%s%s%s%s' % (prefix, markup, text, markup_after, suffix)
     return implementation
 
 

diff --git a/tests/test_conversions.py b/tests/test_conversions.py
@@ -268,11 +268,13 @@ def test_strong_em_symbol():
 def test_sub():
     assert md('<sub>foo</sub>') == 'foo'
     assert md('<sub>foo</sub>', sub_symbol='~') == '~foo~'
+    assert md('<sub>foo</sub>', sub_symbol='<sub>') == '<sub>foo</sub>'
 
 
 def test_sup():
     assert md('<sup>foo</sup>') == 'foo'
     assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
+    assert md('<sup>foo</sup>', sup_symbol='<sup>') == '<sup>foo</sup>'
 
 
 def test_lang():