-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Write new Format and Formats types, some helper functions
A new Format type enumerates exactly what formats Pandoc can recognize in some way (input, output, raw content). A new Formats type and related functions make it easier for writers to determine when they can include raw content in their output. None of the types in Definitions have been changed yet.
- Loading branch information
Showing
4 changed files
with
399 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,358 @@ | ||
{-# LANGUAGE DeriveDataTypeable #-} | ||
{-# LANGUAGE DeriveGeneric #-} | ||
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
{-# LANGUAGE NoImplicitPrelude #-} | ||
|
||
{- | | ||
Module : Text.Pandoc.Format | ||
Copyright : Copyright 2020 Christian Despres | ||
License : BSD3 | ||
Maintainer : Christian Despres <christian.j.j.despres@gmail.com> | ||
Stability : alpha | ||
Portability : portable | ||
Definition of a 'Format' type that enumerates the formats that Pandoc | ||
recognizes, and a 'Formats' type and combinators to implement 'Format' | ||
patterns and matching. This module was designed to have its functions | ||
imported qualified. | ||
-} | ||
|
||
module Text.Pandoc.Format | ||
( -- * The 'Format' type | ||
Format(..) | ||
|
||
-- * The 'Formats' pattern language | ||
-- $formats | ||
, Formats(..) | ||
, ToFormats(..) | ||
, anyOf | ||
, noneOf | ||
, anything | ||
, nothing | ||
, and | ||
, or | ||
, except | ||
, not | ||
|
||
-- * Matching against 'Formats' | ||
, matches | ||
|
||
-- * Direct 'Formats' conversions | ||
, listMatches | ||
, exactly | ||
, toList | ||
) | ||
where | ||
|
||
import Control.DeepSeq | ||
import Data.Foldable ( foldl' ) | ||
import Data.Generics ( Data | ||
, Typeable | ||
) | ||
import Data.Set ( Set ) | ||
import qualified Data.Set as Set | ||
import GHC.Generics ( Generic ) | ||
import Prelude hiding ( not | ||
, any | ||
, or | ||
, and | ||
) | ||
|
||
-- | An enumeration of the formats that Pandoc can recognize in some | ||
-- way. The formats are listed with the string that will specify them | ||
-- in input, output, or as the attribute of a | ||
-- 'Text.Pandoc.Definition.RawBlock' or | ||
-- 'Text.Pandoc.Definition.RawInline' (when applicable). | ||
data Format | ||
= AsciiDoc -- ^ asciidoc | ||
| AsciiDoctor -- ^ asciidoctor | ||
| Beamer -- ^ beamer | ||
| CommonMark -- ^ commonmark | ||
| CommonMarkX -- ^ commonmark_x | ||
| ConTeXt -- ^ context | ||
| Creole -- ^ creole | ||
| Csv -- ^ csv | ||
| DocBook -- ^ docbook | ||
| DocBook4 -- ^ docbook4 | ||
| DocBook5 -- ^ docbook5 | ||
| Docx -- ^ docx | ||
| DokuWiki -- ^ dokuwiki | ||
| Dzslides -- ^ dzslides | ||
| Epub -- ^ epub | ||
| Epub2 -- ^ epub2 | ||
| Epub3 -- ^ epub3 | ||
| FictionBook -- ^ fb2 | ||
| Gfm -- ^ gfm | ||
| Haddock -- ^ haddock | ||
| Html -- ^ html | ||
| Html4 -- ^ html4 | ||
| Html5 -- ^ html5 | ||
| Icml -- ^ icml | ||
| Jats -- ^ jats | ||
| JatsArchiving -- ^ jats_archiving | ||
| JatsArticleAuthoring -- ^ jats_articleauthoring | ||
| JatsPublishing -- ^ jats_publishing | ||
| Jira -- ^ jira | ||
| Json -- ^ json | ||
| Jupyter -- ^ ipynb | ||
| LaTeX -- ^ latex | ||
| Man -- ^ man | ||
| Markdown -- ^ markdown | ||
| MarkdownGithub -- ^ markdown_github (deprecated) | ||
| MarkdownMmd -- ^ markdown_mmd | ||
| MarkdownPhpExtra -- ^ markdown_phpextra | ||
| MarkdownStrict -- ^ markdown_strict | ||
| MediaWiki -- ^ mediawiki | ||
| Ms -- ^ ms | ||
| Muse -- ^ muse | ||
| Native -- ^ native | ||
| Odt -- ^ odt | ||
| OpenDocument -- ^ opendocument | ||
| OpenXml -- ^ openxml | ||
| Opml -- ^ opml | ||
| Org -- ^ org | ||
| Pdf -- ^ pdf | ||
| Plain -- ^ plain | ||
| Pptx -- ^ pptx | ||
| RevealJS -- ^ revealjs | ||
| Rst -- ^ rst | ||
| Rtf -- ^ rtf | ||
| S5 -- ^ s5 | ||
| Slideous -- ^ slideous | ||
| Slidy -- ^ slidy | ||
| T2T -- ^ t2t | ||
| TWiki -- ^ twiki | ||
| TeX -- ^ tex | ||
| Tei -- ^ tei | ||
| Texinfo -- ^ texinfo | ||
| Textile -- ^ textile | ||
| TikiWiki -- ^ tikiwiki | ||
| VimWiki -- ^ vimwiki | ||
| XWiki -- ^ xwiki | ||
| ZimWiki -- ^ zimwiki | ||
deriving (Eq, Ord, Read, Show, Enum, Bounded, Typeable, Data, Generic) | ||
|
||
-- $formats | ||
-- The 'Formats' type expresses patterns that a particular 'Format' | ||
-- can be matched against. These patterns are intended to capture the | ||
-- fuzziness and sub-format relationships between the different | ||
-- elements of 'Format', so that writers can figure out when to | ||
-- include raw elements in their output. This happens in the 'Formats' | ||
-- combinators, which take @'ToFormats'@ arguments that can interpret | ||
-- a particular 'Format' as representing multiple 'Format's. | ||
-- | ||
-- When a writer encounters a @'Text.Pandoc.Definition.RawBlock' f | ||
-- str@, it can use 'matches' to test what @str@ is, like this: | ||
-- | ||
-- > f `matches` LaTeX -- can str be included in LaTeX content? | ||
-- > f `matches` Markdown -- can str be included in any Markdown content? | ||
-- | ||
-- but the Boolean algebra operations on 'Formats' can construct more | ||
-- general patterns. Some concrete examples: | ||
-- | ||
-- > TeX `matches` TeX = True | ||
-- > TeX `matches` LaTeX = True | ||
-- > LaTeX `matches` TeX = False | ||
-- > TeX `matches` (LaTeX `or` Html) = True | ||
-- > LaTeX `matches` (LaTeX `except` Beamer) = False | ||
-- > LaTeX `matches` (Beamer `except` TeX) = True | ||
-- > TeX `matches` (LaTeX `and` Html) = False | ||
-- > LaTeX `matches` not Html = True | ||
|
||
-- | A 'Formats' value is a pattern that a particular 'Format' can be | ||
-- matched against using 'matches'. At a lower level, the expression | ||
-- @Formats x@ means "any of the formats in @x@". | ||
-- | ||
-- The 'Monoid' instance has @mempty = 'nothing'@ and @(<>) = 'or'@. | ||
newtype Formats = Formats (Set Format) | ||
deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Semigroup, Monoid) | ||
|
||
-- | The class of types that can be interpreted as a 'Formats' | ||
-- pattern. The instance for 'Format' is "fuzzy", in that a 'Format' | ||
-- will become a pattern that matches against multiple formats (see | ||
-- 'listMatches'). So, for instance, | ||
-- | ||
-- @ | ||
-- 'toFormats' 'Beamer' = 'exactly' ['Beamer', 'LaTeX', 'TeX'] | ||
-- @ | ||
-- | ||
-- and for a 'Format' in general, | ||
-- | ||
-- @ | ||
-- 'toFormats' = 'listMatches' | ||
-- @ | ||
-- | ||
-- Note that the instance for 'Formats' is simply @'toFormats' = id@, | ||
-- i.e. a 'Formats' pattern is never itself interpreted fuzzily. | ||
class ToFormats a where | ||
toFormats :: a -> Formats | ||
|
||
instance ToFormats Formats where | ||
toFormats = id | ||
|
||
instance ToFormats Format where | ||
toFormats = listMatches | ||
|
||
-- | 'anything' matches any format. | ||
anything :: Formats | ||
anything = exactly [minBound .. maxBound] | ||
|
||
-- | 'nothing' matches no format. Note that @nothing = mempty@. | ||
nothing :: Formats | ||
nothing = mempty | ||
|
||
-- | 'anyOf' matches any of the given patterns. | ||
anyOf :: (Foldable t, ToFormats a) => t a -> Formats | ||
anyOf = foldl' go mempty where go x y = x `or` toFormats y | ||
|
||
-- | 'noneOf' matches anything except the given patterns. | ||
noneOf :: (Foldable t, ToFormats a) => t a -> Formats | ||
noneOf = foldl' go anything where go x y = x `except` toFormats y | ||
|
||
-- | 'exactly' matches exactly the given formats. You will normally | ||
-- want to use the functions accepting 'ToFormats' types to construct | ||
-- a 'Formats' pattern instead of using this directly. | ||
exactly :: [Format] -> Formats | ||
exactly = Formats . Set.fromList | ||
|
||
-- | @'and' x y@ matches a format @f@ when @f@ matches both @x@ and | ||
-- @y@. | ||
and :: (ToFormats a, ToFormats b) => a -> b -> Formats | ||
and x y = Formats $ s `Set.intersection` t | ||
where | ||
Formats s = toFormats x | ||
Formats t = toFormats y | ||
|
||
-- | @'or' x y@ matches a format @f@ when @f@ matches either @x@ or | ||
-- @y@. | ||
or :: (ToFormats a, ToFormats b) => a -> b -> Formats | ||
or x y = Formats $ s `Set.union` t | ||
where | ||
Formats s = toFormats x | ||
Formats t = toFormats y | ||
|
||
-- | @'not' x@ matches a format @f@ when @f@ does not match @x@. | ||
not :: (ToFormats a) => a -> Formats | ||
not t = anything `except` (toFormats t) | ||
|
||
-- | @'except' x y@ matches a format @f@ when @f@ matches @x@ and | ||
-- not @y@. | ||
-- | ||
-- > x `except` y = x `and` not y | ||
except :: (ToFormats a, ToFormats b) => a -> b -> Formats | ||
except x y = Formats $ s `Set.difference` t | ||
where | ||
Formats s = toFormats x | ||
Formats t = toFormats y | ||
|
||
-- | Test if the given 'Format' matches against a pattern. | ||
-- | ||
-- The 'matches' relation is reflexive (@x \`matches\` x@ is always | ||
-- @True@) and transitive (if @x \`matches\` y@ and @y \`matches\` z@ | ||
-- are @True@, then @x \`matches\` z@ is @True@). | ||
matches :: ToFormats a => Format -> a -> Bool | ||
matches f p = f `Set.member` s where Formats s = toFormats p | ||
|
||
-- | The 'listMatches' function defines how other formats will match | ||
-- against the given 'Format', considered as a 'Formats' pattern. It | ||
-- is intended to capture a sub-format/super-format relation on | ||
-- 'Format'. | ||
-- | ||
-- A general guideline is that if @x@ matches @'listMatches' y@, then | ||
-- whenever a raw element of format @y@ can be included in a document, | ||
-- a raw element of format @x@ can be included in the same place and | ||
-- in the same way. It is also generally true that @y@ will be a | ||
-- subset of @x@, or will be based on @x@ in some way. So | ||
-- @'listMatches' y@ lists all the formats not-less-general than @y@ | ||
-- in that sense (the super-formats of @y@). | ||
-- | ||
-- Some examples: | ||
-- | ||
-- @ | ||
-- listMatches Beamer = exactly [Beamer, LaTeX, TeX] | ||
-- listMatches Epub3 = exactly [Epub, Epub3, Html, Html5] | ||
-- listMatches Markdown = exactly | ||
-- [Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
-- listMatches TeX = exactly [TeX] | ||
-- @ | ||
listMatches :: Format -> Formats | ||
listMatches AsciiDoc = exactly [AsciiDoc] | ||
listMatches AsciiDoctor = exactly [AsciiDoctor] | ||
listMatches Beamer = exactly [Beamer, LaTeX, TeX] | ||
listMatches CommonMark = exactly [CommonMark] | ||
listMatches CommonMarkX = exactly [CommonMarkX] | ||
listMatches ConTeXt = exactly [ConTeXt, TeX] | ||
listMatches Creole = exactly [Creole] | ||
listMatches Csv = exactly [Csv] | ||
listMatches DocBook = exactly [DocBook] | ||
listMatches DocBook4 = exactly [DocBook4] | ||
listMatches DocBook5 = exactly [DocBook5] | ||
listMatches Docx = exactly [Docx] | ||
listMatches DokuWiki = exactly [DokuWiki] | ||
listMatches Dzslides = exactly [Dzslides] | ||
listMatches Epub = exactly [Epub, Html] | ||
listMatches Epub2 = exactly [Epub, Epub2, Html, Html4] | ||
listMatches Epub3 = exactly [Epub, Epub3, Html, Html5] | ||
listMatches FictionBook = exactly [FictionBook] | ||
listMatches Gfm = exactly [Gfm] | ||
listMatches Haddock = exactly [Haddock] | ||
listMatches Html = exactly [Html] | ||
listMatches Html4 = exactly [Html, Html4] | ||
listMatches Html5 = exactly [Html, Html5] | ||
listMatches Icml = exactly [Icml] | ||
listMatches Jats = exactly [Jats] | ||
listMatches JatsArchiving = exactly [JatsArchiving] | ||
listMatches JatsArticleAuthoring = exactly [JatsArticleAuthoring] | ||
listMatches JatsPublishing = exactly [JatsPublishing] | ||
listMatches Jira = exactly [Jira] | ||
listMatches Json = exactly [Json] | ||
listMatches Jupyter = exactly [Jupyter] | ||
listMatches LaTeX = exactly [LaTeX, TeX] | ||
listMatches Man = exactly [Man] | ||
listMatches Markdown = exactly | ||
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
listMatches MarkdownGithub = exactly | ||
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
listMatches MarkdownMmd = exactly | ||
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
listMatches MarkdownPhpExtra = exactly | ||
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
listMatches MarkdownStrict = exactly | ||
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict] | ||
listMatches MediaWiki = exactly [MediaWiki] | ||
listMatches Ms = exactly [Ms] | ||
listMatches Muse = exactly [Muse] | ||
listMatches Native = exactly [Native] | ||
listMatches Odt = exactly [Odt] | ||
listMatches OpenDocument = exactly [OpenDocument] | ||
listMatches OpenXml = exactly [OpenXml] | ||
listMatches Opml = exactly [Opml] | ||
listMatches Org = exactly [Org] | ||
listMatches Pdf = exactly [Pdf] | ||
listMatches Plain = exactly [Plain] | ||
listMatches Pptx = exactly [Pptx] | ||
listMatches RevealJS = exactly [RevealJS] | ||
listMatches Rst = exactly [Rst] | ||
listMatches Rtf = exactly [Rtf] | ||
listMatches S5 = exactly [S5] | ||
listMatches Slideous = exactly [Slideous] | ||
listMatches Slidy = exactly [Slidy] | ||
listMatches T2T = exactly [T2T] | ||
listMatches TWiki = exactly [TWiki] | ||
listMatches TeX = exactly [TeX] | ||
listMatches Tei = exactly [Tei] | ||
listMatches Texinfo = exactly [Texinfo] | ||
listMatches Textile = exactly [Textile] | ||
listMatches TikiWiki = exactly [TikiWiki] | ||
listMatches VimWiki = exactly [VimWiki] | ||
listMatches XWiki = exactly [XWiki] | ||
listMatches ZimWiki = exactly [ZimWiki] | ||
|
||
-- | Convert a 'Formats' into its underlying list of successful | ||
-- matches. | ||
toList :: Formats -> [Format] | ||
toList (Formats s) = Set.toList s | ||
|
||
instance NFData Format | ||
instance NFData Formats |
Oops, something went wrong.