Skip to content

Commit

Permalink
Write new Format and Formats types, some helper functions
Browse files Browse the repository at this point in the history
A new Format type enumerates exactly what formats Pandoc can recognize
in some way (input, output, raw content).

A new Formats type and related functions make it easier for writers to
determine when they can include raw content in their output.

None of the types in Definitions have been changed yet.
  • Loading branch information
despresc committed Sep 4, 2020
1 parent 8e9ca37 commit bfbc5fc
Show file tree
Hide file tree
Showing 4 changed files with 399 additions and 0 deletions.
1 change: 1 addition & 0 deletions pandoc-types.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Library
Text.Pandoc.Builder
Text.Pandoc.JSON
Text.Pandoc.Arbitrary
Text.Pandoc.Format
Other-modules: Paths_pandoc_types
Autogen-modules: Paths_pandoc_types
Build-depends: base >= 4.5 && < 5,
Expand Down
9 changes: 9 additions & 0 deletions src/Text/Pandoc/Arbitrary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ where
import Test.QuickCheck
import Control.Applicative (Applicative ((<*>), pure), (<$>))
import Control.Monad (forM)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import qualified Text.Pandoc.Format as F

realString :: Gen Text
realString = fmap T.pack $ resize 8 $ listOf $ frequency [ (9, elements [' '..'\127'])
Expand Down Expand Up @@ -398,3 +400,10 @@ instance Arbitrary ListNumberDelim where
2 -> return OneParen
3 -> return TwoParens
_ -> error "FATAL ERROR: Arbitrary instance, logic bug"

instance Arbitrary F.Format where
arbitrary = arbitraryBoundedEnum

instance Arbitrary F.Formats where
arbitrary = F.exactly <$> arbitrary
shrink (F.Formats s) = F.exactly <$> shrink (Set.toList s)
358 changes: 358 additions & 0 deletions src/Text/Pandoc/Format.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,358 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NoImplicitPrelude #-}

{- |
Module : Text.Pandoc.Format
Copyright : Copyright 2020 Christian Despres
License : BSD3
Maintainer : Christian Despres <christian.j.j.despres@gmail.com>
Stability : alpha
Portability : portable
Definition of a 'Format' type that enumerates the formats that Pandoc
recognizes, and a 'Formats' type and combinators to implement 'Format'
patterns and matching. This module was designed to have its functions
imported qualified.
-}

module Text.Pandoc.Format
( -- * The 'Format' type
Format(..)

-- * The 'Formats' pattern language
-- $formats
, Formats(..)
, ToFormats(..)
, anyOf
, noneOf
, anything
, nothing
, and
, or
, except
, not

-- * Matching against 'Formats'
, matches

-- * Direct 'Formats' conversions
, listMatches
, exactly
, toList
)
where

import Control.DeepSeq
import Data.Foldable ( foldl' )
import Data.Generics ( Data
, Typeable
)
import Data.Set ( Set )
import qualified Data.Set as Set
import GHC.Generics ( Generic )
import Prelude hiding ( not
, any
, or
, and
)

-- | An enumeration of the formats that Pandoc can recognize in some
-- way. The formats are listed with the string that will specify them
-- in input, output, or as the attribute of a
-- 'Text.Pandoc.Definition.RawBlock' or
-- 'Text.Pandoc.Definition.RawInline' (when applicable).
data Format
= AsciiDoc -- ^ asciidoc
| AsciiDoctor -- ^ asciidoctor
| Beamer -- ^ beamer
| CommonMark -- ^ commonmark
| CommonMarkX -- ^ commonmark_x
| ConTeXt -- ^ context
| Creole -- ^ creole
| Csv -- ^ csv
| DocBook -- ^ docbook
| DocBook4 -- ^ docbook4
| DocBook5 -- ^ docbook5
| Docx -- ^ docx
| DokuWiki -- ^ dokuwiki
| Dzslides -- ^ dzslides
| Epub -- ^ epub
| Epub2 -- ^ epub2
| Epub3 -- ^ epub3
| FictionBook -- ^ fb2
| Gfm -- ^ gfm
| Haddock -- ^ haddock
| Html -- ^ html
| Html4 -- ^ html4
| Html5 -- ^ html5
| Icml -- ^ icml
| Jats -- ^ jats
| JatsArchiving -- ^ jats_archiving
| JatsArticleAuthoring -- ^ jats_articleauthoring
| JatsPublishing -- ^ jats_publishing
| Jira -- ^ jira
| Json -- ^ json
| Jupyter -- ^ ipynb
| LaTeX -- ^ latex
| Man -- ^ man
| Markdown -- ^ markdown
| MarkdownGithub -- ^ markdown_github (deprecated)
| MarkdownMmd -- ^ markdown_mmd
| MarkdownPhpExtra -- ^ markdown_phpextra
| MarkdownStrict -- ^ markdown_strict
| MediaWiki -- ^ mediawiki
| Ms -- ^ ms
| Muse -- ^ muse
| Native -- ^ native
| Odt -- ^ odt
| OpenDocument -- ^ opendocument
| OpenXml -- ^ openxml
| Opml -- ^ opml
| Org -- ^ org
| Pdf -- ^ pdf
| Plain -- ^ plain
| Pptx -- ^ pptx
| RevealJS -- ^ revealjs
| Rst -- ^ rst
| Rtf -- ^ rtf
| S5 -- ^ s5
| Slideous -- ^ slideous
| Slidy -- ^ slidy
| T2T -- ^ t2t
| TWiki -- ^ twiki
| TeX -- ^ tex
| Tei -- ^ tei
| Texinfo -- ^ texinfo
| Textile -- ^ textile
| TikiWiki -- ^ tikiwiki
| VimWiki -- ^ vimwiki
| XWiki -- ^ xwiki
| ZimWiki -- ^ zimwiki
deriving (Eq, Ord, Read, Show, Enum, Bounded, Typeable, Data, Generic)

-- $formats
-- The 'Formats' type expresses patterns that a particular 'Format'
-- can be matched against. These patterns are intended to capture the
-- fuzziness and sub-format relationships between the different
-- elements of 'Format', so that writers can figure out when to
-- include raw elements in their output. This happens in the 'Formats'
-- combinators, which take @'ToFormats'@ arguments that can interpret
-- a particular 'Format' as representing multiple 'Format's.
--
-- When a writer encounters a @'Text.Pandoc.Definition.RawBlock' f
-- str@, it can use 'matches' to test what @str@ is, like this:
--
-- > f `matches` LaTeX -- can str be included in LaTeX content?
-- > f `matches` Markdown -- can str be included in any Markdown content?
--
-- but the Boolean algebra operations on 'Formats' can construct more
-- general patterns. Some concrete examples:
--
-- > TeX `matches` TeX = True
-- > TeX `matches` LaTeX = True
-- > LaTeX `matches` TeX = False
-- > TeX `matches` (LaTeX `or` Html) = True
-- > LaTeX `matches` (LaTeX `except` Beamer) = False
-- > LaTeX `matches` (Beamer `except` TeX) = True
-- > TeX `matches` (LaTeX `and` Html) = False
-- > LaTeX `matches` not Html = True

-- | A 'Formats' value is a pattern that a particular 'Format' can be
-- matched against using 'matches'. At a lower level, the expression
-- @Formats x@ means "any of the formats in @x@".
--
-- The 'Monoid' instance has @mempty = 'nothing'@ and @(<>) = 'or'@.
newtype Formats = Formats (Set Format)
deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Semigroup, Monoid)

-- | The class of types that can be interpreted as a 'Formats'
-- pattern. The instance for 'Format' is "fuzzy", in that a 'Format'
-- will become a pattern that matches against multiple formats (see
-- 'listMatches'). So, for instance,
--
-- @
-- 'toFormats' 'Beamer' = 'exactly' ['Beamer', 'LaTeX', 'TeX']
-- @
--
-- and for a 'Format' in general,
--
-- @
-- 'toFormats' = 'listMatches'
-- @
--
-- Note that the instance for 'Formats' is simply @'toFormats' = id@,
-- i.e. a 'Formats' pattern is never itself interpreted fuzzily.
class ToFormats a where
toFormats :: a -> Formats

instance ToFormats Formats where
toFormats = id

instance ToFormats Format where
toFormats = listMatches

-- | 'anything' matches any format.
anything :: Formats
anything = exactly [minBound .. maxBound]

-- | 'nothing' matches no format. Note that @nothing = mempty@.
nothing :: Formats
nothing = mempty

-- | 'anyOf' matches any of the given patterns.
anyOf :: (Foldable t, ToFormats a) => t a -> Formats
anyOf = foldl' go mempty where go x y = x `or` toFormats y

-- | 'noneOf' matches anything except the given patterns.
noneOf :: (Foldable t, ToFormats a) => t a -> Formats
noneOf = foldl' go anything where go x y = x `except` toFormats y

-- | 'exactly' matches exactly the given formats. You will normally
-- want to use the functions accepting 'ToFormats' types to construct
-- a 'Formats' pattern instead of using this directly.
exactly :: [Format] -> Formats
exactly = Formats . Set.fromList

-- | @'and' x y@ matches a format @f@ when @f@ matches both @x@ and
-- @y@.
and :: (ToFormats a, ToFormats b) => a -> b -> Formats
and x y = Formats $ s `Set.intersection` t
where
Formats s = toFormats x
Formats t = toFormats y

-- | @'or' x y@ matches a format @f@ when @f@ matches either @x@ or
-- @y@.
or :: (ToFormats a, ToFormats b) => a -> b -> Formats
or x y = Formats $ s `Set.union` t
where
Formats s = toFormats x
Formats t = toFormats y

-- | @'not' x@ matches a format @f@ when @f@ does not match @x@.
not :: (ToFormats a) => a -> Formats
not t = anything `except` (toFormats t)

-- | @'except' x y@ matches a format @f@ when @f@ matches @x@ and
-- not @y@.
--
-- > x `except` y = x `and` not y
except :: (ToFormats a, ToFormats b) => a -> b -> Formats
except x y = Formats $ s `Set.difference` t
where
Formats s = toFormats x
Formats t = toFormats y

-- | Test if the given 'Format' matches against a pattern.
--
-- The 'matches' relation is reflexive (@x \`matches\` x@ is always
-- @True@) and transitive (if @x \`matches\` y@ and @y \`matches\` z@
-- are @True@, then @x \`matches\` z@ is @True@).
matches :: ToFormats a => Format -> a -> Bool
matches f p = f `Set.member` s where Formats s = toFormats p

-- | The 'listMatches' function defines how other formats will match
-- against the given 'Format', considered as a 'Formats' pattern. It
-- is intended to capture a sub-format/super-format relation on
-- 'Format'.
--
-- A general guideline is that if @x@ matches @'listMatches' y@, then
-- whenever a raw element of format @y@ can be included in a document,
-- a raw element of format @x@ can be included in the same place and
-- in the same way. It is also generally true that @y@ will be a
-- subset of @x@, or will be based on @x@ in some way. So
-- @'listMatches' y@ lists all the formats not-less-general than @y@
-- in that sense (the super-formats of @y@).
--
-- Some examples:
--
-- @
-- listMatches Beamer = exactly [Beamer, LaTeX, TeX]
-- listMatches Epub3 = exactly [Epub, Epub3, Html, Html5]
-- listMatches Markdown = exactly
-- [Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
-- listMatches TeX = exactly [TeX]
-- @
listMatches :: Format -> Formats
listMatches AsciiDoc = exactly [AsciiDoc]
listMatches AsciiDoctor = exactly [AsciiDoctor]
listMatches Beamer = exactly [Beamer, LaTeX, TeX]
listMatches CommonMark = exactly [CommonMark]
listMatches CommonMarkX = exactly [CommonMarkX]
listMatches ConTeXt = exactly [ConTeXt, TeX]
listMatches Creole = exactly [Creole]
listMatches Csv = exactly [Csv]
listMatches DocBook = exactly [DocBook]
listMatches DocBook4 = exactly [DocBook4]
listMatches DocBook5 = exactly [DocBook5]
listMatches Docx = exactly [Docx]
listMatches DokuWiki = exactly [DokuWiki]
listMatches Dzslides = exactly [Dzslides]
listMatches Epub = exactly [Epub, Html]
listMatches Epub2 = exactly [Epub, Epub2, Html, Html4]
listMatches Epub3 = exactly [Epub, Epub3, Html, Html5]
listMatches FictionBook = exactly [FictionBook]
listMatches Gfm = exactly [Gfm]
listMatches Haddock = exactly [Haddock]
listMatches Html = exactly [Html]
listMatches Html4 = exactly [Html, Html4]
listMatches Html5 = exactly [Html, Html5]
listMatches Icml = exactly [Icml]
listMatches Jats = exactly [Jats]
listMatches JatsArchiving = exactly [JatsArchiving]
listMatches JatsArticleAuthoring = exactly [JatsArticleAuthoring]
listMatches JatsPublishing = exactly [JatsPublishing]
listMatches Jira = exactly [Jira]
listMatches Json = exactly [Json]
listMatches Jupyter = exactly [Jupyter]
listMatches LaTeX = exactly [LaTeX, TeX]
listMatches Man = exactly [Man]
listMatches Markdown = exactly
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
listMatches MarkdownGithub = exactly
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
listMatches MarkdownMmd = exactly
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
listMatches MarkdownPhpExtra = exactly
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
listMatches MarkdownStrict = exactly
[Markdown, MarkdownGithub, MarkdownMmd, MarkdownPhpExtra, MarkdownStrict]
listMatches MediaWiki = exactly [MediaWiki]
listMatches Ms = exactly [Ms]
listMatches Muse = exactly [Muse]
listMatches Native = exactly [Native]
listMatches Odt = exactly [Odt]
listMatches OpenDocument = exactly [OpenDocument]
listMatches OpenXml = exactly [OpenXml]
listMatches Opml = exactly [Opml]
listMatches Org = exactly [Org]
listMatches Pdf = exactly [Pdf]
listMatches Plain = exactly [Plain]
listMatches Pptx = exactly [Pptx]
listMatches RevealJS = exactly [RevealJS]
listMatches Rst = exactly [Rst]
listMatches Rtf = exactly [Rtf]
listMatches S5 = exactly [S5]
listMatches Slideous = exactly [Slideous]
listMatches Slidy = exactly [Slidy]
listMatches T2T = exactly [T2T]
listMatches TWiki = exactly [TWiki]
listMatches TeX = exactly [TeX]
listMatches Tei = exactly [Tei]
listMatches Texinfo = exactly [Texinfo]
listMatches Textile = exactly [Textile]
listMatches TikiWiki = exactly [TikiWiki]
listMatches VimWiki = exactly [VimWiki]
listMatches XWiki = exactly [XWiki]
listMatches ZimWiki = exactly [ZimWiki]

-- | Convert a 'Formats' into its underlying list of successful
-- matches.
toList :: Formats -> [Format]
toList (Formats s) = Set.toList s

instance NFData Format
instance NFData Formats
Loading

0 comments on commit bfbc5fc

Please sign in to comment.