diff --git a/src/Text/XmlHtml/Common.hs b/src/Text/XmlHtml/Common.hs index 36c9fae..8bfe19e 100644 --- a/src/Text/XmlHtml/Common.hs +++ b/src/Text/XmlHtml/Common.hs @@ -36,13 +36,13 @@ data Document = XmlDocument { -- element. XML processing instructions are intentionally omitted as a -- simplification, and CDATA and plain text are both text nodes, since they -- ought to be semantically interchangeable. -data Node = TextNode !Text - | Comment !Text - | Element { +data Node = Element { elementTag :: !Text, elementAttrs :: ![(Text, Text)], elementChildren :: ![Node] } + | TextNode !Text + | Comment !Text deriving (Eq, Show) diff --git a/src/Text/XmlHtml/HTML/Render.hs b/src/Text/XmlHtml/HTML/Render.hs index 02c90ec..9f9569b 100644 --- a/src/Text/XmlHtml/HTML/Render.hs +++ b/src/Text/XmlHtml/HTML/Render.hs @@ -1,45 +1,248 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} module Text.XmlHtml.HTML.Render where import Blaze.ByteString.Builder -import Control.Applicative +import Blaze.ByteString.Builder.Char8 (fromChar) +import qualified Blaze.ByteString.Builder.Html.Utf8 as Utf +import Blaze.ByteString.Builder.Internal +import Data.Char +import Data.List import Data.Maybe import Data.Monoid -import qualified Text.Parsec as P import Text.XmlHtml.Common -import Text.XmlHtml.TextParser import Text.XmlHtml.HTML.Meta -import qualified Text.XmlHtml.HTML.Parse as P +import qualified Text.XmlHtml.XML.Parse as P import Text.XmlHtml.XML.Render (docTypeDecl, entity) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as LT import qualified Data.HashSet as S ------------------------------------------------------------------------------ --- | And, the rendering code. +-- | Render a node list into a builder using the given encoding. render :: Encoding -> Maybe DocType -> [Node] -> Builder -render e dt ns = byteOrder - `mappend` docTypeDecl e dt - `mappend` nodes - where byteOrder | isUTF16 e = fromText e "\xFEFF" -- byte order mark - | otherwise = mempty - nodes | null ns = mempty - | otherwise = firstNode e (head ns) - `mappend` (mconcat $ map (node e) (tail ns)) +render e dt ns = case e of + UTF8 -> utf8Render dt ns + _ -> utf16Render e dt ns + + +------------------------------------------------------------------------------ +utf8Render :: Maybe DocType -> [Node] -> Builder +utf8Render dt ns = docTypeDecl UTF8 dt `mappend` nodes + where + nodes = case ns of + [] -> mempty + (z:zs) -> utf8FirstNode z `mappend` + foldr (\x b -> utf8Node x `mappend` b) mempty zs +{-# INLINE utf8Render #-} + + +------------------------------------------------------------------------------ +utf16Render :: Encoding -> Maybe DocType -> [Node] -> Builder +utf16Render e dt ns = + fromText e "\xFEFF" `mappend` docTypeDecl e dt `mappend` nodes + where + nodes = foldr (\x b -> utf16Node e x `mappend` b) mempty ns + + +------------------------------------------------------------------------------ +utf8Node :: Node -> Builder +utf8Node (TextNode t) = Utf.fromHtmlEscapedText t + +utf8Node (Comment t) + | commentIsInvalid t = error "invalid comment" + | otherwise = mconcat [ fromByteString "" ] + +utf8Node (Element t a c) = utf8Element t tbase a c + where + tbase = T.toLower $ snd $ T.breakOnEnd ":" t + + +------------------------------------------------------------------------------ +utf8FirstNode :: Node -> Builder +utf8FirstNode (TextNode "") = mempty +utf8FirstNode (TextNode t) = let (c, t') = fromJust $ T.uncons t + in escaped "<>& \t\r\n" UTF8 (T.singleton c) + `mappend` utf8Node (TextNode t') +utf8FirstNode n = utf8Node n + + +------------------------------------------------------------------------------ +commentIsInvalid :: Text -> Bool +commentIsInvalid t + | "--" `T.isInfixOf` t = True + | "-" `T.isSuffixOf` t = True + | otherwise = False + + +------------------------------------------------------------------------------ +utf8Element :: Text -> Text -> [(Text, Text)] -> [Node] -> Builder +utf8Element t tbase a c + | tbase `S.member` voidTags = voidTag + | tbase `S.member` rawTextTags = rawTag + | otherwise = normalTag + + where + -------------------------------------------------------------------------- + tbuild = Utf.fromText t + attributes = foldr (\x b -> utf8Attribute x `mappend` b) mempty a + + -------------------------------------------------------------------------- + voidTag = {-# SCC "utf8Element/voidTag" #-} + if null c + then mconcat [ fromChar '<' + , tbuild + , attributes + , fromByteString " />" ] + + else error $ T.unpack t ++ " must be empty" + + -------------------------------------------------------------------------- + rawTag = {-# SCC "utf8Element/rawTag" #-} + if (all isTextNode c) && ok + then mconcat [ fromChar '<' + , tbuild + , attributes + , fromChar '>' + , Utf.fromLazyText haystack + , fromChar '<' + , fromChar '/' + , tbuild + , fromChar '>' ] + + else error $ concat [ + T.unpack t + , " cannot contain non-text children or text looking " + , "like its end tag." ] + where + ok = not (needle `LT.isInfixOf` haystack) + needle = LT.fromChunks [ "' + , foldr (\x b -> utf8Node x `mappend` b) mempty c + , fromChar '<' + , fromChar '/' + , tbuild + , fromChar '>' ] + + +------------------------------------------------------------------------------ +utf8Attribute :: (Text, Text) -> Builder +utf8Attribute (n, v) | T.null v = fromChar ' ' `mappend` nbuild + | not ("\'" `T.isInfixOf` v) = + mconcat [ fromChar ' ' + , nbuild + , fromChar '=' + , fromChar '\'' + , sqEscape v + , fromChar '\'' + ] + | otherwise = + mconcat [ fromChar ' ' + , nbuild + , fromChar '=' + , fromChar '"' + , dqEscape v + , fromChar '"' + ] + where + nbuild = Utf.fromHtmlEscapedText n + + sqSubst c = Utf.fromChar c + + sqEscape = escape sqPred sqSubst + sqPred c = c == '&' + + dqEscape = escape dqPred dqSubst + + dqSubst '\"' = fromByteString """ + dqSubst c = Utf.fromChar c + + dqPred c = c == '"' || c == '&' + + escape p subst = {-# SCC "utf8Attribute/escape" #-} go mempty + where + go bl t = let (a,b) = T.break p t + bl' = bl `mappend` Utf.fromText a + r = T.uncons b + in case r of + Nothing -> bl' + Just ('&',ss) -> + let str = T.unpack b + in if ambiguousAmpersand str + then go (bl' `mappend` + fromByteString "&") ss + else go (bl' `mappend` + fromWord8 0x26) ss + Just (c, ss) -> go (bl' `mappend` subst c) ss + + +------------------------------------------------------------------------------ +-- UTF-16 render code follows; TODO: optimize + +ambiguousAmpersand :: String -> Bool +ambiguousAmpersand [] = False +ambiguousAmpersand ('&':s) = ambig2 s + where + ambig2 [] = False + ambig2 ('#':xs) = ambigCharRefStart xs + ambig2 (x:xs) + | P.isNameStartChar x = ambigEntity xs + | otherwise = False + + ambigCharRefStart [] = False + ambigCharRefStart (x:xs) + | isDigit x = ambigCharRef xs + | x == 'x' || x == 'X' = ambigHexCharRef xs + | otherwise = False + + ambigCharRef [] = False + ambigCharRef (x:xs) + | x == ';' = True + | isDigit x = ambigCharRef xs + | otherwise = False + + ambigHexCharRef [] = False + ambigHexCharRef (x:xs) + | x == ';' = True + | isHexDigit x = ambigCharRef xs + | otherwise = False + + ambigEntity [] = False + ambigEntity (x:xs) + | x == ';' = True + | P.isNameChar x = ambigEntity xs + | otherwise = False +ambiguousAmpersand _ = False ------------------------------------------------------------------------------ -- | Function for rendering HTML nodes without the overhead of creating a -- Document structure. renderHtmlFragment :: Encoding -> [Node] -> Builder -renderHtmlFragment _ [] = mempty -renderHtmlFragment e (n:ns) = - firstNode e n `mappend` (mconcat $ map (node e) ns) +renderHtmlFragment e = + case e of + UTF8 -> goUtf8 + _ -> goUtf16 + + where + goUtf8 = foldr (mappend . utf8Node) mempty + goUtf16 = foldr (mappend . utf16Node e) mempty +{-# INLINE renderHtmlFragment #-} ------------------------------------------------------------------------------ -- | HTML allows & so long as it is not "ambiguous" (i.e., looks like an @@ -52,49 +255,35 @@ escaped bad e t = in fromText e p `mappend` case r of Nothing -> mempty - Just ('&',ss) | isLeft (parseText ambigAmp "" s) + Just ('&',ss) | ambiguousAmpersand $ T.unpack s -> fromText e "&" `mappend` escaped bad e ss Just (c,ss) -> entity e c `mappend` escaped bad e ss - where isLeft = either (const True) (const False) - ambigAmp = P.char '&' *> - (P.finishCharRef *> return () <|> P.finishEntityRef *> return ()) ------------------------------------------------------------------------------ -node :: Encoding -> Node -> Builder -node e (TextNode t) = escaped "<>&" e t -node e (Comment t) | "--" `T.isInfixOf` t = error "Invalid comment" - | "-" `T.isSuffixOf` t = error "Invalid comment" - | otherwise = fromText e "" -node e (Element t a c) = +utf16Node :: Encoding -> Node -> Builder +utf16Node e (TextNode t) = escaped "<>&" e t +utf16Node e (Comment t) | "--" `T.isInfixOf` t = error "Invalid comment" + | "-" `T.isSuffixOf` t = error "Invalid comment" + | otherwise = fromText e "" +utf16Node e (Element t a c) = let tbase = T.toLower $ snd $ T.breakOnEnd ":" t - in element e t tbase a c - - ------------------------------------------------------------------------------- --- | Process the first node differently to encode leading whitespace. This --- lets us be sure that @parseHTML@ is a left inverse to @render@. -firstNode :: Encoding -> Node -> Builder -firstNode e (Comment t) = node e (Comment t) -firstNode e (Element t a c) = node e (Element t a c) -firstNode _ (TextNode "") = mempty -firstNode e (TextNode t) = let (c,t') = fromJust $ T.uncons t - in escaped "<>& \t\r\n" e (T.singleton c) - `mappend` node e (TextNode t') + in utf16Element e t tbase a c ------------------------------------------------------------------------------ -- XXX: Should do something to avoid concatting large CDATA sections before -- writing them to the output. -element :: Encoding -> Text -> Text -> [(Text, Text)] -> [Node] -> Builder -element e t tb a c +utf16Element :: Encoding -> Text -> Text -> [(Text, Text)] -> [Node] + -> Builder +utf16Element e t tb a c | tb `S.member` voidTags && null c = fromText e "<" `mappend` fromText e t - `mappend` (mconcat $ map (attribute e) a) + `mappend` (mconcat $ map (utf16Attribute e) a) `mappend` fromText e " />" | tb `S.member` voidTags = error $ T.unpack t ++ " must be empty" @@ -104,7 +293,7 @@ element e t tb a c not ("" `mappend` fromText e s `mappend` fromText e "" - `mappend` (mconcat $ map (node e) c) + `mappend` (mconcat $ map (utf16Node e) c) `mappend` fromText e "" ------------------------------------------------------------------------------ -attribute :: Encoding -> (Text, Text) -> Builder -attribute e (n,v) +utf16Attribute :: Encoding -> (Text, Text) -> Builder +utf16Attribute e (n,v) | v == "" = fromText e " " `mappend` fromText e n @@ -144,4 +333,3 @@ attribute e (n,v) `mappend` fromText e "=\"" `mappend` escaped "&\"" e v `mappend` fromText e "\"" - diff --git a/test/benchmark/Benchmark.hs b/test/benchmark/Benchmark.hs new file mode 100644 index 0000000..c480cdf --- /dev/null +++ b/test/benchmark/Benchmark.hs @@ -0,0 +1,50 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +------------------------------------------------------------------------------ +import Criterion +import Criterion.Main +------------------------------------------------------------------------------ +import Blaze.ByteString.Builder +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as L +import Data.List +import Data.Text (Text) +import qualified Data.Text as T +import Text.XmlHtml +import Text.Blaze.Html.Renderer.Utf8 +import Text.Blaze.Html (Html) +------------------------------------------------------------------------------ +import BlazeExample + +------------------------------------------------------------------------------ +main :: IO () +main = do + exampleHTML <- parseExample + !_ <- return $! length $! show exampleHTML + + defaultMain [ + bench "renderHtml" $ renderHtmlBenchmark exampleHTML + , bench "renderBlaze" $ renderBlazeBenchmark blazeHtmlExample + ] + + +------------------------------------------------------------------------------ +parseExample :: IO Document +parseExample = do + bytes <- B.readFile "resources/benchmarks/haddock-example.html" + either error return $ parseHTML "haddock-example.html" bytes + + +------------------------------------------------------------------------------ +renderHtmlBenchmark :: Document -> Pure +renderHtmlBenchmark = whnf (toByteString . render) + + +------------------------------------------------------------------------------ +renderBlazeBenchmark :: Html -> Pure +renderBlazeBenchmark = whnf (touch . renderHtml) + where + touch l = foldl' seq "" (L.toChunks l) `seq` () diff --git a/test/benchmark/BlazeExample.hs b/test/benchmark/BlazeExample.hs new file mode 100644 index 0000000..e5e475d --- /dev/null +++ b/test/benchmark/BlazeExample.hs @@ -0,0 +1,325 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} + +module BlazeExample (blazeHtmlExample) where + +import Prelude +import qualified Prelude as P +import Data.Monoid (mempty) + +import Text.Blaze.Html5 +import qualified Text.Blaze.Html5 as H +import Text.Blaze.Html5.Attributes +import qualified Text.Blaze.Html5.Attributes as A + +blazeHtmlExample :: Html +blazeHtmlExample = do + docTypeHtml ! xmlns "http://www.w3.org/1999/xhtml" $ do + H.head $ do + meta ! httpEquiv "Content-Type" ! content "text/html; charset=UTF-8" + H.title "Data.Text.Lazy.Builder" + link ! href "ocean.css" ! rel "stylesheet" ! type_ "text/css" ! A.title "Ocean" + script ! src "haddock-util.js" ! type_ "text/javascript" $ mempty + script ! type_ "text/javascript" $ "//\nwindow.onload = function () {pageLoad();setSynopsis(\"mini_Data-Text-Lazy-Builder.html\");};\n//" + body $ do + H.div ! A.id "package-header" $ do + ul ! class_ "links" ! A.id "page-menu" $ do + li $ a ! href "src/Data-Text-Lazy-Builder.html" $ "Source" + li $ a ! href "index.html" $ "Contents" + li $ a ! href "doc-index.html" $ "Index" + p ! class_ "caption" $ "text-0.11.1.5: An efficient packed Unicode text type." + H.div ! A.id "content" $ do + H.div ! A.id "module-header" $ do + table ! class_ "info" $ do + tr $ do + th "Portability" + td "portable to Hugs and GHC" + tr $ do + th "Stability" + td "experimental" + tr $ do + th "Maintainer" + td "Johan Tibell " + p ! class_ "caption" $ "Data.Text.Lazy.Builder" + H.div ! A.id "table-of-contents" $ do + p ! class_ "caption" $ "Contents" + ul $ do + li $ a ! href "#g:1" $ "The Builder type" + li $ a ! href "#g:2" $ "Constructing Builders" + li $ a ! href "#g:3" $ "Flushing the buffer state" + H.div ! A.id "description" $ do + p ! class_ "caption" $ "Description" + H.div ! class_ "doc" $ do + p $ do + "Efficient construction of lazy" + code "Text" + "values. The principal\n operations on a" + code "Builder" + "are" + code "singleton" + "," + code "fromText" + ", and" + code "fromLazyText" + ", which construct new builders, and" + code $ a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Monoid.html#v:mappend" $ "mappend" + ", which\n concatenates two builders." + p $ do + "To get maximum performance when building lazy" + code "Text" + "values using a builder, associate" + code "mappend" + "calls to the right. For example, prefer" + pre "singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')" + p "to" + pre "singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'" + p $ do + "as the latter associates" + code "mappend" + "to the left." + H.div ! A.id "synopsis" $ do + p ! A.id "control.syn" ! class_ "caption expander" ! onclick "toggleSection('syn')" $ "Synopsis" + ul ! A.id "section.syn" ! class_ "hide" ! onclick "toggleSection('syn')" $ do + li ! class_ "src short" $ do + H.span ! class_ "keyword" $ "data" + a ! href "#t:Builder" $ "Builder" + li ! class_ "src short" $ do + a ! href "#v:toLazyText" $ "toLazyText" + "::" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + "->" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + li ! class_ "src short" $ do + a ! href "#v:toLazyTextWith" $ "toLazyTextWith" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Int.html#t:Int" $ "Int" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + "->" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + li ! class_ "src short" $ do + a ! href "#v:singleton" $ "singleton" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Char.html#t:Char" $ "Char" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + li ! class_ "src short" $ do + a ! href "#v:fromText" $ "fromText" + "::" + a ! href "Data-Text-Internal.html#t:Text" $ "Text" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + li ! class_ "src short" $ do + a ! href "#v:fromLazyText" $ "fromLazyText" + "::" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + li ! class_ "src short" $ do + a ! href "#v:fromString" $ "fromString" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Char.html#t:String" $ "String" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + li ! class_ "src short" $ do + a ! href "#v:flush" $ "flush" + "::" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + H.div ! A.id "interface" $ do + h1 ! A.id "g:1" $ "The Builder type" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + H.span ! class_ "keyword" $ "data" + a ! name "t:Builder" ! class_ "def" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#Builder" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + "A" + code "Builder" + "is an efficient way to build lazy" + code "Text" + "values.\n There are several functions for constructing builders, but only one\n to inspect them: to extract any data, you have to turn them into\n lazy" + code "Text" + "values using" + code "toLazyText" + "." + p $ do + "Internally, a builder constructs a lazy" + code "Text" + "by filling arrays\n piece by piece. As each buffer is filled, it is 'popped' off, to\n become a new chunk of the resulting lazy" + code "Text" + ". All this is\n hidden from the user of the" + code "Builder" + "." + H.div ! class_ "subs instances" $ do + p ! A.id "control.i:Builder" ! class_ "caption collapser" ! onclick "toggleSection('i:Builder')" $ "Instances" + H.div ! A.id "section.i:Builder" ! class_ "show" $ table $ do + tr $ do + td ! class_ "src" $ do + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Eq.html#t:Eq" $ "Eq" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + td ! class_ "doc empty" $ mempty + tr $ do + td ! class_ "src" $ do + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Ord.html#t:Ord" $ "Ord" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + td ! class_ "doc empty" $ mempty + tr $ do + td ! class_ "src" $ do + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Text-Show.html#t:Show" $ "Show" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + td ! class_ "doc empty" $ mempty + tr $ do + td ! class_ "src" $ do + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-String.html#t:IsString" $ "IsString" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + td ! class_ "doc empty" $ mempty + tr $ do + td ! class_ "src" $ do + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Monoid.html#t:Monoid" $ "Monoid" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + td ! class_ "doc empty" $ mempty + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:toLazyText" ! class_ "def" $ "toLazyText" + "::" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + "->" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + a ! href "src/Data-Text-Lazy-Builder.html#toLazyText" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ p $ do + em "O(n)." + "Extract a lazy" + code "Text" + "from a" + code "Builder" + "with a default\n buffer size. The construction work takes place if and when the\n relevant part of the lazy" + code "Text" + "is demanded." + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:toLazyTextWith" ! class_ "def" $ "toLazyTextWith" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Int.html#t:Int" $ "Int" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + "->" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + a ! href "src/Data-Text-Lazy-Builder.html#toLazyTextWith" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + em "O(n)." + "Extract a lazy" + code "Text" + "from a" + code "Builder" + ", using the given\n size for the initial buffer. The construction work takes place if\n and when the relevant part of the lazy" + code "Text" + "is demanded." + p "If the initial buffer is too small to hold all data, subsequent\n buffers will be the default buffer size." + h1 ! A.id "g:2" $ "Constructing Builders" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:singleton" ! class_ "def" $ "singleton" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Char.html#t:Char" $ "Char" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#singleton" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + em "O(1)." + "A" + code "Builder" + "taking a single character, satisfying" + ul $ li $ pre $ do + code $ a ! href "Data-Text-Lazy-Builder.html#v:toLazyText" $ "toLazyText" + "(" + code $ a ! href "Data-Text-Lazy-Builder.html#v:singleton" $ "singleton" + "c) =" + code $ a ! href "Data-Text-Lazy.html#v:singleton" $ "singleton" + "c" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:fromText" ! class_ "def" $ "fromText" + "::" + a ! href "Data-Text-Internal.html#t:Text" $ "Text" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#fromText" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + em "O(1)." + "A" + code "Builder" + "taking a" + code $ a ! href "Data-Text-Internal.html#t:Text" $ "Text" + ", satisfying" + ul $ li $ pre $ do + code $ a ! href "Data-Text-Lazy-Builder.html#v:toLazyText" $ "toLazyText" + "(" + code $ a ! href "Data-Text-Lazy-Builder.html#v:fromText" $ "fromText" + "t) =" + code $ a ! href "Data-Text-Lazy.html#v:fromChunks" $ "fromChunks" + "[t]" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:fromLazyText" ! class_ "def" $ "fromLazyText" + "::" + a ! href "Data-Text-Lazy-Internal.html#t:Text" $ "Text" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#fromLazyText" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + em "O(1)." + "A" + code "Builder" + "taking a lazy" + code "Text" + ", satisfying" + ul $ li $ pre $ do + code $ a ! href "Data-Text-Lazy-Builder.html#v:toLazyText" $ "toLazyText" + "(" + code $ a ! href "Data-Text-Lazy-Builder.html#v:fromLazyText" $ "fromLazyText" + "t) = t" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:fromString" ! class_ "def" $ "fromString" + "::" + a ! href "/Users/markl/Projects/A/platform/hp-mac/src/macos/dist-x86_64/root/Library/Frameworks/GHC.framework/Versions/7.0.4-x86_64/usr/share/doc/ghc/html/libraries/base-4.3.1.0/Data-Char.html#t:String" $ "String" + "->" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#fromString" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ do + p $ do + em "O(1)." + "A Builder taking a" + code "String" + ", satisfying" + ul $ li $ pre $ do + code $ a ! href "Data-Text-Lazy-Builder.html#v:toLazyText" $ "toLazyText" + "(" + code $ a ! href "Data-Text-Lazy-Builder.html#v:fromString" $ "fromString" + "s) =" + code $ a ! href "Data-Text-Lazy.html#v:fromChunks" $ "fromChunks" + "[S.pack s]" + h1 ! A.id "g:3" $ "Flushing the buffer state" + H.div ! class_ "top" $ do + p ! class_ "src" $ do + a ! name "v:flush" ! class_ "def" $ "flush" + "::" + a ! href "Data-Text-Lazy-Builder.html#t:Builder" $ "Builder" + a ! href "src/Data-Text-Lazy-Builder.html#flush" ! class_ "link" $ "Source" + H.div ! class_ "doc" $ p $ do + em "O(1)." + "Pop the strict" + code "Text" + "we have constructed so far, if any,\n yielding a new chunk in the result lazy" + code "Text" + "." + H.div ! A.id "footer" $ p $ do + "Produced by" + a ! href "http://www.haskell.org/haddock/" $ "Haddock" + "version 2.9.2" + diff --git a/test/resources/benchmarks/haddock-example.html b/test/resources/benchmarks/haddock-example.html new file mode 100644 index 0000000..a109466 --- /dev/null +++ b/test/resources/benchmarks/haddock-example.html @@ -0,0 +1,41 @@ +Codestin Search App

text-0.11.1.5: An efficient packed Unicode text type.

Portabilityportable to Hugs and GHC
Stabilityexperimental
MaintainerJohan Tibell <johan.tibell@gmail.com>

Data.Text.Lazy.Builder

Description

Efficient construction of lazy Text values. The principal + operations on a Builder are singleton, fromText, and + fromLazyText, which construct new builders, and mappend, which + concatenates two builders. +

To get maximum performance when building lazy Text values using a builder, associate mappend calls to the right. For example, prefer +

 singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
+

to +

 singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
+

as the latter associates mappend to the left. +

The Builder type +

data Builder Source

A Builder is an efficient way to build lazy Text values. + There are several functions for constructing builders, but only one + to inspect them: to extract any data, you have to turn them into + lazy Text values using toLazyText. +

Internally, a builder constructs a lazy Text by filling arrays + piece by piece. As each buffer is filled, it is 'popped' off, to + become a new chunk of the resulting lazy Text. All this is + hidden from the user of the Builder. +

toLazyText :: Builder -> TextSource

O(n). Extract a lazy Text from a Builder with a default + buffer size. The construction work takes place if and when the + relevant part of the lazy Text is demanded. +

toLazyTextWith :: Int -> Builder -> TextSource

O(n). Extract a lazy Text from a Builder, using the given + size for the initial buffer. The construction work takes place if + and when the relevant part of the lazy Text is demanded. +

If the initial buffer is too small to hold all data, subsequent + buffers will be the default buffer size. +

Constructing Builders +

singleton :: Char -> BuilderSource

O(1). A Builder taking a single character, satisfying +

fromText :: Text -> BuilderSource

O(1). A Builder taking a Text, satisfying +

fromLazyText :: Text -> BuilderSource

O(1). A Builder taking a lazy Text, satisfying +

fromString :: String -> BuilderSource

O(1). A Builder taking a String, satisfying +

Flushing the buffer state +

flush :: BuilderSource

O(1). Pop the strict Text we have constructed so far, if any, + yielding a new chunk in the result lazy Text. +

\ No newline at end of file diff --git a/test/xmlhtml-testsuite.cabal b/test/xmlhtml-testsuite.cabal index fc74494..eb48f20 100644 --- a/test/xmlhtml-testsuite.cabal +++ b/test/xmlhtml-testsuite.cabal @@ -9,13 +9,14 @@ Executable testsuite build-depends: HUnit == 1.2.*, - directory >= 1.0 && <1.2, QuickCheck >= 2.3.0.2, + attoparsec >= 0.10 && < 0.11, base == 4.*, blaze-builder >= 0.2 && <0.4, blaze-html >= 0.5 && <0.6, blaze-markup >= 0.5 && <0.6, bytestring == 0.9.*, + directory >= 1.0 && < 1.3, containers >= 0.3 && <0.5, parsec >= 3.1.2 && <3.2, test-framework >= 0.6 && <0.7, @@ -26,3 +27,29 @@ Executable testsuite ghc-options: -O2 -Wall -fhpc -fwarn-tabs -funbox-strict-fields -threaded -fno-warn-unused-do-bind + + ghc-prof-options: -prof -auto-all + + +Executable benchmark + hs-source-dirs: benchmark ../src + main-is: Benchmark.hs + + ghc-options: -O2 -Wall -fwarn-tabs -funbox-strict-fields -threaded + -fno-warn-unused-do-bind -rtsopts + + ghc-prof-options: -prof -auto-all + + build-depends: + attoparsec >= 0.10 && < 0.11, + base == 4.*, + blaze-builder >= 0.2 && < 0.4, + blaze-html >= 0.5 && <0.6, + blaze-markup >= 0.5 && <0.6, + bytestring == 0.9.*, + containers >= 0.3 && < 0.5, + criterion >= 0.6 && < 0.7, + directory >= 1.0 && < 1.3, + parsec >= 3.0 && < 3.2, + text >= 0.11 && < 0.12, + unordered-containers >= 0.1.4 && < 0.3 diff --git a/xmlhtml.cabal b/xmlhtml.cabal index 28ae8a8..c2afec1 100644 --- a/xmlhtml.cabal +++ b/xmlhtml.cabal @@ -820,6 +820,7 @@ Library Text.XmlHtml.HTML.Render Build-depends: base >= 4 && < 5, + attoparsec >= 0.10 && < 0.11, blaze-builder >= 0.2 && < 0.4, blaze-html >= 0.5 && < 0.6, blaze-markup >= 0.5 && < 0.6,