{-# LANGUAGE CPP #-}
module Text.XML.HXT.Parser.TagSoup
( parseHtmlTagSoup
)
where
#if MIN_VERSION_base(4,8,0)
#else
import Control.Applicative (Applicative (..))
#endif
import Control.Monad (ap, liftM)
import Data.Char (toLower)
import Data.Char.Properties.XMLCharProps (isXmlSpaceChar)
import Data.Maybe
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Entity (lookupNumericEntity)
import Text.XML.HXT.DOM.Interface (NsEnv, QName, XmlTrees,
a_xml, a_xmlns, c_warn,
isWellformedQualifiedName,
mkName, newQName, newXName,
nullXName, toNsEnv,
xmlNamespace,
xmlnsNamespace)
import Text.XML.HXT.DOM.XmlNode (isElem, mkAttr', mkCmt',
mkElement, mkError',
mkText')
import Text.XML.HXT.Parser.HtmlParsec (closesHtmlTag,
isEmptyHtmlTag,
isInnerHtmlTagOf)
import Text.XML.HXT.Parser.XhtmlEntities
type STag = Tag String
type Tags = [STag]
type Context = ([String], NsEnv)
type State = Tags
newtype Parser a = P { forall a. Parser a -> State -> (a, State)
parse :: State -> (a, State)}
instance Functor Parser where
fmap :: forall a b. (a -> b) -> Parser a -> Parser b
fmap = (a -> b) -> Parser a -> Parser b
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM
instance Applicative Parser where
pure :: forall a. a -> Parser a
pure = a -> Parser a
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return
<*> :: forall a b. Parser (a -> b) -> Parser a -> Parser b
(<*>) = Parser (a -> b) -> Parser a -> Parser b
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap
instance Monad Parser where
return :: forall a. a -> Parser a
return a
x = (State -> (a, State)) -> Parser a
forall a. (State -> (a, State)) -> Parser a
P ((State -> (a, State)) -> Parser a)
-> (State -> (a, State)) -> Parser a
forall a b. (a -> b) -> a -> b
$ \ State
ts -> (a
x, State
ts)
Parser a
p >>= :: forall a b. Parser a -> (a -> Parser b) -> Parser b
>>= a -> Parser b
f = (State -> (b, State)) -> Parser b
forall a. (State -> (a, State)) -> Parser a
P ((State -> (b, State)) -> Parser b)
-> (State -> (b, State)) -> Parser b
forall a b. (a -> b) -> a -> b
$ \ State
ts -> let
(a
res, State
ts') = Parser a -> State -> (a, State)
forall a. Parser a -> State -> (a, State)
parse Parser a
p State
ts
in
Parser b -> State -> (b, State)
forall a. Parser a -> State -> (a, State)
parse (a -> Parser b
f a
res) State
ts'
runParser :: Parser a -> Tags -> a
runParser :: forall a. Parser a -> State -> a
runParser Parser a
p State
ts = (a, State) -> a
forall a b. (a, b) -> a
fst ((a, State) -> a) -> (State -> (a, State)) -> State -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser a -> State -> (a, State)
forall a. Parser a -> State -> (a, State)
parse Parser a
p (State -> a) -> State -> a
forall a b. (a -> b) -> a -> b
$ State
ts
cond :: Parser Bool -> Parser a -> Parser a -> Parser a
cond :: forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
c Parser a
t Parser a
e = do
Bool
p <- Parser Bool
c
if Bool
p then Parser a
t else Parser a
e
lookAhead :: (STag -> Bool) -> Parser Bool
lookAhead :: (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
p = (State -> (Bool, State)) -> Parser Bool
forall a. (State -> (a, State)) -> Parser a
P ((State -> (Bool, State)) -> Parser Bool)
-> (State -> (Bool, State)) -> Parser Bool
forall a b. (a -> b) -> a -> b
$ \ State
s -> (Bool -> Bool
not (State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
s) Bool -> Bool -> Bool
&& STag -> Bool
p (State -> STag
forall a. HasCallStack => [a] -> a
head State
s), State
s)
isEof :: Parser Bool
isEof :: Parser Bool
isEof = (State -> (Bool, State)) -> Parser Bool
forall a. (State -> (a, State)) -> Parser a
P ((State -> (Bool, State)) -> Parser Bool)
-> (State -> (Bool, State)) -> Parser Bool
forall a b. (a -> b) -> a -> b
$ \ State
s -> (State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
s, State
s)
isText :: Parser Bool
isText :: Parser Bool
isText = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagText str
_) = Bool
True
is Tag str
_ = Bool
False
isCmt :: Parser Bool
isCmt :: Parser Bool
isCmt = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagComment str
_) = Bool
True
is Tag str
_ = Bool
False
isWarn :: Parser Bool
isWarn :: Parser Bool
isWarn = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagWarning str
_) = Bool
True
is Tag str
_ = Bool
False
isPos :: Parser Bool
isPos :: Parser Bool
isPos = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagPosition Row
_ Row
_) = Bool
True
is Tag str
_ = Bool
False
isCls :: Parser Bool
isCls :: Parser Bool
isCls = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagClose str
_) = Bool
True
is Tag str
_ = Bool
False
isOpn :: Parser Bool
isOpn :: Parser Bool
isOpn = (STag -> Bool) -> Parser Bool
lookAhead STag -> Bool
forall {str}. Tag str -> Bool
is
where
is :: Tag str -> Bool
is (TagOpen str
_ [Attribute str]
_) = Bool
True
is Tag str
_ = Bool
False
getTag :: Parser STag
getTag :: Parser STag
getTag = (State -> (STag, State)) -> Parser STag
forall a. (State -> (a, State)) -> Parser a
P ((State -> (STag, State)) -> Parser STag)
-> (State -> (STag, State)) -> Parser STag
forall a b. (a -> b) -> a -> b
$ \ State
t -> (State -> STag
forall a. HasCallStack => [a] -> a
head State
t, State -> State
forall a. HasCallStack => [a] -> [a]
tail State
t)
getSym :: (STag -> a) -> Parser a
getSym :: forall a. (STag -> a) -> Parser a
getSym STag -> a
f = do
STag
t <- Parser STag
getTag
a -> Parser a
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (STag -> a
f STag
t)
getText :: Parser String
getText :: Parser String
getText = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
where
sym :: Tag str -> str
sym (TagText str
t) = str
t
sym Tag str
_ = str
forall a. HasCallStack => a
undefined
getCmt :: Parser String
getCmt :: Parser String
getCmt = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
where
sym :: Tag str -> str
sym (TagComment str
c) = str
c
sym Tag str
_ = str
forall a. HasCallStack => a
undefined
getWarn :: Parser String
getWarn :: Parser String
getWarn = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
where
sym :: Tag str -> str
sym (TagWarning str
w) = str
w
sym Tag str
_ = str
forall a. HasCallStack => a
undefined
getPos :: Parser (Int, Int)
getPos :: Parser (Row, Row)
getPos = (STag -> (Row, Row)) -> Parser (Row, Row)
forall a. (STag -> a) -> Parser a
getSym STag -> (Row, Row)
forall {str}. Tag str -> (Row, Row)
sym
where
sym :: Tag str -> (Row, Row)
sym (TagPosition Row
l Row
c) = (Row
l, Row
c)
sym Tag str
_ = (Row, Row)
forall a. HasCallStack => a
undefined
getCls :: Parser String
getCls :: Parser String
getCls = (STag -> String) -> Parser String
forall a. (STag -> a) -> Parser a
getSym STag -> String
forall {str}. Tag str -> str
sym
where
sym :: Tag str -> str
sym (TagClose str
n) = str
n
sym Tag str
_ = str
forall a. HasCallStack => a
undefined
getOpn :: Parser (String, [(String,String)])
getOpn :: Parser (String, [(String, String)])
getOpn = (STag -> (String, [(String, String)]))
-> Parser (String, [(String, String)])
forall a. (STag -> a) -> Parser a
getSym STag -> (String, [(String, String)])
forall {a}. Tag a -> (a, [Attribute a])
sym
where
sym :: Tag a -> (a, [Attribute a])
sym (TagOpen a
n [Attribute a]
al) = (a
n, [Attribute a]
al)
sym Tag a
_ = (a, [Attribute a])
forall a. HasCallStack => a
undefined
pushBack :: STag -> Parser ()
pushBack :: STag -> Parser ()
pushBack STag
t = (State -> ((), State)) -> Parser ()
forall a. (State -> (a, State)) -> Parser a
P ((State -> ((), State)) -> Parser ())
-> (State -> ((), State)) -> Parser ()
forall a b. (a -> b) -> a -> b
$ \ State
ts -> ((), STag
tSTag -> State -> State
forall a. a -> [a] -> [a]
:State
ts)
insCls :: String -> Parser ()
insCls :: String -> Parser ()
insCls String
n = STag -> Parser ()
pushBack (String -> STag
forall str. str -> Tag str
TagClose String
n)
insOpn :: String -> [(String, String)] -> Parser ()
insOpn :: String -> [(String, String)] -> Parser ()
insOpn String
n [(String, String)]
al = STag -> Parser ()
pushBack (String -> [(String, String)] -> STag
forall str. str -> [Attribute str] -> Tag str
TagOpen String
n [(String, String)]
al)
mkQN :: Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN :: Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
isAttr NsEnv
env String
s
| Bool
withNamespaces
= QName -> Parser QName
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return QName
qn1
| Bool
otherwise
= QName -> Parser QName
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return QName
qn0
where
qn1 :: QName
qn1
| Bool
isAttr Bool -> Bool -> Bool
&& Bool
isSimpleName = QName
s'
| Bool
isSimpleName = XName -> XName -> XName -> QName
newQName (String -> XName
newXName String
s) XName
nullXName (XName -> XName
nsUri XName
nullXName)
| String -> Bool
isWellformedQualifiedName String
s = XName -> XName -> XName -> QName
newQName XName
lp' XName
px' (XName -> XName
nsUri XName
px')
| Bool
otherwise = QName
s'
qn0 :: QName
qn0 = QName
s'
nsUri :: XName -> XName
nsUri XName
x = XName -> Maybe XName -> XName
forall a. a -> Maybe a -> a
fromMaybe XName
nullXName (Maybe XName -> XName) -> (NsEnv -> Maybe XName) -> NsEnv -> XName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XName -> NsEnv -> Maybe XName
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup XName
x (NsEnv -> XName) -> NsEnv -> XName
forall a b. (a -> b) -> a -> b
$ NsEnv
env
isSimpleName :: Bool
isSimpleName = (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
s
(String
px, (Char
_ : String
lp)) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span(Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
s
px' :: XName
px' = String -> XName
newXName String
px
lp' :: XName
lp' = String -> XName
newXName String
lp
s' :: QName
s' = String -> QName
mkName String
s
extendNsEnv :: Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv :: Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv Bool
withNamespaces [(String, String)]
al1 NsEnv
env
| Bool
withNamespaces
= [(String, String)] -> NsEnv
toNsEnv (((String, String) -> [(String, String)])
-> [(String, String)] -> [(String, String)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((String -> String -> [(String, String)])
-> (String, String) -> [(String, String)]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry String -> String -> [(String, String)]
forall {b}. String -> b -> [(String, b)]
addNs) [(String, String)]
al1) NsEnv -> NsEnv -> NsEnv
forall a. [a] -> [a] -> [a]
++ NsEnv
env
| Bool
otherwise
= NsEnv
env
where
addNs :: String -> b -> [(String, b)]
addNs String
n b
v
| String
px String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
a_xmlns
Bool -> Bool -> Bool
&&
(String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
lp Bool -> Bool -> Bool
|| (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> Bool) -> (String -> String) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. HasCallStack => [a] -> [a]
tail (String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ String
lp))
= [(Row -> String -> String
forall a. Row -> [a] -> [a]
drop Row
1 String
lp, b
v)]
| Bool
otherwise
= []
where
(String
px, String
lp) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') String
n
lookupEntity :: Bool -> Bool -> (String, Bool) -> Tags
lookupEntity :: Bool -> Bool -> (String, Bool) -> State
lookupEntity Bool
withWarnings Bool
_asHtml (e0 :: String
e0@(Char
'#':String
e), Bool
withSemicolon)
= case String -> Maybe String
lookupNumericEntity String
e of
Just String
c -> (String -> STag
forall str. str -> Tag str
TagText String
c)
STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
Maybe String
Nothing -> ( String -> STag
forall str. str -> Tag str
TagText (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e0 String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
withSemicolon])
STag -> State -> State
forall a. a -> [a] -> [a]
: if Bool
withWarnings
then (String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"illegal char reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";")
STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
else []
where
missingSemi :: State
missingSemi
| Bool
withWarnings
Bool -> Bool -> Bool
&&
Bool -> Bool
not Bool
withSemicolon = [String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"missing \";\" at end of char reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e]
| Bool
otherwise = []
lookupEntity Bool
withWarnings Bool
asHtml (String
e, Bool
withSemicolon)
= case (String -> [(String, Row)] -> Maybe Row
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
e [(String, Row)]
entities) of
Just Row
x -> (String -> STag
forall str. str -> Tag str
TagText [Row -> Char
forall a. Enum a => Row -> a
toEnum Row
x])
STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
Maybe Row
Nothing -> (String -> STag
forall str. str -> Tag str
TagText (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
withSemicolon])
STag -> State -> State
forall a. a -> [a] -> [a]
: if Bool
withWarnings
then (String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"Unknown entity reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";")
STag -> State -> State
forall a. a -> [a] -> [a]
: State
missingSemi
else []
where
entities :: [(String, Row)]
entities
| Bool
asHtml = [(String, Row)]
xhtmlEntities
| Bool
otherwise = [(String, Row)]
xhtmlEntities
missingSemi :: State
missingSemi
| Bool
withWarnings
Bool -> Bool -> Bool
&&
Bool -> Bool
not Bool
withSemicolon = [String -> STag
forall str. str -> Tag str
TagWarning (String -> STag) -> String -> STag
forall a b. (a -> b) -> a -> b
$ String
"missing \";\" at end of entity reference: &" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e]
| Bool
otherwise = []
lookupEntityAttr :: Bool -> Bool -> (String, Bool) -> (String, Tags)
lookupEntityAttr :: Bool -> Bool -> (String, Bool) -> (String, State)
lookupEntityAttr Bool
withWarnings Bool
asHtml (String
e, Bool
b)
| State -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null State
r = (String
s, State
r)
| Bool
otherwise = (String
"&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
';' | Bool
b], State
r)
where
(TagText String
s) : State
r = Bool -> Bool -> (String, Bool) -> State
lookupEntity Bool
withWarnings Bool
asHtml (String
e, Bool
b)
lowerCaseNames :: Tags -> Tags
lowerCaseNames :: State -> State
lowerCaseNames
= (STag -> STag) -> State -> State
forall a b. (a -> b) -> [a] -> [b]
map STag -> STag
f
where
f :: STag -> STag
f (TagOpen String
name [(String, String)]
attrs)
= String -> [(String, String)] -> STag
forall str. str -> [Attribute str] -> Tag str
TagOpen (String -> String
nameToLower String
name) (((String, String) -> (String, String))
-> [(String, String)] -> [(String, String)]
forall a b. (a -> b) -> [a] -> [b]
map (String, String) -> (String, String)
forall {b}. (String, b) -> (String, b)
attrToLower [(String, String)]
attrs)
f (TagClose String
name)
= String -> STag
forall str. str -> Tag str
TagClose (String -> String
nameToLower String
name)
f STag
a = STag
a
nameToLower :: String -> String
nameToLower = (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower
attrToLower :: (String, b) -> (String, b)
attrToLower (String
an, b
av) = (String -> String
nameToLower String
an, b
av)
parseHtmlTagSoup :: Bool -> Bool -> Bool -> Bool -> Bool -> String -> String -> XmlTrees
parseHtmlTagSoup :: Bool
-> Bool -> Bool -> Bool -> Bool -> String -> String -> XmlTrees
parseHtmlTagSoup Bool
withNamespaces Bool
withWarnings Bool
withComment Bool
removeWhiteSpace Bool
asHtml String
doc
= ( XmlTrees -> XmlTrees
docRootElem
(XmlTrees -> XmlTrees)
-> (String -> XmlTrees) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser XmlTrees -> State -> XmlTrees
forall a. Parser a -> State -> a
runParser (Context -> Parser XmlTrees
buildCont Context
forall {a}. ([a], NsEnv)
initContext)
(State -> XmlTrees) -> (String -> State) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ( if Bool
asHtml
then State -> State
lowerCaseNames
else State -> State
forall a. a -> a
id
)
(State -> State) -> (String -> State) -> String -> State
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> State
tagsoupParse
)
where
tagsoupParse :: String -> Tags
tagsoupParse :: String -> State
tagsoupParse = ParseOptions String -> String -> State
forall str. StringLike str => ParseOptions str -> str -> [Tag str]
parseTagsOptions ParseOptions String
tagsoupOptions
tagsoupOptions :: ParseOptions String
tagsoupOptions :: ParseOptions String
tagsoupOptions = ParseOptions String
parseOptions' { optTagWarning :: Bool
optTagWarning = Bool
withWarnings
, optEntityData :: (String, Bool) -> State
optEntityData = Bool -> Bool -> (String, Bool) -> State
lookupEntity Bool
withWarnings Bool
asHtml
, optEntityAttrib :: (String, Bool) -> (String, State)
optEntityAttrib = Bool -> Bool -> (String, Bool) -> (String, State)
lookupEntityAttr Bool
withWarnings Bool
asHtml
}
where
parseOptions' :: ParseOptions String
parseOptions' :: ParseOptions String
parseOptions' = ParseOptions String
forall str. StringLike str => ParseOptions str
parseOptions
docRootElem :: XmlTrees -> XmlTrees
docRootElem
= Row -> XmlTrees -> XmlTrees
forall a. Row -> [a] -> [a]
take Row
1 (XmlTrees -> XmlTrees)
-> (XmlTrees -> XmlTrees) -> XmlTrees -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NTree XNode -> Bool) -> XmlTrees -> XmlTrees
forall a. (a -> Bool) -> [a] -> [a]
filter NTree XNode -> Bool
forall a. XmlNode a => a -> Bool
isElem
initContext :: ([a], NsEnv)
initContext = ( []
, [(String, String)] -> NsEnv
toNsEnv ([(String, String)] -> NsEnv) -> [(String, String)] -> NsEnv
forall a b. (a -> b) -> a -> b
$
[ (String
a_xml, String
xmlNamespace)
, (String
a_xmlns, String
xmlnsNamespace)
]
)
wrap :: a -> [a]
wrap = (a -> [a] -> [a]
forall a. a -> [a] -> [a]
:[])
warn :: String -> XmlTrees
warn
| Bool
withWarnings = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Row -> String -> NTree XNode
mkError' Row
c_warn (String -> NTree XNode)
-> (String -> String) -> String -> NTree XNode
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. Show a => a -> String
show (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
doc String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++)
| Bool
otherwise = XmlTrees -> String -> XmlTrees
forall a b. a -> b -> a
const []
cmt :: String -> XmlTrees
cmt
| Bool
withComment = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkCmt'
| Bool
otherwise = XmlTrees -> String -> XmlTrees
forall a b. a -> b -> a
const []
txt :: String -> XmlTrees
txt
| Bool
removeWhiteSpace
= \ String
t ->
if (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isXmlSpaceChar String
t
then []
else NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText' (String -> XmlTrees) -> String -> XmlTrees
forall a b. (a -> b) -> a -> b
$ String
t
| Bool
otherwise = NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText'
isEmptyElem :: String -> Bool
isEmptyElem
| Bool
asHtml = String -> Bool
isEmptyHtmlTag
| Bool
otherwise = Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False
isInnerElem :: String -> String -> Bool
isInnerElem
| Bool
asHtml = String -> String -> Bool
isInnerHtmlTagOf
| Bool
otherwise = (String -> Bool) -> String -> String -> Bool
forall a b. a -> b -> a
const (Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False)
closesElem :: [String] -> String -> Bool
closesElem
| Bool
asHtml = \ [String]
ns String
n1 ->
Bool -> Bool
not ([String] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
ns)
Bool -> Bool -> Bool
&&
String
n1 String -> String -> Bool
`closesHtmlTag` ([String] -> String
forall a. HasCallStack => [a] -> a
head [String]
ns)
| Bool
otherwise = (String -> Bool) -> [String] -> String -> Bool
forall a b. a -> b -> a
const (Bool -> String -> Bool
forall a b. a -> b -> a
const Bool
False)
buildCont :: Context -> Parser XmlTrees
buildCont :: Context -> Parser XmlTrees
buildCont Context
ns
= Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isText ( do
String
t <- Parser String
getText
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
txt String
t XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isOpn ( do
(String
n,[(String, String)]
al) <- Parser (String, [(String, String)])
getOpn
Context -> String -> [(String, String)] -> Parser XmlTrees
openTag Context
ns String
n [(String, String)]
al
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isCls ( do
String
n <- Parser String
getCls
Context -> String -> Parser XmlTrees
closeTag Context
ns String
n
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isCmt ( do
String
c <- Parser String
getCmt
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
cmt String
c XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isWarn ( do
String
w <- Parser String
getWarn
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
warn String
w XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl)
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isPos ( do
(Row, Row)
_ <- Parser (Row, Row)
getPos
Context -> Parser XmlTrees
buildCont Context
ns
)
( Parser Bool
-> Parser XmlTrees -> Parser XmlTrees -> Parser XmlTrees
forall a. Parser Bool -> Parser a -> Parser a -> Parser a
cond Parser Bool
isEof ( do
Bool
_ <- Parser Bool
isEof
Context -> Parser XmlTrees
closeAll Context
ns
)
( XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> XmlTrees
warn String
"parse error in tagsoup tree construction")
)
)
)
)
)
)
)
where
closeTag :: Context -> String -> Parser XmlTrees
closeTag :: Context -> String -> Parser XmlTrees
closeTag ((String
n':[String]
_), NsEnv
_) String
n1
| String
n' String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
n1 = XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return []
closeTag ns' :: Context
ns'@((String
n':[String]
_), NsEnv
_) String
n1
| String
n' String -> String -> Bool
`isInnerElem` String
n1
= do
String -> Parser ()
insCls String
n1
String -> Parser ()
insCls String
n'
Context -> Parser XmlTrees
buildCont Context
ns'
closeTag Context
ns' String
n1
| String -> Bool
isEmptyElem String
n1 = Context -> Parser XmlTrees
buildCont Context
ns'
closeTag ns' :: Context
ns'@((String
n':[String]
ns1'), NsEnv
_) String
n1
| String
n1 String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
ns1' = do
String -> Parser ()
insCls String
n1
String -> Parser ()
insCls String
n'
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns'
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> XmlTrees
warn (String
"closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n' String -> String -> String
forall a. [a] -> [a] -> [a]
++
String
" expected, but " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" found")
XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl
)
closeTag Context
ns' String
n1
= do
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
ns'
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ( String -> XmlTrees
warn (String
"no opening tag for closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
n1)
XmlTrees -> XmlTrees -> XmlTrees
forall a. [a] -> [a] -> [a]
++ XmlTrees
rl
)
openTag :: Context -> String -> [(String, String)] -> Parser XmlTrees
openTag :: Context -> String -> [(String, String)] -> Parser XmlTrees
openTag cx' :: Context
cx'@([String]
ns',NsEnv
env') String
n1 [(String, String)]
al1
| String -> Bool
isPiDT String
n1 = Context -> Parser XmlTrees
buildCont Context
cx'
| String -> Bool
isEmptyElem String
n1
= do
QName
qn <- NsEnv -> String -> Parser QName
mkElemQN NsEnv
nenv String
n1
XmlTrees
al <- [(String, String)] -> Parser XmlTrees
mkAttrs [(String, String)]
al1
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
cx'
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTrees -> NTree XNode
mkElement QName
qn XmlTrees
al [] NTree XNode -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
: XmlTrees
rl)
| [String] -> String -> Bool
closesElem [String]
ns' String
n1 = do
String -> [(String, String)] -> Parser ()
insOpn String
n1 [(String, String)]
al1
String -> Parser ()
insCls ([String] -> String
forall a. HasCallStack => [a] -> a
head [String]
ns')
Context -> Parser XmlTrees
buildCont Context
cx'
| Bool
otherwise = do
QName
qn <- NsEnv -> String -> Parser QName
mkElemQN NsEnv
nenv String
n1
XmlTrees
al <- [(String, String)] -> Parser XmlTrees
mkAttrs [(String, String)]
al1
XmlTrees
cs <- Context -> Parser XmlTrees
buildCont ((String
n1 String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
ns'), NsEnv
nenv)
XmlTrees
rl <- Context -> Parser XmlTrees
buildCont Context
cx'
XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> XmlTrees -> NTree XNode
mkElement QName
qn XmlTrees
al XmlTrees
cs NTree XNode -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
: XmlTrees
rl)
where
nenv :: NsEnv
nenv = Bool -> [(String, String)] -> NsEnv -> NsEnv
extendNsEnv Bool
withNamespaces [(String, String)]
al1 NsEnv
env'
mkElemQN :: NsEnv -> String -> Parser QName
mkElemQN = Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
False
mkAttrQN :: NsEnv -> String -> Parser QName
mkAttrQN = Bool -> Bool -> NsEnv -> String -> Parser QName
mkQN Bool
withNamespaces Bool
True
isPiDT :: String -> Bool
isPiDT (Char
'?':String
_) = Bool
True
isPiDT (Char
'!':String
_) = Bool
True
isPiDT String
_ = Bool
False
mkAttrs :: [(String, String)] -> Parser XmlTrees
mkAttrs = ((String, String) -> Parser (NTree XNode))
-> [(String, String)] -> Parser XmlTrees
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ((String -> String -> Parser (NTree XNode))
-> (String, String) -> Parser (NTree XNode)
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry String -> String -> Parser (NTree XNode)
mkA)
mkA :: String -> String -> Parser (NTree XNode)
mkA String
an String
av = do
QName
qan <- NsEnv -> String -> Parser QName
mkAttrQN NsEnv
nenv String
an
NTree XNode -> Parser (NTree XNode)
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> XmlTrees -> NTree XNode
mkAttr' QName
qan (NTree XNode -> XmlTrees
forall {a}. a -> [a]
wrap (NTree XNode -> XmlTrees)
-> (String -> NTree XNode) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NTree XNode
mkText' (String -> XmlTrees) -> String -> XmlTrees
forall a b. (a -> b) -> a -> b
$ String
av))
closeAll :: ([String], NsEnv) -> Parser XmlTrees
closeAll :: Context -> Parser XmlTrees
closeAll ([String]
ns',NsEnv
_) = XmlTrees -> Parser XmlTrees
forall a. a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return ((String -> XmlTrees) -> [String] -> XmlTrees
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap String -> XmlTrees
wrn [String]
ns')
where
wrn :: String -> XmlTrees
wrn = String -> XmlTrees
warn (String -> XmlTrees) -> (String -> String) -> String -> XmlTrees
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"insert missing closing tag " String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. Show a => a -> String
show