moduleText.BibTeX.ParsewhereimportqualifiedText.BibTeX.EntryasEntryimportText.ParserCombinators.Parsec(Parser,(<|>),)importqualifiedText.ParserCombinators.ParsecasParsecimportControl.Monad(liftM,liftM2,liftM3,)-- import Control.Applicative ((<*), )importData.Maybe(catMaybes,)importData.List.HT(chop,)importData.String.HT(trim,){- |
Parse a sequence of BibTeX entries and @#@ comment lines
and return the entries in the order they occur.
Comment lines are recognised but dropped from the result,
and a single comma following an entry is tolerated.

Beware that this and all other parsers do not accept leading spaces,
cf. 'skippingSpace'.
That is, when encountering leading white space
the parser will just return an empty list.
If you want to parse a file that consists entirely of BibTeX data
you should call @skippingLeadingSpace file@ instead.
However, the plain @file@ parser is more combinable:
it can be used for files that contain both BibTeX and other data,
or for automated filetype checking.
-}
file :: Parser [Entry.T]
file =
   liftM catMaybes (Parsec.many (skippingSpace item))
   where
      -- One top-level item: an entry (kept) or a comment line (discarded).
      item = keptEntry <|> droppedComment

      -- An entry, optionally followed by a single comma.
      keptEntry =
         entry >>= \e ->
         Parsec.optional (Parsec.char ',') >>
         return (Just e)

      -- A comment line contributes nothing to the result list.
      droppedComment = comment >> return Nothing

{- |
Parse a line that starts with a hash like

> # this is a comment

The result is the text after the hash, passed through 'trim'.
The comment extends up to the next newline, which is consumed as well.
A comment on the very last line of the input
does not need a terminating newline:
the end of input ends the comment, too.
-}
comment :: Parser String
comment =
   do _ <- Parsec.char '#'
      -- Accept end of input as terminator as well, so that a final
      -- comment line without trailing newline does not fail the parse
      -- after having consumed input.
      fmap trim $
         Parsec.manyTill Parsec.anyChar
            ((Parsec.newline >> return ()) <|> Parsec.eof)

{- |
Parse a BibTeX entry like

> @article{author2010title,
>   author = {Firstname Surname},
>   title = {Title},
>   year = 2010,
>   month = jul,
> }

The entry type, the citation key and the list of field assignments
are passed to 'Entry.Cons' in that order.
The citation key may be empty, but the comma after it is mandatory.
Assignments are separated by commas; a trailing comma is allowed.
White space after each syntactic element is skipped.
-}
entry :: Parser Entry.T
entry =
   do _ <- Parsec.char '@'
      kind <- skippingSpace identifier
      _ <- symbol '{'
      -- a missing key yields the empty string
      key <- skippingSpace (bibIdentifier <|> return "")
      _ <- symbol ','
      fields <- Parsec.sepEndBy assignment (symbol ',')
      _ <- symbol '}'
      return (Entry.Cons kind key fields)
   where
      -- A single punctuation character followed by optional white space.
      symbol c = skippingSpace (Parsec.char c)

{- |
Parse an assignment like

> author = {Firstname Surname}

The result is the pair of field name and field value,
where the value has been passed through 'trim'.
White space after the name, the equals sign and the value is skipped.
-}
assignment :: Parser (String, String)
assignment =
   liftM2 (,) name content
   where
      -- left hand side: the field name
      name = skippingSpace bibIdentifier

      -- the equals sign followed by the trimmed right hand side
      content =
         skippingSpace (Parsec.char '=') >>
         fmap trim (skippingSpace value)

{- |
Parse a value like

> jul

or

> 2010

or

> {Firstname Surname}

or

> "Firstname Surname"

For the delimited forms the enclosing braces or quotation marks
are not part of the result.
-}
value :: Parser String
value =
   word <|> number <|> braced <|> quoted
   where
      -- for fields like: month = jul
      word = Parsec.many1 Parsec.letter

      -- for fields like: year = 2010
      number = Parsec.many1 Parsec.digit

      braced = delimited '{' '}'
      quoted = delimited '"' '"'

      -- TeX text between an opening and a closing delimiter character.
      delimited open close =
         Parsec.between
            (Parsec.char open) (Parsec.char close) (texSequence close)

{- |
Parse zero or more 'texBlock's and concatenate their texts.
Parsing stops in front of the closing character,
which is neither consumed nor part of the result.
-}
texSequence :: Char -> Parser String
texSequence closeChar =
   do blocks <- Parsec.many (texBlock closeChar)
      return (concat blocks)

{- |
Parse a single character like @a@,
a backslash together with the character that follows it,
like the start of @\\alpha@ or the control symbol @\\$@,
or a block enclosed in curly braces like @{\\\"{a}bc}@.

The text is returned verbatim, including braces and backslashes.
A character equal to the closing character is only accepted
as part of a backslash pair or inside a braced block.

Only the first character after a backslash is consumed together with it;
the remaining letters of a macro name are picked up
as ordinary characters by subsequent calls.
Any character may follow the backslash,
since TeX treats a backslash plus a single non-letter as a control symbol.
Restricting the set of admissible characters
would abort the whole parse on input such as @\\$@ or @\\-@.
-}
texBlock :: Char -> Parser String
texBlock closeChar =
   bracedGroup <|> escapedChar <|> plainChar
   where
      -- a nested group; inside it, the closing character is always '}'
      bracedGroup =
         liftM3 (\open body close -> open : body ++ [close])
            (Parsec.char '{') (texSequence '}') (Parsec.char '}')

      -- a backslash keeps its successor from being interpreted,
      -- which notably covers \{, \} and \"
      escapedChar =
         liftM2 (\backslash c -> [backslash, c])
            (Parsec.char '\\') Parsec.anyChar

      -- anything else except the closing character
      plainChar = fmap (:[]) (Parsec.noneOf [closeChar])

{- |
Parse the type of a BibTeX entry like @article@:
a letter followed by any number of letters and digits.
-}
identifier :: Parser String
identifier =
   do initial <- Parsec.letter
      rest <- Parsec.many Parsec.alphaNum
      return (initial : rest)

{- |
Parse the name of a BibTeX entry like @author2010title@:
a non-empty sequence of letters, digits
and the punctuation characters @&;:-_.?+/@.
-}
bibIdentifier :: Parser String
bibIdentifier =
   Parsec.many1 nameChar
   where
      -- a single character that is admissible in a name
      nameChar = Parsec.alphaNum <|> Parsec.oneOf "&;:-_.?+/"

{- |
Extend a parser such that all trailing white space is skipped.
The result of the given parser is returned unchanged.

It might be more comfortable to skip all leading spaces,
but parsers written that way are hard to combine.
This is so, since if you run two parsers in parallel
and both of them expect leading spaces,
then the parser combinator does not know
which one of the parallel parsers to choose.
-}
skippingSpace :: Parser a -> Parser a
skippingSpace p =
   p >>= \result ->
   Parsec.skipMany Parsec.space >>
   return result

{- |
Extend a parser such that all leading white space is skipped
before the given parser is run.
-}
skippingLeadingSpace :: Parser a -> Parser a
skippingLeadingSpace p =
   do Parsec.skipMany Parsec.space
      p


-- * Convert contents of BibTeX fields into lists

{- |
Split a string at the commas
and remove the leading spaces of every part,
cf. 'splitSepList'.
-}
splitCommaSepList :: String -> [String]
splitCommaSepList str = splitSepList ',' str

{- |
Split a string containing a list of authors in BibTeX notation.
The string is broken into words,
the word list is split at every word that equals @and@
and the words of each author are joined again with single spaces.
-}
splitAuthorList :: String -> [String]
splitAuthorList authors =
   map unwords (chop (== "and") (words authors))

{- |
Split a string at every occurrence of the separator character
and remove the leading space characters of every part.
-}
splitSepList :: Char -> String -> [String]
splitSepList sep str =
   map (dropWhile (== ' ')) (chop (== sep) str)