ScalaのパーザコンビネータでParsing Expression Grammar(PEG)の文法を定義してみました。PEGのインタプリタ作る副産物としてできたもので、構文木も作ってくれます。まあ、使いたい人が居るかは激しく疑問ですが、パーサコンビネータの使い方の参考になるかもしれません。ちなみに、
- 各ルールはセミコロンで終端する
- ルールの定義に、
<-
だけでなく、=
も使える
点は、Fordの論文に載っているPEGの定義と異なっている部分です。
object Ast { trait HasPosition { def pos: Pos } case class Pos(line: Int, column: Int) case class Grammar(pos: Pos, start: Symbol, rules: List[Rule]) extends HasPosition case class Rule(pos: Pos, name: Symbol, body: Exp) extends HasPosition sealed trait Exp extends HasPosition case class Seq(pos: Pos, lhs: Exp, rhs: Exp) extends Exp case class Alt(pos: Pos, lhs: Exp, rhs: Exp) extends Exp case class Rep0(pos: Pos, body: Exp) extends Exp case class Rep1(pos: Pos, body: Exp) extends Exp case class Opt(pos: Pos, body: Exp) extends Exp case class AndPred(pos: Pos, body: Exp) extends Exp case class NotPred(pos: Pos, body: Exp) extends Exp case class Str(pos: Pos, target: String) extends Exp case class Wildcard(pos: Pos) extends Exp case class CharClass(pos: Pos, elems: List[CharClassElement]) extends Exp case class Ident(pos: Pos, name: Symbol) extends Exp sealed trait CharClassElement case class OneChar(ch: Char) extends CharClassElement case class CharRange(from: Char, to: Char) extends CharClassElement }
import scala.util.parsing.combinator._ import scala.util.parsing.input import input._ import java.io._ import Ast._ object PegParser { case class ParseException(pos: Pos, msg: String) extends Exception(pos + msg) /* * See * Bryan Ford, "Parsing Expression Grammars: A Recognition-Based Syntactic Foundation", * Symposium on Principles of Programming Languages,2004. */ object ParserCore extends Parsers { type Elem = Char private val any: Parser[Char] = elem(".", c => c != CharSequenceReader.EofCh) private def chr(c: Char): Parser[Char] = c private def crange(f: Char, t: Char): Parser[Char] = elem("[]", c => f <= c && c <= t) private def cset(cs: Char*): Parser[Char] = elem("[]", c => cs.findIndexOf(_ == c) >= 0) private val escapeMap: Map[Char, Char] = Map( 'n' -> '\n', 'r' -> '\r', 't' -> '\t', '\'' -> '\'', '"' -> '"', '[' -> '[', ']' -> ']', '\\' -> '\\' ) lazy val GRAMMER: Parser[Grammar] = (loc <~ Spacing) ~ Definition.+ <~ EndOfFile ^^ { case pos ~ rules => Grammar(Pos(pos.line, pos.column), rules.head.name, rules) } lazy val Definition: Parser[Rule] = (Identifier <~ (LEFTARROW | EQ)) ~ Expression <~ SEMI_COLON ^^ { case n ~ b => Rule(n.pos, n.name, b) } lazy val Expression: Parser[Exp] = Sequence ~ (SLASH ~> Sequence).* ^^ { case x ~ xs => xs.foldLeft(x){(a, y) => Alt(y.pos, a, y)} } lazy val Sequence: Parser[Exp] = Prefix.+ ^^ {case x::xs => xs.foldLeft(x){(a, y) => Seq(y.pos, a, y)} } lazy val Prefix: Parser[Exp] = ( (loc <~ AND) ~ Suffix ^^ { case pos ~ e => AndPred(Pos(pos.line, pos.column), e) } | (loc <~ NOT) ~ Suffix ^^ { case pos ~ e => NotPred(Pos(pos.line, pos.column), e) } | Suffix ) lazy val Suffix: Parser[Exp] = ( loc ~ Primary <~ QUESTION ^^ { case pos ~ e => Opt(Pos(pos.line, pos.column), e) } | loc ~ Primary <~ STAR ^^ { case pos ~ e => Rep0(Pos(pos.line, pos.column), e) } | loc ~ Primary <~ PLUS ^^ { case pos ~ e => Rep1(Pos(pos.line, pos.column), e) } | Primary ) lazy val Primary: Parser[Exp] = ( Identifier /*<~ not(LEFTARROW | EQ)*/ | OPEN ~> Expression <~ CLOSE | Literal | CLASS | loc <~ DOT ^^ { case pos => Wildcard(Pos(pos.line, pos.column)) } ) lazy val loc: Parser[Position] = Parser{reader => Success(reader.pos, reader)} lazy val Identifier: Parser[Ident] = loc ~ IdentStart ~ IdentCont.* <~Spacing ^^ { case pos ~ s ~ c => Ident(Pos(pos.line, pos.column), Symbol("" + s + c.foldLeft("")(_ + _))) } lazy val IdentStart: Parser[Char] = crange('a','z') | crange('A','Z') | '_' lazy val IdentCont: Parser[Char] = IdentStart | crange('0','9') lazy val Literal: Parser[Str] = loc ~ ( chr('\'') ~> (not('\'') ~> CHAR).* <~ chr('\'') <~ Spacing | chr('"') ~> (not('"') ~> CHAR).* <~ chr('"') <~ Spacing ) ^^ { case pos ~ cs => Str(Pos(pos.line, pos.column), cs.foldLeft(""){(acc, n) => acc + n}) } lazy val CLASS: Parser[CharClass] = (loc <~ chr('[')) ~ (not(chr(']')) ~> Range).* <~ ']' ~> Spacing ^^ { case (pos ~ rs) => CharClass(Pos(pos.line, pos.column), rs); } lazy val Range: Parser[CharClassElement] = ( CHAR ~ '-' ~ CHAR ^^ { case f~_~t => CharRange(f, t) } | CHAR ^^ { case c => OneChar(c) } ) lazy val CHAR: Parser[Char] = ( chr('\\') ~ cset('n','r','t','\'','"','[',']','\\') ^^ { case _ ~ c => escapeMap(c) } | chr('\\') ~ crange('0','2') ~ crange('0','7') ~ crange('0','7') ^^ { case _ ~ a ~ b ~ c => Integer.parseInt("" + a + b + c, 8).toChar } | chr('\\') ~ crange('0','7') ~ opt(crange('0','7')) ^^ { case _ ~ a ~ Some(b) => Integer.parseInt("" + a + b, 8).toChar case _ ~ a ~ _ => Integer.parseInt("" + a, 8).toChar } | not('\\') ~ any ^^ { case _ ~ c => c} ) lazy val SEMI_COLON = ';' <~ Spacing lazy val EQ = chr('=') <~ Spacing lazy val LEFTARROW = chr('<') ~ '-' <~ Spacing lazy val SLASH = '/' <~ Spacing lazy val AND = '&' <~ Spacing lazy val NOT = '!' <~ Spacing lazy val QUESTION = '?' <~ Spacing lazy val STAR = '*' <~ Spacing lazy val PLUS = '+' <~ Spacing lazy val OPEN = '(' <~ Spacing lazy val CLOSE = ')' <~ Spacing lazy val DOT = '.' <~ Spacing lazy val Spacing = (Space | Comment).* lazy val Comment = chr('#') ~ (not(EndOfLine) ~ any).* ~ EndOfLine lazy val Space = chr(' ') | chr('\t') | EndOfLine lazy val EndOfLine = chr('\r') ~ chr('\n') | chr('\n') | chr('\r') lazy val EndOfFile = not(any) } def parse(fileName: String, content: java.io.Reader): Grammar = { ParserCore.GRAMMER(input.StreamReader(content)) match { case ParserCore.Success(node, _) => node case ParserCore.Failure(msg, rest) => val pos = rest.pos throw new ParseException(Pos(pos.line, pos.column), msg) case ParserCore.Error(msg, rest) => val pos = rest.pos throw new ParseException(Pos(pos.line, pos.column), msg) } } def main(args: Array[String]) { val g = parse(args(0), new FileReader(args(0))) println(g) } }