Scala: Haskell 本の引き写しでパーサジェネレータ
「プログラミング Haskell」の第 8 章「関数型パーサ」の写経をしようかと思ったのですが、そのまま引き写しても面白くないので、Scala で書いてみました。せっかくですので、Haskell のモナド連結の糖衣構文 do を、極力そのまま Scala の for 式で書けるよう、Parser[A] には、map(), flatMap() を備え、Scala の型とします。
…できた。Haskell 版から変えたところとしては、入力文字列にパーサを適用した際には、Scala らしく Option[(A, String)] を返すようにした、Haskell で [Char] であるところは、適宜 String にした、程度か。案外そのまま行けますね、というよりも、Scala が Haskell を色濃く反映しているんですね。
abstract trait Parser[A] {
def apply(inp: String): Option[(A, String)]
def parse(inp: String) = apply(inp)
def map[B](f: (A) => B) = {
def p_parse = parse _
new Parser[B] {
def apply(inp: String) = {
p_parse(inp) match {
case None => None
case Some((v, out)) => Some((f(v), out))
}
}
}
}
def flatMap[B](f: (A) => Parser[B]): Parser[B] = {
def p_parse = parse _
new Parser[B] {
def apply(inp: String) = {
p_parse(inp) match {
case None => None
case Some((v, out)) => f(v)(out)
}
}
}
}
def +++(q: Parser[A]) = {
def p_parse = parse _
new Parser[A] {
def apply(inp: String) = {
p_parse(inp) match {
case None => q.parse(inp)
case s @ Some(_) => s
}
}
}
}
}
def return_[A](v: A): Parser[A] = {
new Parser[A] { def apply(inp: String) = { Some((v, inp)) } }
}
def failure[A]: Parser[A] = {
new Parser[A] { def apply(inp: String) = { None } }
}
def item: Parser[Char] = new Parser[Char] {
def apply(inp: String) = inp match {
case "" => None
case _ => Some((inp.head, inp.tail))
}
}
def parse[A](p: Parser[A], inp: String): Option[(A, String)] = p(inp)
/*
parse(return_(1), "abc")
res1: Option[(Int, String)] = Some((1,abc))
parse(failure, "abc")
res2: Option[(Nothing, String)] = None
parse(item, "abc")
res3: Option[(Char, String)] = Some((a,bc))
*/
/*
def p = for {
x <- item
_ <- item
y <- item
} yield (x, y)
p: Parser[(Char, Char)]
def p_dash = item.flatMap(x =>
item.flatMap(__ =>
item.map(y =>
(x, y) )))
p_dash: Parser[(Char, Char)]
parse(p, "abcdef")
res4: Option[((Char, Char), String)] = Some(((a,c),def))
parse(p, "ab")
res5: Option[((Char, Char), String)] = None
*/
/*
parse(item +++ return_('d'), "abc")
res6: Option[(Char, String)] = Some((a,bc))
parse(failure +++ return_('d'), "abc")
res7: Option[(Char, String)] = Some((d,abc))
parse(failure +++ failure, "abc")
res8: Option[(Nothing, String)] = None
*/
def sat(p: (Char) => Boolean): Parser[Char] = for {
x <- item
r <- if (p(x)) return_(x) else failure
} yield (r)
def digit = sat(_.isDigit)
def lower = sat(_.isLower)
def upper = sat(_.isUpper)
def letter = sat(_.isLetter)
def alphanum = letter +++ digit
def char(x: Char) = sat(x.==)
/*
parse(digit, "123")
res9: Option[(Char, String)] = Some((1,23))
parse(digit, "abc")
res10: Option[(Char, String)] = None
parse(char('a'), "abc")
res11: Option[(Char, String)] = Some((a,bc))
parse(char('a'), "123")
res12: Option[(Char, String)] = None
*/
// これは文字列を返すパーサとする。Scala の String は List[Char] ではない
def string(s: String): Parser[String] = s match {
case "" => return_("")
case _ => for {
x <- char(s.head)
xs <- string(s.tail)
} yield (x +: xs)
}
/*
parse(string("abc"), "abcdef")
res13: Option[(String, String)] = Some((abc,def))
parse(string("abc"), "ab1234")
res14: Option[(String, String)] = None
*/
object Many {
def many[A](p: Parser[A]): Parser[List[A]] = {
many1(p) +++ return_(Nil)
}
def many1[A](p: Parser[A]): Parser[List[A]] = {
for {
v <- p
vs <- many(p)
} yield (v :: vs)
}
}
import Many._
/*
parse(many(digit), "123abc")
res15: Option[(List[Char], String)] = Some((List(1, 2, 3),abc))
parse(many(digit), "abcdef")
res16: Option[(List[Char], String)] = Some((List(),abcdef))
parse(many1(digit), "abcdef")
res17: Option[(List[Char], String)] = None
*/
// 仕方ないので文字列にしておく
def ident = for {
x <- lower
xs <- many(alphanum)
} yield ((x :: xs).foldLeft("")(_ + _))
// これも
def nat: Parser[Int] = for {
xs <- many1(digit)
} yield (xs.foldLeft("")(_ + _).toInt)
def space: Parser[Unit] = for {
_ <- many(sat(_.isWhitespace))
} yield (Unit)
/*
parse(ident, "abc def")
res0: Option[(java.lang.String, String)] = Some((abc, def))
parse(nat, "123 abc")
res1: Option[(Int, String)] = Some((123, abc))
parse(space, " abc")
res2: Option[(Unit, String)] = Some(((),abc))
*/
def token[A](p: Parser[A]): Parser[A] = for {
_ <- space
v <- p
_ <- space
} yield (v)
def identifier: Parser[String] = token(ident)
def natural: Parser[Int] = token(nat)
def symbol(xs: String): Parser[String] = token(string(xs))
/*
def p: Parser[List[Int]] = for {
_ <- symbol("[")
n <- natural
ns <- many(
for {
_ <- symbol(",")
x <- natural
} yield (x)
)
_ <- symbol("]")
} yield (n :: ns)
parse(p, " [1, 2, 3] ")
res19: Option[(List[Int], String)] = Some((List(1, 2, 3),))
parse(p, "[1, 2,]")
res20: Option[(List[Int], String)] = None
*/
def int: Parser[Int] = (
for {
_ <- symbol("-")
n <- natural
} yield (- n)
) +++ natural
object Expr {
def expr: Parser[Int] = for {
t <- term
r <- (
for {
_ <- symbol("+")
e <- expr
} yield (t + e)
) +++ (
for {
_ <- symbol("-")
e <- expr
} yield (t - e)
) +++ return_(t)
} yield (r)
def term: Parser[Int] = for {
f <- factor
r <- (
for {
_ <- symbol("*")
t <- term
} yield (f * t)
) +++ (
for {
_ <- symbol("/")
t <- term
} yield (f / t)
) +++ return_(f)
} yield (r)
def factor: Parser[Int] = (
for {
_ <- symbol("(")
e <- expr
_ <- symbol(")")
} yield (e)
) +++ int
}
import Expr._
/*
parse(expr, "2*3+4")
res26: Option[(Int, String)] = Some((10,))
parse(expr, "2*(3+4)")
res28: Option[(Int, String)] = Some((14,))
parse(expr, "2 * (3 + 4)")
res30: Option[(Int, String)] = Some((14,))
parse(expr, "2*3-4")
res33: Option[(Int, String)] = Some((2,))
parse(expr, "-1")
res36: Option[(Int, String)] = Some((-1,))
parse(expr, "(-3 * 4 * - 2) / 6")
res43: Option[(Int, String)] = Some((4,))
*/