Skip to content

Commit 1201aca

Browse files
committed
Fix ANTLR error recovery
1. Report all lexer errors before parsing. 2. Catch the first parser error and interrupt parsing immediately after it occurs.
1 parent df632b7 commit 1201aca

File tree

2 files changed

+66
-47
lines changed

2 files changed

+66
-47
lines changed

src/main/scala/decaf/frontend/parsing/Lexer.scala

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package decaf.frontend.parsing
22

3-
import decaf.driver.error.UnrecogCharError
3+
import decaf.driver.error._
4+
import decaf.frontend.parsing.Util.getPos
45
import decaf.frontend.parsing.antlr.DecafLexer
56
import org.antlr.v4.runtime.{CharStream, Token}
67

@@ -10,19 +11,65 @@ import org.antlr.v4.runtime.{CharStream, Token}
1011
*
1112
* @param in input stream
1213
*/
13-
class Lexer(in: CharStream) extends DecafLexer(in) {
14+
class Lexer(in: CharStream, errorIssuer: ErrorIssuer) extends DecafLexer(in) {
15+
16+
// Temporary variables for parsing a string literal
17+
private val buffer: StringBuilder = new StringBuilder
18+
private var startPos: Pos = NoPos
1419

1520
/**
16-
* Catch unrecognized character error (override the generated code).
17-
* This error will '''immediately interrupt''' the parsing.
21+
* Lexer error handler and string literal parser.
22+
* This method will be invoked before a token is returned to the parser.
23+
*
24+
* As a special case, an unrecognized character error will '''immediately interrupt''' the parsing.
25+
* All other lexer errors are recoverable: they are reported to the error issuer and the token is still emitted.
1826
*
19-
* @return the emitted token if no error occurs
27+
* @return the emitted token
2028
*/
21-
override def emit(): Token = getType match {
29+
override def emit: Token = getType match {
30+
// unrecognized char: immediately interrupt
2231
case DecafLexer.UNRECOG_Char =>
23-
val token = super.emit()
24-
throw new UnrecogCharError(token.getText.head, Util.getPos(token))
25-
case _ => super.emit()
32+
val token = super.emit
33+
throw new UnrecogCharError(token.getText.head, getPos(token))
34+
// integer too large
35+
case DecafLexer.INT_LIT =>
36+
val token = super.emit
37+
var literal = "0"
38+
try {
39+
literal = token.getText.toInt.toString
40+
} catch {
41+
case _: NumberFormatException => // not a valid 32-bit integer
42+
errorIssuer.issue(new IntTooLargeError(token.getText, getPos(token)))
43+
}
44+
setText(literal)
45+
super.emit
46+
// string literal
47+
case DecafLexer.UNTERM_STRING =>
48+
errorIssuer.issue(new UntermStrError(buffer.toString, startPos))
49+
super.emit
50+
case DecafLexer.OPEN_STRING =>
51+
buffer.clear()
52+
buffer += '"'
53+
val token = super.emit
54+
startPos = getPos(token)
55+
token
56+
case DecafLexer.ERROR_NEWLINE =>
57+
val token = super.emit
58+
errorIssuer.issue(new NewlineInStrError(buffer.toString, getPos(token)))
59+
token
60+
case DecafLexer.BAD_ESC =>
61+
val token = super.emit
62+
errorIssuer.issue(new BadEscCharError(getPos(token)))
63+
token
64+
case DecafLexer.ESC | DecafLexer.VALID_CHAR =>
65+
val token = super.emit
66+
buffer ++= token.getText
67+
token
68+
case DecafLexer.CLOSE_STRING =>
69+
buffer += '"'
70+
setText(buffer.toString)
71+
super.emit
72+
case _ => super.emit
2673
}
2774

2875
/**
@@ -41,7 +88,7 @@ class Lexer(in: CharStream) extends DecafLexer(in) {
4188
if (_mode == DecafLexer.IN_STRING) {
4289
setType(DecafLexer.UNTERM_STRING)
4390
setText("UNTERM_STRING")
44-
val t = super.emit()
91+
val t = emit
4592
popMode()
4693
t
4794
} else {

src/main/scala/decaf/frontend/parsing/Parser.scala

Lines changed: 9 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
6161
*/
6262
override def transform(in: InputStream): Tree = {
6363
val stream = CharStreams.fromStream(in)
64-
val lexer = new decaf.frontend.parsing.Lexer(stream)
64+
val lexer = new decaf.frontend.parsing.Lexer(stream, this)
6565
val tokens = new CommonTokenStream(lexer)
6666
val parser = new DecafParser(tokens)
6767
parser.addErrorListener(ErrorListener)
@@ -98,7 +98,7 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
9898

9999
override def syntaxError(recognizer: Recognizer[_, _], offendingSymbol: Any, lineNumber: Int,
100100
charPositionInLine: Int, msg: String, e: RecognitionException): Unit = {
101-
issue(new SyntaxError(msg, new Pos(lineNumber, charPositionInLine + 1)))
101+
throw new SyntaxError(msg, new Pos(lineNumber, charPositionInLine + 1))
102102
}
103103
}
104104

@@ -108,20 +108,20 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
108108

109109
object TopLevelVisitor extends DecafParserBaseVisitor[TopLevel] {
110110

111-
override def visitTopLevel(ctx: DecafParser.TopLevelContext): TopLevel = positioned(ctx) {
111+
override def visitTopLevel(ctx: DecafParser.TopLevelContext): TopLevel = {
112112
val classes = ctx.classDef.map(_.accept(ClassDefVisitor))
113-
TopLevel(classes)
113+
TopLevel(classes).setPos(classes.head.pos)
114114
}
115115
}
116116

117117
object ClassDefVisitor extends DecafParserBaseVisitor[ClassDef] {
118118

119-
override def visitClassDef(ctx: DecafParser.ClassDefContext): ClassDef = positioned(ctx) {
119+
override def visitClassDef(ctx: DecafParser.ClassDefContext): ClassDef = {
120120
val id = ctx.id.accept(IdVisitor)
121121
// NOTE: if an optional symbol (like extendsClause) is undefined, its corresponding field is null.
122122
val parent = if (ctx.extendsClause != null) Some(ctx.extendsClause.id.accept(IdVisitor)) else None
123123
val fields = ctx.field.map(_.accept(FieldVisitor))
124-
ClassDef(id, parent, fields)
124+
ClassDef(id, parent, fields).setPos(getPos(ctx.CLASS.getSymbol))
125125
}
126126
}
127127

@@ -275,43 +275,15 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
275275
override def visitLiteral(ctx: DecafParser.LiteralContext): Expr = ctx.lit.accept(this)
276276

277277
override def visitIntLit(ctx: DecafParser.IntLitContext): Expr = positioned(ctx) {
278-
val literal = ctx.getText
279-
var value = -1
280-
try {
281-
value = literal.toInt
282-
} catch {
283-
case _: NumberFormatException => // not a valid 32-bit integer
284-
issue(new IntTooLargeError(literal, getPos(ctx.INT_LIT.getSymbol)))
285-
}
286-
287-
IntLit(value)
278+
IntLit(ctx.getText.toInt)
288279
}
289280

290281
override def visitBoolLit(ctx: DecafParser.BoolLitContext): Expr = positioned(ctx) {
291282
BoolLit(ctx.getText.toBoolean)
292283
}
293284

294-
override def visitStringLit(ctx: DecafParser.StringLitContext): Expr = {
295-
val buffer = new StringBuilder
296-
val startPos = getPos(ctx.OPEN_STRING.getSymbol)
297-
buffer += '"'
298-
ctx.stringChar.foreach { node =>
299-
if (node.ERROR_NEWLINE != null) { // handle new line in string
300-
issue(new NewlineInStrError(buffer.toString, getPos(node.ERROR_NEWLINE.getSymbol)))
301-
}
302-
if (node.BAD_ESC != null) { // handle bad escape character
303-
issue(new BadEscCharError(getPos(node.BAD_ESC.getSymbol)))
304-
}
305-
buffer ++= node.getText
306-
}
307-
308-
if (ctx.UNTERM_STRING != null) { // handle unterminated string
309-
issue(new UntermStrError(buffer.toString, startPos))
310-
}
311-
312-
buffer += '"'
313-
StringLit(buffer.toString).setPos(startPos)
314-
}
285+
override def visitStringLit(ctx: DecafParser.StringLitContext): Expr =
286+
StringLit(ctx.CLOSE_STRING.getText).setPos(getPos(ctx.OPEN_STRING.getSymbol))
315287

316288
override def visitNullLit(ctx: DecafParser.NullLitContext): Expr = positioned(ctx) { NullLit() }
317289

0 commit comments

Comments
 (0)