Skip to content

Commit 994b52e

Browse files
authored
Merge pull request #1 from decaf-lang/dev
fix antlr error recovery
2 parents df632b7 + 1201aca commit 994b52e

File tree

2 files changed

+66
-47
lines changed

2 files changed

+66
-47
lines changed

src/main/scala/decaf/frontend/parsing/Lexer.scala

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package decaf.frontend.parsing
22

3-
import decaf.driver.error.UnrecogCharError
3+
import decaf.driver.error._
4+
import decaf.frontend.parsing.Util.getPos
45
import decaf.frontend.parsing.antlr.DecafLexer
56
import org.antlr.v4.runtime.{CharStream, Token}
67

@@ -10,19 +11,65 @@ import org.antlr.v4.runtime.{CharStream, Token}
1011
*
1112
* @param in input stream
1213
*/
13-
class Lexer(in: CharStream) extends DecafLexer(in) {
14+
class Lexer(in: CharStream, errorIssuer: ErrorIssuer) extends DecafLexer(in) {
15+
16+
// Temporary variables for parsing a string literal
17+
private val buffer: StringBuilder = new StringBuilder
18+
private var startPos: Pos = NoPos
1419

1520
/**
16-
* Catch unrecognized character error (override the generated code).
17-
* This error will '''immediately interrupt''' the parsing.
21+
* Lexer error handler and string literal parser.
22+
* This method will be invoked before a token is returned to the parser.
23+
*
24+
* A special case is that an unrecognized character error will '''immediately interrupt''' the parsing.
25+
* Other lexer errors are recoverable: they are reported to the error issuer and a token is still emitted.
1826
*
19-
* @return the emitted token if no error occurs
27+
* @return the emitted token
2028
*/
21-
override def emit(): Token = getType match {
29+
override def emit: Token = getType match {
30+
// unrecognized char: immediately interrupt
2231
case DecafLexer.UNRECOG_Char =>
23-
val token = super.emit()
24-
throw new UnrecogCharError(token.getText.head, Util.getPos(token))
25-
case _ => super.emit()
32+
val token = super.emit
33+
throw new UnrecogCharError(token.getText.head, getPos(token))
34+
// integer too large
35+
case DecafLexer.INT_LIT =>
36+
val token = super.emit
37+
var literal = "0"
38+
try {
39+
literal = token.getText.toInt.toString
40+
} catch {
41+
case _: NumberFormatException => // not a valid 32-bit integer
42+
errorIssuer.issue(new IntTooLargeError(token.getText, getPos(token)))
43+
}
44+
setText(literal)
45+
super.emit
46+
// string literal
47+
case DecafLexer.UNTERM_STRING =>
48+
errorIssuer.issue(new UntermStrError(buffer.toString, startPos))
49+
super.emit
50+
case DecafLexer.OPEN_STRING =>
51+
buffer.clear()
52+
buffer += '"'
53+
val token = super.emit
54+
startPos = getPos(token)
55+
token
56+
case DecafLexer.ERROR_NEWLINE =>
57+
val token = super.emit
58+
errorIssuer.issue(new NewlineInStrError(buffer.toString, getPos(token)))
59+
token
60+
case DecafLexer.BAD_ESC =>
61+
val token = super.emit
62+
errorIssuer.issue(new BadEscCharError(getPos(token)))
63+
token
64+
case DecafLexer.ESC | DecafLexer.VALID_CHAR =>
65+
val token = super.emit
66+
buffer ++= token.getText
67+
token
68+
case DecafLexer.CLOSE_STRING =>
69+
buffer += '"'
70+
setText(buffer.toString)
71+
super.emit
72+
case _ => super.emit
2673
}
2774

2875
/**
@@ -41,7 +88,7 @@ class Lexer(in: CharStream) extends DecafLexer(in) {
4188
if (_mode == DecafLexer.IN_STRING) {
4289
setType(DecafLexer.UNTERM_STRING)
4390
setText("UNTERM_STRING")
44-
val t = super.emit()
91+
val t = emit
4592
popMode()
4693
t
4794
} else {

src/main/scala/decaf/frontend/parsing/Parser.scala

Lines changed: 9 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
6161
*/
6262
override def transform(in: InputStream): Tree = {
6363
val stream = CharStreams.fromStream(in)
64-
val lexer = new decaf.frontend.parsing.Lexer(stream)
64+
val lexer = new decaf.frontend.parsing.Lexer(stream, this)
6565
val tokens = new CommonTokenStream(lexer)
6666
val parser = new DecafParser(tokens)
6767
parser.addErrorListener(ErrorListener)
@@ -98,7 +98,7 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
9898

9999
override def syntaxError(recognizer: Recognizer[_, _], offendingSymbol: Any, lineNumber: Int,
100100
charPositionInLine: Int, msg: String, e: RecognitionException): Unit = {
101-
issue(new SyntaxError(msg, new Pos(lineNumber, charPositionInLine + 1)))
101+
throw new SyntaxError(msg, new Pos(lineNumber, charPositionInLine + 1))
102102
}
103103
}
104104

@@ -108,20 +108,20 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
108108

109109
object TopLevelVisitor extends DecafParserBaseVisitor[TopLevel] {
110110

111-
override def visitTopLevel(ctx: DecafParser.TopLevelContext): TopLevel = positioned(ctx) {
111+
override def visitTopLevel(ctx: DecafParser.TopLevelContext): TopLevel = {
112112
val classes = ctx.classDef.map(_.accept(ClassDefVisitor))
113-
TopLevel(classes)
113+
TopLevel(classes).setPos(classes.head.pos)
114114
}
115115
}
116116

117117
object ClassDefVisitor extends DecafParserBaseVisitor[ClassDef] {
118118

119-
override def visitClassDef(ctx: DecafParser.ClassDefContext): ClassDef = positioned(ctx) {
119+
override def visitClassDef(ctx: DecafParser.ClassDefContext): ClassDef = {
120120
val id = ctx.id.accept(IdVisitor)
121121
// NOTE: if an optional symbol (like extendsClause) is undefined, its corresponding field is null.
122122
val parent = if (ctx.extendsClause != null) Some(ctx.extendsClause.id.accept(IdVisitor)) else None
123123
val fields = ctx.field.map(_.accept(FieldVisitor))
124-
ClassDef(id, parent, fields)
124+
ClassDef(id, parent, fields).setPos(getPos(ctx.CLASS.getSymbol))
125125
}
126126
}
127127

@@ -275,43 +275,15 @@ class Parser(implicit config: Config) extends Phase[InputStream, Tree]("parser",
275275
override def visitLiteral(ctx: DecafParser.LiteralContext): Expr = ctx.lit.accept(this)
276276

277277
override def visitIntLit(ctx: DecafParser.IntLitContext): Expr = positioned(ctx) {
278-
val literal = ctx.getText
279-
var value = -1
280-
try {
281-
value = literal.toInt
282-
} catch {
283-
case _: NumberFormatException => // not a valid 32-bit integer
284-
issue(new IntTooLargeError(literal, getPos(ctx.INT_LIT.getSymbol)))
285-
}
286-
287-
IntLit(value)
278+
IntLit(ctx.getText.toInt)
288279
}
289280

290281
override def visitBoolLit(ctx: DecafParser.BoolLitContext): Expr = positioned(ctx) {
291282
BoolLit(ctx.getText.toBoolean)
292283
}
293284

294-
override def visitStringLit(ctx: DecafParser.StringLitContext): Expr = {
295-
val buffer = new StringBuilder
296-
val startPos = getPos(ctx.OPEN_STRING.getSymbol)
297-
buffer += '"'
298-
ctx.stringChar.foreach { node =>
299-
if (node.ERROR_NEWLINE != null) { // handle new line in string
300-
issue(new NewlineInStrError(buffer.toString, getPos(node.ERROR_NEWLINE.getSymbol)))
301-
}
302-
if (node.BAD_ESC != null) { // handle bad escape character
303-
issue(new BadEscCharError(getPos(node.BAD_ESC.getSymbol)))
304-
}
305-
buffer ++= node.getText
306-
}
307-
308-
if (ctx.UNTERM_STRING != null) { // handle unterminated string
309-
issue(new UntermStrError(buffer.toString, startPos))
310-
}
311-
312-
buffer += '"'
313-
StringLit(buffer.toString).setPos(startPos)
314-
}
285+
override def visitStringLit(ctx: DecafParser.StringLitContext): Expr =
286+
StringLit(ctx.CLOSE_STRING.getText).setPos(getPos(ctx.OPEN_STRING.getSymbol))
315287

316288
override def visitNullLit(ctx: DecafParser.NullLitContext): Expr = positioned(ctx) { NullLit() }
317289

0 commit comments

Comments
 (0)