Skip to content

Commit

Permalink
Generate AntlrTokenManager and decouple the lexer instance from the tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
Matias Fraga committed Sep 9, 2018
1 parent 1eed099 commit 15bd890
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 54 deletions.
66 changes: 16 additions & 50 deletions pmd-core/src/main/java/net/sourceforge/pmd/cpd/AntlrTokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,86 +4,52 @@

package net.sourceforge.pmd.cpd;

import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;

import net.sourceforge.pmd.lang.AntlrTokenManager;
import net.sourceforge.pmd.lang.ast.TokenMgrError;

/**
* Generic implementation of a {@link Tokenizer} useful to any Antlr grammar.
*/
public abstract class AntlrTokenizer implements Tokenizer {

protected abstract Lexer getLexerForSource(CharStream charStream);
protected abstract AntlrTokenManager getLexerForSource(SourceCode sourceCode);

@Override
public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
StringBuilder buffer = sourceCode.getCodeBuffer();

try {
final CharStream charStream = CharStreams.fromString(buffer.toString());

Lexer lexer = getLexerForSource(charStream);

lexer.removeErrorListeners();
lexer.addErrorListener(new ErrorHandler());
AntlrTokenManager tokenManager = getLexerForSource(sourceCode);
tokenManager.resetListeners();

Token token = lexer.nextToken();
try {
Token token = (Token) tokenManager.getNextToken();

while (token.getType() != Token.EOF) {
if (token.getChannel() != Lexer.HIDDEN) {
final TokenEntry tokenEntry =
new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
new TokenEntry(token.getText(), tokenManager.getFileName(), token.getLine());

tokenEntries.add(tokenEntry);
}
token = lexer.nextToken();
token = (Token) tokenManager.getNextToken();
}
} catch (final ANTLRSyntaxError err) {
// Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so
// they are correctly handled
// when CPD is executed with the '--skipLexicalErrors' command line
// option
throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line " + err.getLine()
+ ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
} catch (final AntlrTokenManager.ANTLRSyntaxError err) {
// Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so they are correctly handled
// when CPD is executed with the '--skipLexicalErrors' command line option
throw new TokenMgrError("Lexical error in file " + tokenManager.getFileName() + " at line "
+ err.getLine() + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
TokenMgrError.LEXICAL_ERROR);
} finally {
tokenEntries.add(TokenEntry.getEOF());
}
}

private static class ErrorHandler extends BaseErrorListener {

@Override
public void syntaxError(final Recognizer<?, ?> recognizer, final Object offendingSymbol, final int line,
final int charPositionInLine, final String msg, final RecognitionException ex) {
throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
}
}

private static class ANTLRSyntaxError extends RuntimeException {
private static final long serialVersionUID = 1L;
private final int line;
private final int column;

/* default */ ANTLRSyntaxError(final String msg, final int line, final int column,
final RecognitionException cause) {
super(msg, cause);
this.line = line;
this.column = column;
}

public int getLine() {
return line;
}

public int getColumn() {
return column;
}
/* default */ static CharStream getCharStreamFromSourceCode(final SourceCode sourceCode) {
StringBuilder buffer = sourceCode.getCodeBuffer();
return CharStreams.fromString(buffer.toString());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/

package net.sourceforge.pmd.lang;

import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;

/**
 * Generic token manager implementation for all Antlr lexers.
 *
 * <p>Wraps an Antlr {@link Lexer} together with the name of the file being
 * tokenized. The wrapped lexer's error listeners are replaced by one that
 * throws {@link ANTLRSyntaxError}, so lexical errors surface as exceptions
 * carrying the line and column reported by the lexer.</p>
 */
public class AntlrTokenManager implements TokenManager {
    private final Lexer lexer;
    private String fileName;

    /**
     * Creates a token manager around the given lexer and installs the
     * exception-throwing error listener on it.
     *
     * @param lexer The lexer; must not be {@code null}
     * @param fileName The file name
     * @throws IllegalArgumentException if {@code lexer} is {@code null}
     */
    public AntlrTokenManager(final Lexer lexer, final String fileName) {
        if (lexer == null) {
            // Fail here rather than with an anonymous NPE on the first getNextToken() call.
            throw new IllegalArgumentException("lexer must not be null");
        }
        this.lexer = lexer;
        this.fileName = fileName;
        // Install the throwing listener up front so that error reporting does
        // not depend on the caller remembering to call resetListeners().
        installErrorHandler();
    }

    /**
     * Returns the next token produced by the wrapped lexer.
     *
     * @return the result of {@code lexer.nextToken()}
     */
    @Override
    public Object getNextToken() {
        return lexer.nextToken();
    }

    /**
     * Sets the name of the file being tokenized.
     *
     * @param fileName The file name
     */
    @Override
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Returns the name of the file being tokenized.
     *
     * @return the file name given at construction or via {@link #setFileName(String)}
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * Removes every error listener registered on the wrapped lexer and
     * registers a fresh one that throws {@link ANTLRSyntaxError}.
     * Calling this more than once is harmless.
     */
    public void resetListeners() {
        installErrorHandler();
    }

    /**
     * Replaces the lexer's error listeners with a single throwing handler.
     * Kept private so the constructor never calls an overridable method.
     */
    private void installErrorHandler() {
        lexer.removeErrorListeners();
        lexer.addErrorListener(new ErrorHandler());
    }


    /**
     * Error listener that converts every reported syntax error into an
     * {@link ANTLRSyntaxError}.
     */
    private static class ErrorHandler extends BaseErrorListener {

        @Override
        public void syntaxError(final Recognizer<?, ?> recognizer, final Object offendingSymbol, final int line,
                final int charPositionInLine, final String msg, final RecognitionException ex) {
            throw new ANTLRSyntaxError(msg, line, charPositionInLine, ex);
        }
    }

    /**
     * Unchecked exception describing a syntax error reported by the lexer,
     * including the line and column at which it was reported.
     */
    public static class ANTLRSyntaxError extends RuntimeException {
        private static final long serialVersionUID = 1L;
        private final int line;
        private final int column;

        /* default */ ANTLRSyntaxError(final String msg, final int line, final int column,
                final RecognitionException cause) {
            super(msg, cause);
            this.line = line;
            this.column = column;
        }

        /**
         * Returns the line passed to the error listener.
         *
         * @return the line of the error
         */
        public int getLine() {
            return line;
        }

        /**
         * Returns the character position in line passed to the error listener.
         *
         * @return the column of the error
         */
        public int getColumn() {
            return column;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
package net.sourceforge.pmd.cpd;

import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;

import net.sourceforge.pmd.lang.AntlrTokenManager;
import net.sourceforge.pmd.lang.swift.antlr4.SwiftLexer;

/**
* SwiftTokenizer
*/

public class SwiftTokenizer extends AntlrTokenizer {

@Override
protected Lexer getLexerForSource(final CharStream charStream) {
return new SwiftLexer(charStream);
protected AntlrTokenManager getLexerForSource(final SourceCode sourceCode) {
CharStream charStream = AntlrTokenizer.getCharStreamFromSourceCode(sourceCode);
return new AntlrTokenManager(new SwiftLexer(charStream), sourceCode.getFileName());
}
}

0 comments on commit 15bd890

Please sign in to comment.