Interactive Antlr

I am trying to write a simple interactive language (using System.in as the source) with ANTLR, and I have run into several problems. The examples I found on the Internet all use a simple line-by-line loop, for example:

while(readline)
  result = parse(line)
  doStuff(result)

But what if I am writing something like Pascal / SMTP / etc., where the first line is required to be a specific construct X? I know that this can be checked in doStuff, but I think it is logically part of the syntax.

Or what if the command is split across multiple lines? I can try

while(readline)
  lines.add(line)
  try
    result = parse(lines)
    lines = []
    doStuff(result)
  catch
    nop

But with this, I also hide real errors.

Or I could repeat all the lines every time, but:

  • it will be slow
  • there are instructions that I do not want to run twice

Can this be done with ANTLR, or if not, with something else?

+4
source share
4

Dutow wrote:

Or I could re-parse all the lines every time, but:

Can this be done with ANTLR, or if not, with something else?

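Yes, ANTLR can do this. Perhaps not out of the box, but with a bit of custom code, it is certainly possible. You also do not need to re-parse the entire token stream for it.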

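Let's say you want to parse a very simple language line by line, where each line is either a program declaration, a uses declaration or a statement.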

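The input must always start with a program declaration, followed by zero or more uses declarations, followed by zero or more statements. A uses declaration cannot come after a statement, and there cannot be more than one program declaration.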

For simplicity, a statement is just a simple assignment: a = 4 or b = a.

An ANTLR grammar for such a language could look like this:

// REPL grammar, first draft: every input handed to `parse` must consist of
// exactly one program declaration, one uses declaration or one statement,
// followed by EOF. No ordering between the three kinds of line is enforced here.
grammar REPL;

// Entry rule: a single line's worth of input.
parse
  :  programDeclaration EOF
  |  usesDeclaration EOF
  |  statement EOF
  ;

// For example: program A
programDeclaration
  :  PROGRAM ID
  ;

// For example: uses a,b,c
usesDeclaration
  :  USES idList
  ;

// Assignment of an integer literal or of another identifier,
// for example: a = 4   or   b = a
statement
  :  ID '=' (INT | ID)
  ;

// One or more identifiers separated by commas.
idList
  :  ID (',' ID)*
  ;

// Keyword tokens.
PROGRAM : 'program';
USES    : 'uses';
// Identifier: a letter or underscore, then any mix of letters, underscores and digits.
ID      : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*;
// Unsigned integer literal: one or more decimal digits.
INT     : '0'..'9'+;
// A single whitespace character; skip() is called so no token is emitted for it.
SPACE   : (' ' | '\t' | '\r' | '\n') {skip();};

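But we will need to add a couple of checks, of course. Also, by default, a parser takes a token stream in its constructor, but since we are planning to feed tokens to the parser line by line, we will need to create a new constructor in our parser. You can add custom members to your lexer or parser classes by putting them in a @parser::members { ... } or @lexer::members { ... } section, respectively. We will also add a couple of boolean flags to keep track of whether the program declaration has already happened and whether uses declarations are still allowed. Finally, we will add a process(String source) method which, for each new line, creates a lexer whose tokens are fed to the parser.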

All of that would look like this:

// Custom members for the REPLParser class: two state flags, a no-argument
// constructor, and a process(String) method that parses one piece of source text.
@parser::members {

  // Flags kept on the parser instance; the constructor below initialises them.
  // NOTE(review): presumably flipped by grammar rules as declarations and
  // statements are matched; nothing in this section changes them.
  boolean programDeclDone;
  boolean usesDeclAllowed;

  // Creates a parser with no token stream attached (null is passed to the
  // superclass constructor); process() installs a stream before parsing.
  public REPLParser() {
    super(null);
    programDeclDone = false;
    usesDeclAllowed = true;
  }

  // Lexes the given source with a fresh REPLLexer, attaches the resulting
  // token stream to this parser and runs the parse rule on it.
  public void process(String source) throws Exception {
    ANTLRStringStream in = new ANTLRStringStream(source);
    REPLLexer lexer = new REPLLexer(in);
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    super.setTokenStream(tokens);
    this.parse(); // the entry point of our parser
  } 
}

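Now, inside our grammar, we are going to check through a couple of gated semantic predicates whether the declarations are being parsed in the correct order. And after parsing a certain declaration or statement, we want to flip certain boolean flags to allow or disallow declarations from then on. The flipping of these flags is done in each rule's @after { ... } section, which is executed (not surprisingly) after the tokens of that parser rule have been matched.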

The final grammar file now looks like this (including some System.out.println calls for debugging purposes):

// REPL grammar, final version: one line of input per call to process().
// Semantic predicates on the two boolean flags below restrict which kind of
// line is accepted, depending on what earlier calls have already matched.
grammar REPL;

@parser::members {

  // Set to true in the after-action of programDeclaration.
  boolean programDeclDone;
  // Set to false in the after-action of statement.
  boolean usesDeclAllowed;

  // Creates a parser with no token stream attached (null is passed to the
  // superclass constructor); process() installs a stream before parsing.
  public REPLParser() {
    super(null);
    programDeclDone = false;
    usesDeclAllowed = true;
  }

  // Lexes the given source with a fresh REPLLexer, attaches the resulting
  // token stream to this parser and runs the parse rule on it.
  public void process(String source) throws Exception {
    ANTLRStringStream in = new ANTLRStringStream(source);
    REPLLexer lexer = new REPLLexer(in);
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    super.setTokenStream(tokens);
    this.parse();
  } 
}

// Entry rule. The second alternative is guarded by programDeclDone, so uses
// declarations and statements are only considered once that flag is true.
parse
  :  programDeclaration EOF
  |  {programDeclDone}? (usesDeclaration | statement) EOF
  ;

// Guarded by the negated programDeclDone flag; prints the program name and
// then sets the flag in the after-action.
programDeclaration
@after{
  programDeclDone = true;
}
  :  {!programDeclDone}? PROGRAM ID {System.out.println("\t\t\t program <- " + $ID.text);}
  ;

// Guarded by usesDeclAllowed; prints the text matched by idList.
usesDeclaration
  :  {usesDeclAllowed}? USES idList {System.out.println("\t\t\t uses <- " + $idList.text);}
  ;

// Assignment of an integer literal or identifier to an identifier; prints both
// sides and then clears usesDeclAllowed in the after-action.
statement
@after{
  usesDeclAllowed = false; 
}
  :  left=ID '=' right=(INT | ID) {System.out.println("\t\t\t " + $left.text + " <- " + $right.text);}
  ;

// One or more identifiers separated by commas.
idList
  :  ID (',' ID)*
  ;

// Keyword tokens.
PROGRAM : 'program';
USES    : 'uses';
// Identifier: a letter or underscore, then any mix of letters, underscores and digits.
ID      : ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*;
// Unsigned integer literal: one or more decimal digits.
INT     : '0'..'9'+;
// A single whitespace character; skip() is called so no token is emitted for it.
SPACE   : (' ' | '\t' | '\r' | '\n') {skip();};

This grammar can be tested with the following class:

import org.antlr.runtime.*;
import java.util.Scanner;

/**
 * Minimal read-eval-print loop that hands each line typed on standard input to
 * a single, long-lived {@code REPLParser}.
 *
 * <p>The same parser instance is reused for every line, so whatever state the
 * parser keeps between calls to {@code process} carries over from one line to
 * the next.
 */
public class Main {
    /**
     * Prompts for input until the user types {@code quit} or standard input is
     * exhausted, passing every other line to {@code REPLParser.process}.
     *
     * @param args ignored
     * @throws Exception if {@code REPLParser.process} throws
     */
    public static void main(String[] args) throws Exception {
        Scanner keyboard = new Scanner(System.in);
        REPLParser parser = new REPLParser();
        while(true) {
            System.out.print("\n> ");
            // End of input (Ctrl-D / Ctrl-Z, or a closed pipe) ends the session
            // cleanly; calling nextLine() here would throw NoSuchElementException.
            if(!keyboard.hasNextLine()) {
                break;
            }
            String input = keyboard.nextLine();
            if(input.equals("quit")) {
                break;
            }
            parser.process(input);
        }
        System.out.println("\nBye!");
    }
}

To run this test class, do the following:

# generate a lexer and parser:
java -cp antlr-3.2.jar org.antlr.Tool REPL.g

# compile all .java source files:
javac -cp antlr-3.2.jar *.java

# run the main class on Windows:
java -cp .;antlr-3.2.jar Main 
# or on Linux/Mac:
java -cp .:antlr-3.2.jar Main

As you can see, you can only declare a program once:

> program A
                         program <- A

> program B
line 1:0 rule programDeclaration failed predicate: {!programDeclDone}?

uses declarations cannot come after statements:

> program X
                         program <- X

> uses a,b,c
                         uses <- a,b,c

> a = 666
                         a <- 666

> uses d,e
line 1:0 rule usesDeclaration failed predicate: {usesDeclAllowed}?

and you must start with a program declaration:

> uses foo
line 1:0 rule parse failed predicate: {programDeclDone}?
+4

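Here is an example of how to parse input from System.in without first splitting it manually into lines and without making major compromises in the grammar. I am using ANTLR 3.4. ANTLR 4 may already have addressed this problem. I am still using ANTLR 3, though, and perhaps someone else with this problem is too.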

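Before getting to the solution, here are the hurdles I ran into that keep this seemingly trivial problem from being easy to solve: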

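  • The built-in ANTLR classes that derive from CharStream consume the entire stream up front. Obviously, an interactive session (or any other stream of indeterminate length) cannot supply all of its data in advance.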
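  • The built-in BufferedTokenStream and its derived classes will not stop on a skipped or off-channel token. In an interactive setting, this means that the current statement cannot end (and therefore cannot be executed) until the first token of the next statement, or EOF, has been consumed.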
  • The end of a statement may itself be indeterminate until the next statement begins.

Consider a simple example:

statement: 'verb' 'noun' ('and' 'noun')*
         ;
WS: //etc...

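Interactively parsing a single statement (and only a single statement) is not possible with this grammar. Either the next statement has to be started (that is, "verb" has to appear in the input), or the grammar has to be changed to mark the end of a statement, e.g. with a ';'.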

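  • I have not found a way to handle a multi-channel lexer with my solution. That does not hurt me, since I can replace my $channel = HIDDEN with skip(), but it is still a limitation worth mentioning.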
  • A grammar may need a new rule to simplify interactive parsing.

For example, the normal entry point of my grammar is this rule:

script    
    : statement* EOF -> ^(STMTS statement*) 
    ;

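My interactive session cannot start at the script rule, because that rule does not finish until EOF. But it cannot start at statement either, because my tree parser may rely on the STMTS node.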

So I introduced the following rule specifically for interactive sessions:

interactive
    : statement -> ^(STMTS statement)
    ;

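In my case there are no "first line" rules, so I cannot say how easy or hard it would be to do something similar for them. It may be a matter of writing a rule like the following and running it at the start of the interactive session: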

interactive_start
    : first_line
    ;
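  • The code behind a grammar (e.g., code that tracks symbols) may have been written on the assumption that the input and the parser object share the same lifetime. For my solution, that assumption does not hold. The parser is replaced after each statement, so the new parser must be able to pick up the symbol tracking (or whatever else) where the previous one left off. This is a typical separation-of-concerns problem, so I do not think there is much more to say about it.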

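The first problem mentioned, the limitation of the built-in CharStream classes, was my only major hang-up. ANTLRStringStream has all the machinery I need, so I derived my own CharStream class from it. The base class's data member is assumed to already hold every character, so I had to override all the methods that access it. I then changed each direct read into a call to the (new) method dataAt, which manages reading from the stream. That is basically all there is to it. Please note that the code here may have unnoticed problems and does no real error handling.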

/**
 * A {@code CharStream} that pulls characters from an {@link InputStream} on
 * demand instead of requiring the whole input up front.
 *
 * <p>It reuses the bookkeeping fields of {@code ANTLRStringStream}
 * ({@code data}, {@code n}, {@code p}, {@code line},
 * {@code charPositionInLine}) but routes every access to {@code data} through
 * {@link #dataAt(int)}, which reads more input whenever the requested index has
 * not been buffered yet. {@code n} is the number of characters buffered so far;
 * {@code data} may be longer than {@code n}.
 *
 * <p>Each byte read is widened directly to a {@code char}; no charset decoding
 * is performed, so only single-byte input is represented faithfully.
 */
public class MyInputStream extends ANTLRStringStream {
    /** Extra slots added on top of the 1.5x growth so that tiny buffers grow usefully. */
    private static final int MIN_GROWTH = 16;

    private final InputStream in;

    /**
     * Creates a stream with an empty buffer that will be filled lazily.
     *
     * @param in the byte source to read from
     */
    public MyInputStream(InputStream in) {
        super(new char[0], 0);
        this.in = in;
    }

    /**
     * Advances past the current character, updating line and column.
     *
     * <p>Adapted from {@code ANTLRStringStream}. It only advances when the
     * character at {@code p} is already buffered ({@code p < n}); it never
     * triggers a read from the underlying stream itself.
     */
    @Override
    public void consume() {
        if (p < n) {
            charPositionInLine++;
            if (dataAt(p) == '\n') {
                line++;
                charPositionInLine = 0;
            }
            p++;
        }
    }

    /**
     * Looks ahead (or behind, for negative {@code i}) relative to the current
     * position, reading from the underlying stream if necessary.
     *
     * <p>Adapted from {@code ANTLRStringStream}.
     *
     * @param i lookahead distance; 1 is the current character, -1 the previous one
     * @return the character, 0 for {@code i == 0}, or {@code CharStream.EOF}
     *         when the position is before the start or past the end of input
     */
    @Override
    public int LA(int i) {
        if (i == 0) {
            return 0; // undefined
        }
        if (i < 0) {
            i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
            if ((p + i - 1) < 0) {
                return CharStream.EOF; // invalid; no char before first char
            }
        }

        // Read ahead
        return dataAt(p + i - 1);
    }

    /**
     * Returns the characters from {@code start} to {@code stop}, inclusive,
     * reading ahead if {@code stop} has not been buffered yet.
     *
     * <p>If the underlying stream ends before {@code stop}, the result is
     * truncated at the last character actually read rather than padded with
     * unread buffer slots.
     */
    @Override
    public String substring(int start, int stop) {
        ensureRead(stop);

        int last = Math.min(stop, n - 1);
        if (last < start) {
            return "";
        }
        return new String(data, start, last - start + 1);
    }

    /**
     * Returns only the characters read so far; the buffer may contain unused
     * slots beyond {@code n}, which must not leak into the text.
     */
    @Override
    public String toString() {
        return new String(data, 0, n);
    }

    /**
     * Returns the character at absolute index {@code i}, reading from the
     * underlying stream as needed, or {@code CharStream.EOF} if the stream
     * ends before that index.
     */
    private int dataAt(int i) {
        ensureRead(i);

        if (i < n) {
            return data[i];
        } else {
            // Nothing to read at that point.
            return CharStream.EOF;
        }
    }

    /**
     * Reads from the underlying stream until index {@code i} is buffered or the
     * stream reports end of input.
     *
     * @throws RuntimeException wrapping any {@link IOException} from the stream
     */
    private void ensureRead(int i) {
        if (i < n) {
            // The data has been read.
            return;
        }

        int distance = i - n + 1;

        ensureCapacity(n + distance);

        // Crude way to copy from the byte stream into the char array.
        for (int pos = 0; pos < distance; ++pos) {
            int read;
            try {
                read = in.read();
            } catch (IOException e) {
                // TODO handle this better.
                throw new RuntimeException(e);
            }

            if (read < 0) {
                break;
            } else {
                data[n++] = (char) read;
            }
        }
    }

    /**
     * Makes sure {@code data} can hold at least {@code capacity} characters.
     *
     * <p>The buffer grows by roughly 1.5x so that a long session reading one
     * character at a time does not reallocate and copy on every read.
     */
    private void ensureCapacity(int capacity) {
        if (capacity <= data.length) {
            return;
        }

        // If the grown size overflows to a negative value, max() falls back to capacity.
        int grown = data.length + (data.length >> 1) + MIN_GROWTH;
        char[] newData = new char[Math.max(capacity, grown)];
        System.arraycopy(data, 0, newData, 0, n);
        data = newData;
    }
}

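Launching an interactive session looks much like the boilerplate parsing code, except that UnbufferedTokenStream is used and the parsing takes place in a loop: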

    // One lexer and one unbuffered token stream, both created once and shared
    // by every parser instance built in the loop below.
    MyLexer lex = new MyLexer(new MyInputStream(System.in));
    TokenStream tokens = new UnbufferedTokenStream(lex);

    //Handle "first line" parser rule(s) here.

    while (true) {
        // A new parser is created on every iteration over the same token stream.
        MyParser parser = new MyParser(tokens);
        //Set up the parser here.

        // Invoke the interactive rule once per iteration.
        MyParser.interactive_return r = parser.interactive();

        //Do something with the return value.
        //Break on some meaningful condition.
    }

Still with me? Okay, that's it. :)

+2

If you are using System.in as the source, which is an input stream, why not just have ANTLR tokenize the input stream as it is read, and then parse the tokens?

0
source

You have to put it in doStuff ....

For example, if you are declaring a function, the parse would return a function, right? Without a body, which is fine, because the body will come later. You would do what most REPLs do.

0
source

All Articles