Using Antlr to analyze data from an endless stream

Is ANTLR suitable for parsing data from streams that do not have EOF right after the text to be parsed? According to my observation, the lexer does not emit the current token until the first character of the next token has been received. In addition, the parser does not seem to finish a rule until the first token of the next rule has been received. Here is a simple grammar I tried:

fox: 'quick' 'brown' 'fox' '\r'? '\n' ;

Then I used the generated parser with UnbufferedCharStream and UnbufferedTokenStream:

  CharStream input = new UnbufferedCharStream(is);
  MyLexer lex = new MyLexer(input);
  lex.setTokenFactory(new CommonTokenFactory(true));
  TokenStream tokens = new UnbufferedTokenStream(lex);
  MyParser parser = new MyParser(tokens);
  MyParser.FoxContext fox = parser.fox();

When the stream receives "quick", nothing happens.

When 'b' arrives, the parser enters the fox rule.

then ' roun' - (2 - !)

' f' : 'quick'

- ' ox'

(unix): ""

(4 ), 2 .

, , , 2 , , . . " fox" "\n" , "", .

? ?

!

+5
3

ANTLR 4 , - , - - .

ANTLR 4 ( , ), , , , . , .

  • ( ) String char[].
  • ANTLRInputStream .
  • lex/ , EOF .

, , , :

  • , , , .

  • EOF, , ( ..).

  • , EOF, - , .

+5

, , , , , . , , .

, . System.in , .

Streaming.g

grammar Streaming;

fox   : 'quick' NL 'brown' NL 'fox' NL DONE NL;
DONE  : 'done';
NL    : '\r'? '\n';

StreamingTest.java

import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.CommonTokenFactory;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.UnbufferedCharStream;
import org.antlr.v4.runtime.UnbufferedTokenStream;
import org.antlr.v4.runtime.tree.TerminalNode;

/**
 * Interactive demo that feeds {@code System.in} through the generated
 * {@code StreamingLexer} and {@code StreamingParser} using ANTLR's unbuffered
 * streams, echoing every recognized token with a {@code "-> "} prefix so the
 * lag between typing input and seeing tokens can be observed.
 */
public class StreamingTest {
    /**
     * Runs the lexer-only demo first, then the parser demo, both on standard input.
     *
     * @param args ignored
     * @throws Exception declared for convenience; nothing is caught here
     */
    public static void main(String[] args) throws Exception {
        lex();
        parse();
    }

    /**
     * Pulls tokens straight from the lexer and prints each one until the
     * {@code DONE} token is read.
     *
     * <p>The loop also stops on {@link Token#EOF}: if standard input is closed
     * before "done" is typed, the lexer keeps handing back EOF tokens, and
     * checking for {@code DONE} alone would spin forever.
     */
    private static void lex() {
        System.out.println("-> Reading from lexer:");
        UnbufferedCharStream input = new UnbufferedCharStream(System.in);
        StreamingLexer lexer = new StreamingLexer(input);
        // copyText = true: each token keeps its own copy of the matched text.
        lexer.setTokenFactory(new CommonTokenFactory(true));

        // Read each token until hitting input "done" (or end of input).
        for (Token t = lexer.nextToken(); !endsLexing(t); t = lexer.nextToken()) {
            System.out.println("-> " + describe(t));
        }
    }

    /**
     * Parses the {@code fox} rule from standard input, printing every terminal
     * as the parse listener is notified of it.
     */
    private static void parse() {
        System.out.println("-> Reading from parser:");
        UnbufferedCharStream input = new UnbufferedCharStream(System.in);
        StreamingLexer lexer = new StreamingLexer(input);
        // copyText = true: each token keeps its own copy of the matched text.
        lexer.setTokenFactory(new CommonTokenFactory(true));

        StreamingParser parser = new StreamingParser(new UnbufferedTokenStream<CommonToken>(lexer));
        parser.addParseListener(new StreamingBaseListener() {
            @Override
            public void visitTerminal(TerminalNode t) {
                System.out.println("-> " + describe(t.getSymbol()));
            }
        });

        parser.fox();
    }

    /** Returns true when the lexer demo should stop: on {@code DONE} or end of input. */
    private static boolean endsLexing(Token token) {
        int type = token.getType();
        return type == StreamingLexer.DONE || type == Token.EOF;
    }

    /**
     * Returns the text to display for a token: the token's own text, or its
     * symbolic name from {@code StreamingLexer.tokenNames} when the text is
     * blank (e.g. a newline), so whitespace-only tokens stay visible.
     */
    private static String describe(Token token) {
        String text = token.getText();
        if (text.trim().isEmpty()) {
            return StreamingLexer.tokenNames[token.getType()];
        }
        return text;
    }
}

, / . ->. , .

-> Reading from lexer:
quick
-> quick
brown
-> NL
-> brown
fox
-> NL
-> fox
done
-> NL
-> Reading from parser:
quick
brown
-> quick
-> NL
fox
-> brown
-> NL
done
-> fox
-> NL

-> done

-> NL

, , , quick NL , quick. , UnbufferedCharStream ( NL, !), . , . Javadoc :

"" , , , char.

, , .

. ? : UnbufferedTokenStream . , lexer b) lexer UnbufferedCharStream . , "" "".

, "" ANTLR v4 . , , .


Antlr , EOF ?

ANTLR 4. -, , ( UnbufferedTokenStream consume, sync), , . . , , .

+3

-, Unbuffered * Streams. , , LexerATNSimulator.execATN(). , , next. ParserATNSimulator, , Lexer. , . , , Antlr 4, , . Flex/Bison, lexer , , , . - parse() , , . , , .

+2

All Articles