While this has nothing to do with Boost, let me assure you that parsing PDF (and PostScript) is about as trivial as you might want. Let's say you have a scanner object that returns a series of tokens. Types of tokens you will receive from the scanner:
- String
- Dict begin (<<)
- Dict End (>>)
- Name (/ whatever)
- Number
- Hex array
- Left angle bracket (<)
- Right angle bracket (>)
- The beginning of the array ([)
- Array End (])
- Start of procedure ({)
- The end of the procedure (})
- Comment (% foo)
- , , , , , DictEnd Done.
, PDF, , . , "" ( #):
// Token-driven interpreter loop: read one token at a time from the scanner,
// convert it to a PdfObject, then either run it (procedures) or push it.
while (true)
{
    // Fix: the declaration had lost its identifier, leaving `token` undeclared
    // for every later use in the loop.
    // NOTE(review): MLPdfToken is taken to be the token type name - confirm.
    MLPdfToken token = scanner.GetToken();
    if (token == null)
    {
        // A null token from the scanner ends the loop with EndOfFile.
        return MachineExit.EndOfFile;
    }

    PdfObject obj = PdfObject.FromToken(token);
    PdfProcedure proc = obj as PdfProcedure;
    if (proc == null)
    {
        // Anything that is not a procedure is simply pushed.
        Push(obj);
        continue;
    }

    if (IsExecuting())
    {
        // While IsExecuting() is true, only the right-brace token is run;
        // every other procedure is pushed instead of executed.
        if (token.Type == PdfTokenType.RBrace)
        {
            proc.Execute(this);
        }
        else
        {
            Push(obj);
        }
    }
    else
    {
        proc.Execute(this);
    }

    // The terminal check applies to every procedure, whether it was executed
    // or pushed above.
    if (proc.IsTerminal)
    {
        // NOTE(review): the other exit uses MachineExit.*, this one Machine.* -
        // confirm both name the same result type.
        return Machine.ParseComplete;
    }
}
, PdfObject Execute(), machine.Push(this) IsTerminal, false, REPL :
// Simplified interpreter loop: every PdfObject is asked to Execute itself,
// so no separate procedure type check is needed here.
while (true)
{
    // Fix: the declaration had lost its identifier, leaving `token` undeclared
    // for every later use in the loop.
    // NOTE(review): MLPdfToken is taken to be the token type name - confirm.
    MLPdfToken token = scanner.GetToken();
    if (token == null)
    {
        // A null token from the scanner ends the loop with EndOfFile.
        return MachineExit.EndOfFile;
    }

    PdfObject obj = PdfObject.FromToken(token);
    if (IsExecuting())
    {
        // While IsExecuting() is true, only the right-brace token is run;
        // everything else is pushed.
        if (token.Type == PdfTokenType.RBrace)
        {
            obj.Execute(this);
        }
        else
        {
            Push(obj);
        }
    }
    else
    {
        obj.Execute(this);

        // The terminal check is made only on this branch.
        if (obj.IsTerminal)
        {
            // NOTE(review): the other exit uses MachineExit.*, this one
            // Machine.* - confirm both name the same result type.
            return Machine.ParseComplete;
        }
    }
}
Machine - Machine PdfObject (Push, Pop, Mark, CountToMark, Index, Dup, Swap), ExecProcBegin ExecProcEnd.
, . , , , PdfObject.FromToken , (, , , , bool), PdfObject. "proc set" , PdfProcedure. , <<, proc :
// Handler for the dictionary-begin token: pushes a dictionary-typed mark
// onto the machine.
void DictBegin(PdfMachine machine)
{
    PdfMark dictionaryMark = new PdfMark(PdfMarkType.Dictionary);
    machine.Push(dictionaryMark);
}
, << " . >> :
// Handler for the dictionary-end token: pops everything back through the
// dictionary mark, pairs the popped objects up as key/value entries of a new
// PdfDict, and pushes that dictionary onto the machine.
// Throws PdfException when the popped object count is odd or when a key is
// not of type Name.
void DictEnd(PdfMachine machine)
{
    PdfDict result = new PdfDict();
    PdfObject[] items = machine.PopThroughMark(PdfMarkType.Dictionary);

    bool hasUnpairedObject = items.Length % 2 != 0;
    if (hasUnpairedObject)
    {
        throw new PdfException("dictionaries need an even number of objects.");
    }

    int pairCount = items.Length / 2;
    for (int pair = 0; pair < pairCount; pair++)
    {
        // Entries alternate key, value, key, value, ...
        PdfObject name = items[2 * pair];
        PdfObject value = items[2 * pair + 1];
        if (name.Type != PdfObjectType.Name)
        {
            throw new PdfException("dictionaries need a /name for the key.");
        }
        result.put((PdfName)name, value);
    }

    machine.Push(result);
}
So >> , . . -, , , .
, PDF , PostScript . , , . ( read-eval- (push) ) - '}'.
, PDF 14 0 obj << /Type /Annot /SubType /Square >> endobj, , , - :
- Push 14
- Push 0
- obj ( " " ).
- Push /Type
- Push /Annot
- Push /SubType
- Push /Square
- endobj ( , ( ) . - , "" , ).
"endobj" , .
, 14 PDF, , , . , .
, , PDF, :
<< [/key value]* >> stream ...raw data... endstream endobj
, . stream , PdfDict. , ( ), , dict ( , ) . endstream - -op. PdfDict PdfStream , PdfStream bool, , , .
PostScript , , . , : , . , exec, PS.
, ++, , , , , , , , , ++.
PDF- .NET, Acrobat 1-4, , , , Acrobat (, - C, ++, ).
xref ( xref), - , EOF , xref. ( CS 101), , /Prev, , , /Prev . xref .
- , . , , , xref. , , . , (.. 14 0 R). , , xref, <objnum> <generation> obj <object contents> endobj. , :
// Writes this object to the streaming context as a "<<" ... ">>" block.
// When the context reports a reference to this object, the block is wrapped
// in an object-definition begin/end pair written by PdfUtils.
// Fixes: the declaration was missing its return type (void), and the closing
// delimiter call was spelled Writeline instead of WriteLine.
public override void ToStream(PdfStreamingContext context)
{
    if (context.HasReference(this))
    {
        PdfUtils.WriteObjectDefinitionBegin(this, context);
    }

    context.Writer.Indent();
    context.Writer.WriteLine("<<");
    WriteContents(context);
    context.Writer.Exdent();
    context.Writer.WriteLine(">>");

    // HasReference is queried again rather than cached, so the call sequence
    // on the context stays exactly as before.
    if (context.HasReference(this))
    {
        PdfUtils.WriteObjectDefinitionEnd(this, context);
    }
}
, . - , xref, , , , ..
, , , . , : " , Acrobat ? , , , PDF , 't spec compliant Acrobat . , , . , , , - , , -. PDF PDF .
(, , ) , . , .
tl; dr. PDF, . , , REPL.