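The tokenize module can be used to parse the indentation: it handles indents the same way Python itself does. The ElementTree module can then be used to build the tree, for example, as html: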
from tokenize import DEDENT, ENDMARKER, INDENT, NAME, NEWLINE, NL, generate_tokens
from xml.etree import ElementTree as etree
def parse(file, TreeBuilder=etree.TreeBuilder):
    """Build a nested ``ul``/``li`` tree from an indented outline.

    The input is split into tokens with ``generate_tokens``. Every NAME token
    becomes an ``li`` element holding the token text, every INDENT opens a
    nested ``ul`` and every DEDENT closes it. The whole result is wrapped in
    one outer ``ul``.

    Args:
        file: Object whose ``readline()`` method returns the input one text
            line at a time.
        TreeBuilder: Zero-argument callable returning a builder object with
            ``start(tag, attrs)``, ``data(text)``, ``end(tag)`` and
            ``close()`` methods.

    Returns:
        The value returned by the builder's ``close()`` method.

    Raises:
        AssertionError: If the input contains a token other than a name,
            a line break, or an indentation change. The argument is the
            offending ``(type, text, start, end, line)`` token tuple.
    """
    tb = TreeBuilder()
    tb.start('ul', {})
    for type_, text, start, end, line in generate_tokens(file.readline):
        if type_ == NAME:
            tb.start('li', {})
            tb.data(text)
            tb.end('li')
        elif type_ in (NEWLINE, NL):
            # NEWLINE ends a logical line; NL is emitted for blank lines.
            # Neither contributes anything to the tree.
            continue
        elif type_ == INDENT:
            tb.start('ul', {})
        elif type_ == DEDENT:
            tb.end('ul')
        elif type_ == ENDMARKER:
            # Close the outermost list opened before the loop.
            tb.end('ul')
            break
        else:
            # Raised explicitly (rather than via ``assert``) so the check
            # still runs when Python is started with -O.
            raise AssertionError((type_, text, start, end, line))
    return tb.close()
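Any class that implements the .start(), .end(), .data(), and .close() methods can be passed as TreeBuilder, so the same parser can build something other than an html tree if needed.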
To read the input from stdin and write the resulting html to stdout, use ElementTree.write():
import sys

# ElementTree.write() emits encoded bytes unless encoding="unicode" is given,
# so on Python 3 the output must go to the binary layer of stdout. Streams
# without a ``buffer`` attribute are written to directly.
out = getattr(sys.stdout, 'buffer', sys.stdout)
etree.ElementTree(parse(sys.stdin)).write(out, method='html')
Output:
<ul><li>A</li><ul><li>B</li><li>C</li><ul><li>D</li><li>E</li></ul></ul></ul>
Any other file-like objects can be used in place of sys.stdin/sys.stdout.
Note: to write to stdout on Python 3, pass sys.stdout.buffer or encoding="unicode", depending on whether bytes or Unicode text should be written.