Filter spaces and newlines with Pygments

I am trying to add syntax highlighting to my Django site. The problem is that the characters &nbsp; and <br /> end up in the highlighted output. Is there any way to preserve these characters? Here is the code I'm using:

from BeautifulSoup  import BeautifulSoup
from django import template
from django.template.defaultfilters import stringfilter
import pygments
import pygments.formatters
import pygments.lexers


# Template library instance; the filter below is attached to it through
# the ``@register.filter`` decorator.
register = template.Library()

@register.filter
@stringfilter
def pygmentized(html):
    """Syntax-highlight every ``<code>`` element that carries a ``class``.

    The value of the ``class`` attribute is passed to Pygments as the lexer
    name. ``<code>`` elements without a ``class`` attribute are left as they
    are. Any exception raised while highlighting propagates to the caller.

    Returns the whole (modified) document as a unicode string.
    """
    document = BeautifulSoup(html)
    for tag in document.findAll('code'):
        # Only elements that name a lexer through ``class`` are highlighted.
        if not tag.has_key('class'):
            continue
        # Serialise the children back to markup; this is what gets lexed.
        source = ''.join(unicode(child) for child in tag.contents)
        highlighted = pygments.highlight(
            source,
            pygments.lexers.get_lexer_by_name(tag['class'], stripall=True),
            pygments.formatters.HtmlFormatter(),
        )
        # Swap the original children for the parsed Pygments output.
        tag.contents = [BeautifulSoup(highlighted)]
        tag.name = 'code'
    return unicode(document)
+3
source share
1 answer

Well, Petri is right: pre is for code blocks. Before he pointed that out, I had already written a function to clean up the highlighted output. It is messy, but it may be useful for someone who just needs to remove certain things from the final output:

from BeautifulSoup  import BeautifulSoup
from django import template
from django.template.defaultfilters import stringfilter
import pygments
import pygments.formatters
import pygments.lexers


# Template library instance; the filter below is attached to it through
# the ``@register.filter`` decorator.
register = template.Library()
# Lookup table used by uglyfilter(): a <span> whose complete text equals one
# of these keys is replaced by the mapped string (the '' value replaces the
# span with nothing).
# NOTE(review): '&NBSP;' does not look like a standard HTML entity name
# (entity names are case-sensitive) -- confirm the upper-case entry is wanted.
wanted = {'br': '<br />', 'BR': '<BR />', 'nbsp': '&nbsp;', 'NBSP': '&NBSP;', '/&gt;': ''}

def uglyfilter(html):
    """Replace ``<span>`` elements whose text is a key of ``wanted``.

    The text nodes of each ``<span>`` are concatenated; when the result is a
    key of the module-level ``wanted`` mapping, the whole span is replaced by
    the mapped string. All other spans are left untouched.

    Returns the (modified) document as a unicode string.
    """
    soup = BeautifulSoup(html)
    for span in soup.findAll('span'):
        text = ''.join(span.findAll(text=True))
        if text in wanted:
            span.replaceWith(wanted[text])
    return unicode(soup)


@register.filter
@stringfilter
def pygmentized(html):
    """Syntax-highlight every ``<pre>`` element that carries a ``class``.

    The value of the ``class`` attribute is passed to Pygments as the lexer
    name, and the Pygments output is run through ``uglyfilter`` before it
    replaces the element's children. ``<pre>`` elements without a ``class``
    attribute are left as they are. Any exception raised while highlighting
    propagates to the caller.

    Returns the whole (modified) document as a unicode string.
    """
    document = BeautifulSoup(html)
    for tag in document.findAll('pre'):
        # Only elements that name a lexer through ``class`` are highlighted.
        if not tag.has_key('class'):
            continue
        # Serialise the children back to markup; this is what gets lexed.
        source = ''.join(unicode(child) for child in tag.contents)
        highlighted = pygments.highlight(
            source,
            pygments.lexers.get_lexer_by_name(tag['class'], stripall=True),
            pygments.formatters.HtmlFormatter(),
        )
        # Post-process the Pygments markup, then swap it in for the children.
        tag.contents = [BeautifulSoup(uglyfilter(highlighted))]
        tag.name = 'pre'
    return unicode(document)
+1
source

All Articles