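This seems to work as expected: the number of words matched by the regex equals the line count that `wc` reports for the same file.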
>>> import re
>>> words=re.findall('\w+', open('/usr/share/dict/words').read().lower())
>>> len(words)
234936
>>>
bash-3.2$ wc /usr/share/dict/words
234936 234936 2486813 /usr/share/dict/words
? ?
, :
words=re.findall(r'\w+', open('/usr/share/dict/words').read())