( ) -
.
-
Python2:
import re
persian_numbers = u'۱۲۳۴۵۶۷۸۹۰'
arabic_numbers = u'١٢٣٤٥٦٧٨٩٠'
english_numbers = u'1234567890'
persian_regexp = u"(%s)" % u"|".join(persian_numbers)
arabic_regexp = u"(%s)" % u"|".join(arabic_numbers)
def _sub(match_object, digits):
return english_numbers[digits.find(match_object.group(0))]
def _sub_arabic(match_object):
return _sub(match_object, arabic_numbers)
def _sub_persian(match_object):
return _sub(match_object, persian_numbers)
def replace_arabic(text):
return re.sub(arabic_regexp, _sub_arabic, text)
def replace_persian(text):
return re.sub(arabic_regexp, _sub_persian, text)
, "" .
(
lambdas , , )
, , , ,
-
, unicode ( u char), Python:
>>> persian_numbers = u'۱۲۳۴۵۶۷۸۹۰'
>>> english_numbers = u'1234567890'
>>> arabic_numbers = u'١٢٣٤٥٦٧٨٩٠'
>>>
>>> print int(persian_numbers)
1234567890
>>> print int(english_numbers)
1234567890
>>> print int(arabic_numbers)
1234567890
>>> persian_numbers.isdigit()
True
>>>
, "maketrans" unicode ( Python2 - . ).
unicode - , , , , char 26 . , , - , , , .
http://www.joelonsoftware.com/articles/Unicode.html - .
, , Python "" "encode" unicode.
>>> arabic_numbers = u'١٢٣٤٥٦٧٨٩٠'
>>> len(arabic_numbers)
10
>>> enc_arabic = arabic_numbers.encode("utf-8")
>>> print enc_arabic
١٢٣٤٥٦٧٨٩٠
>>> len(enc_arabic)
20
>>> int(enc_arabic)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: '\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9\xd9\xa0'
, " " - ( str Python 2.x) justa strrng , , , - , GUI, , html- ..