String.maketrans for English and Persian numbers

I have a function like this:

persian_numbers = '۱۲۳۴۵۶۷۸۹۰'
english_numbers = '1234567890'
arabic_numbers  = '١٢٣٤٥٦٧٨٩٠'

english_trans   = string.maketrans(english_numbers, persian_numbers)
arabic_trans    = string.maketrans(arabic_numbers, persian_numbers)

text.translate(english_trans)
text.translate(arabic_trans)

I want him to translate all Arabic and English numbers into Persian. But Python says:

english_translate = string.maketrans(english_numbers, persian_numbers)
ValueError: maketrans arguments must have same length

I tried to encode strings using Unicode utf-8, but I always had some errors! Sometimes the problem is the Arabic line! Do you know the best solution for this job?

EDIT:

The problem seems to be the length of Unicode characters in ASCII. An Arabic number like "1" is the two characters that I found with ord(). And the length problem starts here: - (

+6
source share
4 answers

unidecode, UTF8. .

Python 2:

>>> from unidecode import unidecode
>>> a = unidecode(u"۰۱۲۳۴۵۶۷۸۹")
>>> a
'0123456789'
>>> unidecode(a)
'0123456789'

Python 3:

>>> from unidecode import unidecode
>>> a = unidecode("۰۱۲۳۴۵۶۷۸۹")
>>> a
'0123456789'
>>> unidecode(a)
'0123456789'
+10

( ) - .

- Python2:

# coding: utf-8

import re

# Attention: while the characters for the strings bellow are 
# dislplayed indentically, inside they are represented
# by distinct unicode codepoints

persian_numbers = u'۱۲۳۴۵۶۷۸۹۰'
arabic_numbers  = u'١٢٣٤٥٦٧٨٩٠'
english_numbers = u'1234567890'


persian_regexp = u"(%s)" %  u"|".join(persian_numbers)
arabic_regexp = u"(%s)" % u"|".join(arabic_numbers)

def _sub(match_object, digits):
    return english_numbers[digits.find(match_object.group(0))]

def _sub_arabic(match_object):
    return _sub(match_object, arabic_numbers)

def _sub_persian(match_object):
    return _sub(match_object, persian_numbers)


def replace_arabic(text):
    return re.sub(arabic_regexp, _sub_arabic, text)

def replace_persian(text):
    return re.sub(arabic_regexp, _sub_persian, text)

, "" .

( lambdas , , )

, , , ,

-

, unicode ( u char), Python:

>>> persian_numbers = u'۱۲۳۴۵۶۷۸۹۰'
>>> english_numbers = u'1234567890'
>>> arabic_numbers  = u'١٢٣٤٥٦٧٨٩٠'
>>> 
>>> print int(persian_numbers)
1234567890
>>> print int(english_numbers)
1234567890
>>> print int(arabic_numbers)
1234567890
>>> persian_numbers.isdigit()
True
>>> 

, "maketrans" unicode ( Python2 - . ).

unicode - , , , , char 26 . , , - , , , .

http://www.joelonsoftware.com/articles/Unicode.html - . , , Python "" "encode" unicode.

>>> arabic_numbers  = u'١٢٣٤٥٦٧٨٩٠'
>>> len(arabic_numbers)
10
>>> enc_arabic = arabic_numbers.encode("utf-8")
>>> print enc_arabic
١٢٣٤٥٦٧٨٩٠
>>> len(enc_arabic)
20
>>> int(enc_arabic)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: invalid literal for int() with base 10: '\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9\xd9\xa0'

, " " - ( str Python 2.x) justa strrng , , , - , GUI, , html- ..

+11

Unicode:

persian_numbers = u'۱۲۳۴۵۶۷۸۹۰'
english_numbers = u'1234567890'
arabic_numbers  = u'١٢٣٤٥٦٧٨٩٠'

, Python .

+1

persiantools:

:

>>> from persiantools import digits

>>> digits.en_to_fa("0987654321")
'۰۹۸۷۶۵۴۳۲۱'

>>> digits.ar_to_fa("٠٩٨٧٦٥٤٣٢١")   # or digits.ar_to_fa(u"٠٩٨٧٦٥٤٣٢١")
'۰۹۸۷۶۵۴۳۲۱'
0

All Articles