JavaScript equivalent of Java Charset / String class combination for decoding byte arrays

In Java, if we know the encoding for a byte array, we can decode it and get the corresponding characters as follows:

Charset charset = Charset.forName(encoding);
String decodedString = new String(byteArray, charset);

How can I achieve the same result in JavaScript?

Suppose I read a file that I know is encoded in Windows-1253 (Greek). To display the contents of the file correctly, I have to decode its bytes.

If we don’t decode the bytes (or we open the file in a text editor that does not know the encoding), we see something like this:

ÁõôÞ åßíáé ç åëëçíéêÞ.

But when this text (i.e. bytes) is decoded, we get

Αυτή είναι η ελληνική.
+3
source share
2 answers

JavaScript strings are UTF-16, as specified by ECMAScript.

0

For example, the following function decodes UTF-8 bytes:

/**
 * Decodes a sequence of UTF-8 encoded bytes into a JavaScript (UTF-16) string.
 *
 * Each element of `strBytes` is converted with `Number()`, so numeric strings
 * such as '65' are accepted alongside plain numbers and typed-array elements.
 *
 * Malformed input is rejected instead of being decoded into garbage: an
 * element that is not an integer in 0..255, a stray continuation byte, an
 * invalid lead byte, a missing or non-continuation trailing byte, and a
 * decoded value above U+10FFFF all raise RangeError. Overlong encodings and
 * encoded surrogate code points are not rejected.
 *
 * @param {ArrayLike<number|string>} strBytes - UTF-8 bytes to decode.
 * @returns {string} The decoded text ('' for empty input).
 * @throws {RangeError} If the input is not well-formed as described above.
 */
var getString = function (strBytes) {
    // Upper bound on buffered code units before flushing, so that
    // String.fromCharCode.apply is never handed an oversized argument list.
    var MAX_SIZE = 0x4000;
    var codeUnits = [];
    var result = '';
    var byteCount = strBytes.length;
    var index = 0;

    // Returns the element at `position` as an integer in 0..255, or throws.
    var readByte = function (position) {
        var value = Number(strBytes[position]);
        if (!isFinite(value) || value < 0 || value > 0xFF || Math.floor(value) !== value) {
            throw new RangeError('Invalid byte value at index ' + position + ': ' + strBytes[position]);
        }
        return value;
    };

    // Returns the 6 payload bits of the continuation byte at `position`, or throws.
    var readContinuation = function (position) {
        if (position >= byteCount) {
            throw new RangeError('Truncated UTF-8 sequence at end of input');
        }
        var value = readByte(position);
        if ((value & 0xC0) !== 0x80) {
            throw new RangeError('Invalid UTF-8 continuation byte at index ' + position + ': ' + value);
        }
        return value & 0x3F;
    };

    while (index < byteCount) {
        var leadIndex = index;
        var lead = readByte(index++);
        var codePoint;
        var pending; // number of continuation bytes still to consume

        if (lead < 0x80) {
            // 0xxxxxxx: single-byte (ASCII) sequence.
            codePoint = lead;
            pending = 0;
        } else if ((lead & 0xE0) === 0xC0) {
            // 110xxxxx: two-byte sequence.
            codePoint = lead & 0x1F;
            pending = 1;
        } else if ((lead & 0xF0) === 0xE0) {
            // 1110xxxx: three-byte sequence.
            codePoint = lead & 0x0F;
            pending = 2;
        } else if ((lead & 0xF8) === 0xF0) {
            // 11110xxx: four-byte sequence.
            codePoint = lead & 0x07;
            pending = 3;
        } else {
            // 10xxxxxx (stray continuation) or 11111xxx (never valid).
            throw new RangeError('Invalid UTF-8 lead byte at index ' + leadIndex + ': ' + lead);
        }

        while (pending-- > 0) {
            codePoint = (codePoint << 6) | readContinuation(index++);
        }

        if (codePoint > 0x10FFFF) {
            throw new RangeError('Invalid code point: ' + codePoint);
        }

        if (codePoint <= 0xFFFF) {
            codeUnits.push(codePoint);
        } else {
            // Supplementary-plane code points become a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            codeUnits.push((codePoint >> 10) | 0xD800, (codePoint & 0x3FF) | 0xDC00);
        }

        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Final flush happens unconditionally after the loop, so it cannot be
    // skipped depending on where the last sequence ended.
    if (codeUnits.length > 0) {
        result += String.fromCharCode.apply(null, codeUnits);
    }

    return result;
};

!

0

All Articles