Sorting strings with non-Western characters

I wanted to print the sorted Polish names of all available languages.

import java.util.*;

/**
 * Prints the distinct display-language names of every available locale,
 * one per line, after switching the JVM default locale to pl_PL.
 *
 * The names are ordered with the natural ordering of {@code String}
 * (no {@code Comparator} is supplied), i.e. by character value rather than
 * by any language-specific alphabet.
 */
public class Tmp
{
  public static void main(String... args)
  {
    // getDisplayLanguage() without arguments renders names in the default locale.
    Locale.setDefault(new Locale("pl","PL"));

    // A set drops repeated language names (many locales share one language).
    Set<String> distinctNames = new LinkedHashSet<String>();
    for (Locale candidate : Locale.getAvailableLocales()) {
      String displayName = candidate.getDisplayLanguage();
      boolean blank = displayName.trim().length() == 0;
      if (blank) {
        continue;
      }
      distinctNames.add(displayName);
    }

    List<String> ordered = new ArrayList<String>(distinctNames);
    Collections.sort(ordered);

    for (String displayName : ordered) {
      System.out.println(displayName);
    }
  }
}

Unfortunately, I have a problem with the sort part. Output:

:
:
kataloński
koreański
litewski
macedoński
:
:
węgierski
włoski
łotewski

Unfortunately, in Polish ł comes after l and before m, so the output should be:

:
:
kataloński
koreański
litewski
łotewski
macedoński
:
:
węgierski
włoski

How can I do this? Is there a universal, language-independent way to do it (say, if I later want to display this list in another language and sort it according to that language's rules)?

+5
source share
5 answers

You must pass a Collator to the sort method:

// sort according to default locale
Collections.sort(langs, Collator.getInstance());

The default sort order is determined by the Unicode code points in the string and is not the correct alphabetical order in any language.

+4
source

try

Collections.sort(langs, Collator.getInstance(new Locale("pl", "PL")));

It will produce

...
litewski
łotewski
...

See the Collator API documentation.

+6

Use java.text.Collator.getInstance(Locale). Collators implement Comparator, so they can be passed to any API that accepts one, such as Collections.sort or TreeSet.

+2
+1

. , android 7.0, Android. . ( 3000 ) Android.

/**
 * Comparator that orders {@code Dictionary} objects by the text returned from
 * {@code getOriginal()}, using a hand-written per-character weight table in
 * which Polish diacritics sit directly after their base letters
 * (for example {@code 'a' < 'ą' < 'b'}).
 *
 * <p>The comparison is case-insensitive: both strings are lower-cased before
 * their characters are weighed.
 *
 * <p>NOTE(review): the raw {@code Comparator} type is kept so that existing
 * callers keep compiling unchanged.
 */
public class SortBasedOnName implements Comparator {

    /**
     * Weight added to the code unit of a character that has no table entry, so
     * that such characters sort after every mapped character, ordered among
     * themselves by code unit.
     * NOTE(review): assumes every weight in the table is below 0x10000 - the
     * entries visible here are all at most 0x0068; confirm against the full table.
     */
    private static final int UNMAPPED_OFFSET = 0x10000;

    /** Sort weight for each supported (lower-case) character. */
    private final static Map<Character, Integer>myPolCharTable = new HashMap<Character, Integer>();
    static {
        myPolCharTable.put(' ',0x0020);
        myPolCharTable.put('!',0x0021);
        myPolCharTable.put('"',0x0022);

        // NOTE(review): weights jump from 0x0022 to 0x0040 here; the entries in
        // between appear to be missing from this listing.

        myPolCharTable.put('a',0x0040);
        myPolCharTable.put('ą',0x0041);
        myPolCharTable.put('b',0x0042);
        myPolCharTable.put('c',0x0043);
        myPolCharTable.put('ć',0x0044);

        // NOTE(review): weights jump from 0x0044 to 0x0066 here; the entries in
        // between appear to be missing from this listing.

        myPolCharTable.put('{',0x0066);
        myPolCharTable.put('|',0x0067);
        myPolCharTable.put('}',0x0068);
    }

    public SortBasedOnName() {}

    /**
     * Compares two {@code Dictionary} objects by their {@code getOriginal()} text.
     *
     * @param o1 first object; cast to {@code Dictionary}
     * @param o2 second object; cast to {@code Dictionary}
     * @return -1, 0 or 1 when the first text sorts before, equal to, or after the second
     * @throws ClassCastException if either argument is not a {@code Dictionary}
     * @throws NullPointerException if either argument, or the text it returns, is {@code null}
     */
    @Override
    public int compare(Object o1, Object o2) {

        Dictionary dd1 = (Dictionary) o1;
        Dictionary dd2 = (Dictionary) o2;

        return strCompareWithDiacritics(dd1.getOriginal(), dd2.getOriginal());
    }

    /**
     * Compares two strings character by character using the weight table.
     * The first position with different weights decides the result; when one
     * string is a prefix of the other, the shorter string sorts first.
     *
     * @return -1, 0 or 1
     */
    private  int strCompareWithDiacritics(String s1, String s2) {

        // Locale.ROOT keeps the case mapping independent of the device locale
        // (the default-locale overload maps 'I' differently under e.g. Turkish).
        String left = s1.toLowerCase(java.util.Locale.ROOT);
        String right = s2.toLowerCase(java.util.Locale.ROOT);

        int common = Math.min(left.length(), right.length());
        for (int i = 0; i < common; i++) {
            int leftWeight = weightOf(left.charAt(i));
            int rightWeight = weightOf(right.charAt(i));
            if (leftWeight != rightWeight) {
                return leftWeight < rightWeight ? -1 : 1;
            }
        }

        // All shared positions are equal: the shorter string comes first.
        if (left.length() == right.length()) {
            return 0;
        }
        return left.length() < right.length() ? -1 : 1;
    }

    /**
     * Returns the sort weight of a character. Characters absent from the table
     * get a deterministic fallback weight instead of a {@code null} lookup
     * result, so the comparison never fails part-way through and the resulting
     * order stays consistent.
     */
    private static int weightOf(char c) {
        Integer weight = myPolCharTable.get(c);
        return weight != null ? weight.intValue() : UNMAPPED_OFFSET + c;
    }
}
0
source

All Articles