[漢字/Kanji], - . , 竜, , . , . , "" . hiragana/katakana + kanji, , . , , .
, , , , : kZVariant char. , kSpecializedSemanticVariant .内 內 , - , ( ).
-, , , script. . , . - .
EDIT:
http://pastebin.com/e276zn6y
:
, Unicode.org... , Unihan - CJK. , 3. , kXXX Unihan, A/I , OP, B/it , OP . . . "" , ( "" + "" ), . , , "" + "" (, "" ), . , "" "" / "" .
QUICK TEST
Here is some code to be used together with the script-detection function linked above.
/**
 * Guess the dominant script of a string by classifying each character with
 * `scriptName` and returning the label that occurs most often.
 *
 * `scriptName` is defined elsewhere; it is called with one character at a
 * time and its return value is used as the tally key (presumably a Unicode
 * script name such as "Han" or "Hiragana" -- confirm against its definition).
 *
 * Side effect: the full tally object is written to the console.
 *
 * @param {string} x - Text to inspect.
 * @returns {string} The most frequent label, or '' for an empty string. On a
 *     tie, the label that was encountered first wins.
 */
function guessLanguage(x) {
  var results = {};
  var length = x.length;
  var i = 0;
  var ch, code, next, name;

  while (i < length) {
    ch = x.charAt(i);
    code = x.charCodeAt(i);
    // Keep a surrogate pair together so an astral character (e.g. a CJK
    // Extension B ideograph) is classified once as a whole character rather
    // than twice as two meaningless halves.
    if (code >= 0xD800 && code <= 0xDBFF && i + 1 < length) {
      next = x.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        ch = x.substring(i, i + 2);
      }
    }
    i += ch.length;

    name = scriptName(ch);
    if (Object.prototype.hasOwnProperty.call(results, name)) {
      results[name] += 1;
    } else {
      results[name] = 1;
    }
  }

  console.log(results);

  // Declared locally: assigning these without `var` leaked globals in sloppy
  // mode and threw a ReferenceError in strict mode / ES modules.
  var mostCount = 0;
  var mostName = '';
  var key;
  // A dedicated loop variable is used so the input parameter is not clobbered.
  for (key in results) {
    if (Object.prototype.hasOwnProperty.call(results, key)) {
      // Strict comparison keeps the first label seen when counts are tied.
      if (results[key] > mostCount) {
        mostCount = results[key];
        mostName = key;
      }
    }
  }
  return mostName;
}
Some tests:
r=guessLanguage("外人だけど、日本語をペラペラしゃべるよ!");
Object
Common: 2
Han: 5
Hiragana: 9
Katakana: 4
__proto__: Object
"Hiragana"
The object r contains the number of occurrences of each script. Hiragana is the most frequent, and Hiragana + Katakana together make up about 2/3 of the sentence.
r=guessLanguage("我唔知道,佢講乜話.")
Object
Common: 2
Han: 8
__proto__: Object
"Han"
An obvious case of Chinese (Cantonese in this case).
r=guessLanguage("中國이 韓國보다 훨씬 크지만, 꼭 아름다운 나라가 아니다...");
Object
Common: 11
Han: 4
Hangul: 19
__proto__: Object
"Hangul"
A few Han characters and a lot of Hangul. A Korean sentence, no doubt.