Update
Back from lunch =)
I'm afraid the previous approach won't work well for every foreign language,
so I added another fiddle with a possible alternative:
// Lookup table read by countNSMString(), which tests escape(<character>) against it with indexOf().
// NOTE(review): `[Array 1280]` is not valid JavaScript; it looks like a console-style summary
// standing in for the real 1280-entry array literal — paste the actual data here before running.
var UnicodeNsm = [Array 1280] // Holds every Unicode non-spacing mark (category Mn) in escape()d form
/**
 * Counts the characters of `str` that are not listed in the `UnicodeNsm`
 * lookup table, i.e. the characters that are not non-spacing marks.
 *
 * The string is walked by code point rather than by UTF-16 code unit: a valid
 * surrogate pair (for example an emoji) is looked up and counted as a single
 * character instead of as two separate halves.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of characters whose escape()d form is absent from
 *   `UnicodeNsm`; 0 for the empty string.
 */
function countNSMString(str) {
  var units = str.split("");
  var count = 0;
  for (var i = 0, ilen = units.length; i < ilen; i++) {
    var symbol = units[i];
    var code = symbol.charCodeAt(0);
    // A high surrogate followed by a low surrogate forms one code point:
    // glue the two halves together and step over the second one.
    if (code >= 0xD800 && code <= 0xDBFF && i + 1 < ilen) {
      var nextCode = units[i + 1].charCodeAt(0);
      if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) {
        symbol += units[i + 1];
        i++;
      }
    }
    // escape() is kept because the table stores its entries in escape()d form.
    // NOTE(review): a supplementary-plane mark is only recognised if the table
    // stores it as an escaped surrogate pair ("%uD834%uDD67") — confirm.
    if (UnicodeNsm.indexOf(escape(symbol)) === -1) {
      count++;
    }
  }
  return count;
}
// Sample inputs for logL().
// NOTE(review): the "?" runs below look like non-ASCII text lost in an encoding
// round-trip (e.g. "m?" has length 2, yet the expected output next to its logL call
// reports 3 normal characters) — restore the original literals before trusting results.
var English = "Mother";
var Tamil = "????";
var Vietnamese = "m?"
var Hindi = "???"
/**
 * Logs one line reporting the visible-character count of `str` (as computed
 * by countNSMString) next to its plain `length`.
 *
 * @param {string} str - Text to report on.
 */
function logL (str) {
  var visibleCount = countNSMString(str);
  var pieces = [
    str,
    " has ",
    visibleCount,
    " visible Characters and ",
    str.length,
    " normal Characters"
  ];
  // Example of the start of a logged line: "???? has 3 visible Characters"
  console.log(pieces.join(""));
}
// Run every sample through logL, in order. Outputs reported by the author:
//   English    -> "Mother has 6 visible Characters and 6 normal Characters"
//   Tamil      -> "???? has 3 visible Characters and 4 normal Characters"
//   Vietnamese -> "m? has 2 visible Characters and 3 normal Characters"
//   Hindi      -> "??? has 1 visible Characters and 3 normal Characters"
[English, Tamil, Vietnamese, Hindi].forEach(function (sample) {
  logL(sample);
});
So this just checks whether each character in the string is a Unicode NSM (non-spacing mark) and, if it is, leaves it out of the count. This should work for most languages, not only Tamil.
An array with 1280 elements shouldn't be much of a performance issue.
Here is a list of the Unicode NSMs:
http://www.fileformat.info/info/unicode/category/Mn/list.htm
Here is the corresponding JSBin
After experimenting a bit with string operations, it turns out that
String.prototype.indexOf
returns the same index for
"??"
and for "?",
meaning
"????".indexOf("??") == "????".indexOf("?" + "?") //true
but
"????".indexOf("?") == "????".indexOf("?" + "?")
//false
I took this opportunity and tried something like this
//??
// Sample strings passed to countStr().
// NOTE(review): these literals (and the comment above) appear to be garbled —
// non-ASCII characters replaced by "?" — so they are only runs of 9, 10 and 11
// question marks as written; restore the original text before relying on them.
var char = "?????????";
var char2 = "??????????";
var char3 = "???????????";
/**
 * Counts the user-perceived ("visible") characters in a string.
 *
 * A base character together with the combining marks attached to it counts
 * once, so "e" + U+0301 is 1 and plain "abcdef" is 6.
 *
 * Comparing indexOf() results of a character and of the character plus its
 * neighbour cannot tell marks from base characters: it simply skips every
 * second character of ordinary text. This implementation therefore uses the
 * engine's Unicode data instead.
 *
 * @param {string} str - Text to measure; null/undefined are treated as empty.
 * @returns {number} Number of visible characters; 0 for empty input.
 * @throws {SyntaxError} On engines that support neither Intl.Segmenter nor
 *   Unicode property escapes in regular expressions.
 */
function countStr(str) {
  if (str == null) {
    return 0;
  }
  var text = String(str);
  var count = 0;

  // Preferred path: let the engine apply the Unicode grapheme-cluster rules.
  if (typeof Intl === "object" && typeof Intl.Segmenter === "function") {
    var segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
    var cursor = segmenter.segment(text)[Symbol.iterator]();
    while (!cursor.next().done) {
      count++;
    }
    return count;
  }

  // Fallback: drop every combining mark (general category M), then count the
  // remaining code points. Built with the RegExp constructor so that engines
  // without \p{...} support fail here at call time, not when the file is parsed.
  var withoutMarks = text.replace(new RegExp("\\p{M}", "gu"), "");
  for (var i = 0, len = withoutMarks.length; i < len; i++) {
    var code = withoutMarks.charCodeAt(i);
    // A surrogate pair is a single code point: step over its second half.
    if (code >= 0xD800 && code <= 0xDBFF && i + 1 < len) {
      var nextCode = withoutMarks.charCodeAt(i + 1);
      if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) {
        i++;
      }
    }
    count++;
  }
  return count;
}
console.log("--");
// Log the count for each sample in order; the author reported 6, 7 and 7.
[char, char2, char3].forEach(function (sample) {
  console.log(countStr(sample));
});
This seems to work for the strings above. It may need some adjustments, as I don't know much about encodings, but maybe it's a starting point for you.
Here's the JSBin
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…