So I once again ended up with something I don't like very much. I wrote a custom JavaScript utility that can be used like this:
// Ordering: diacritiqueComparison returns -1, 0 or 1, like a sort comparator.
localeHelper.diacritiqueComparison('čerešne', 'citron'); // = 1 (same base letter 'c', so the accented 'č' sorts after plain 'c')
localeHelper.diacritiqueComparison('čerešne', 'hrozno'); // = -1 (base letter 'c' sorts before 'h')
// Searching: this is case insensitive; a plain Latin letter in the search text
// also matches its accented variants, but an accented search letter must match exactly.
localeHelper.containsDiacritiqueText('štart', 'st'); // = true
localeHelper.containsDiacritiqueText('štart', 'št'); // = true
localeHelper.containsDiacritiqueText('štart', 'tar'); // = true
localeHelper.containsDiacritiqueText('trend', 'tr'); // = true
localeHelper.containsDiacritiqueText('trend', 'ťr'); // = false (as we explicitly want the symbol 'ť' and not 't')
localeHelper.containsDiacritiqueText('trend', 'b'); // = false (symbol 'b' is not present)
The final code looked like this:
/**
 * Helper for diacritic-aware ordering and searching of text written in
 * Latin-derived alphabets.
 *
 * The constructor builds `this.charMap`, a dictionary that maps every supported
 * accented letter to its plain Latin base letter (for example 'č' -> 'c').
 *
 * @constructor
 */
function LocaleHelper() { // constructor
    // source: http://en.wikipedia.org/wiki/Alphabets_derived_from_the_Latin
    // supported languages:
    // Austro-Bavarian, Belarusian, Croatian, Czech, Dutch, Estonian, Finnish, French, German, Hungarian, Irish, Italian,
    // Latvian, Lithuanian, Polish, Portuguese, Romanian, Slovak, Sorbian, Spanish, Swedish, Turkish
    //
    // todo: not sure how to or if I should process german letter 'ß'
    var localUpperVowelList = "ÁÀÂÄĂĀÃÅĄÆÉÈĖÊËĚĒĘÍÌİÎÏĪĮÓÒÔÖÕŐŒÚÙÛÜŬŪŰŮŲÝŸ";
    var latinUpperVowelList = "AAAAAAAAAAEEEEEEEEIIIIIIIOOOOOOOUUUUUUUUUYY";
    var localLowerVowelList = "áàâäăāãåąæéèėêëěēęıíìîïīįóòôöõőœúùûüŭūűůųýÿ";
    var latinLowerVowelList = "aaaaaaaaaaeeeeeeeeiiiiiiiooooooouuuuuuuuuyy";
    var localUpperConsonantList = "ĆČÇĎĐĞĢĶĹĻŁĽŃŇÑŅŔŘŚŠŞȘŤŢṬŹŻŽ";
    var latinUpperConsonantList = "CCCDDGGKLLLLNNNNRRSSSSTTTZZZ";
    var localLowerConsonantList = "ćčçďđğģķĺļłľńňñņŕřśšşșťţṭźżž";
    var latinLowerConsonantList = "cccddggkllllnnnnrrsssstttzzz";

    // A prototype-less dictionary instead of an Array: an Array would answer
    // lookups such as 'length' or 'push' with its own members.
    var charMap = Object.create(null);

    // Registers localChars[k] -> latinChars[k] for every position k.
    function addMappings(localChars, latinChars) {
        for (var k = 0; k < localChars.length; k++) {
            charMap[localChars.charAt(k)] = latinChars.charAt(k);
        }
    }

    addMappings(localUpperVowelList, latinUpperVowelList);
    addMappings(localLowerVowelList, latinLowerVowelList);
    addMappings(localUpperConsonantList, latinUpperConsonantList);
    addMappings(localLowerConsonantList, latinLowerConsonantList);

    this.charMap = charMap;
}

LocaleHelper.prototype = {
    /**
     * Strips the diacritic from a single character.
     *
     * @param {string} charToProcess - character to look up
     * @returns {string} the plain Latin base letter when the character is in
     *     the map, otherwise the input unchanged
     */
    removeCharDiacritique : function(charToProcess) {
        // Own-property check so that only real mappings are ever returned.
        var hasMapping = Object.prototype.hasOwnProperty.call(this.charMap, charToProcess);
        return hasMapping ? this.charMap[charToProcess] : charToProcess;
    },

    /**
     * Compares two characters by their base letters, ignoring diacritics.
     * String.prototype.localeCompare is deliberately not used here: it did not
     * behave the same on every browser.
     *
     * @param {string} charA - first character
     * @param {string} charB - second character
     * @returns {number} 0 when the base letters are equal, -1 when charA's base
     *     letter is smaller, 1 otherwise
     */
    localeCharCompare : function(charA, charB) {
        var baseA = this.removeCharDiacritique(charA);
        var baseB = this.removeCharDiacritique(charB);
        if (baseA === baseB) {
            return 0;
        }
        return (baseA < baseB) ? -1 : 1;
    },

    /**
     * Tells whether a character is an unaccented Latin letter (a-z or A-Z).
     *
     * @param {string} character - character to test
     * @returns {boolean} true for 'a'..'z' and 'A'..'Z'
     */
    isLatinLetter : function(character) {
        var isLower = character >= 'a' && character <= 'z';
        var isUpper = character >= 'A' && character <= 'Z';
        return isLower || isUpper;
    },

    /**
     * Orders two texts letter by letter.
     *
     * At each position the lower-cased base letters decide first; when they are
     * equal but the lower-cased characters differ (accented vs. plain), the
     * characters themselves decide. If one text is a prefix of the other, the
     * shorter one comes first. Case sensitivity is used only when the words
     * have the same letters (they are read the same way).
     *
     * @param {string} textA - first text
     * @param {string} textB - second text
     * @returns {number} -1, 0 or 1, usable as a sort comparator result
     */
    diacritiqueComparison : function(textA, textB) {
        // todo: note: in Lithuanian alphabet the 'y' character is just before 'j' - so this algorithm won't work properly
        var caseDiff = 0; // difference in case, taken from the left-most position where only the case differs
        var sharedLength = Math.min(textA.length, textB.length);

        for (var pos = 0; pos < sharedLength; pos++) {
            var rawA = textA.charAt(pos);
            var rawB = textB.charAt(pos);
            var lowerA = rawA.toLocaleLowerCase();
            var lowerB = rawB.toLocaleLowerCase();

            var order = this.localeCharCompare(lowerA, lowerB);
            if (order === 0 && lowerA !== lowerB) {
                // Same base letter but different diacritics: the characters decide.
                order = (lowerA < lowerB) ? -1 : 1;
            }
            if (order !== 0) {
                return order;
            }

            // The letters are the same here; remember only the first case difference.
            if (caseDiff === 0 && rawA !== rawB) {
                caseDiff = (rawA < rawB) ? -1 : 1;
            }
        }

        if (textA.length !== textB.length) {
            return (textA.length < textB.length) ? -1 : 1;
        }
        // If the strings are otherwise identical, let the case difference decide.
        return caseDiff;
    },

    /**
     * Case-insensitive substring search that tolerates missing diacritics in
     * the search text: a plain Latin letter in searchText matches the same
     * letter with any diacritic in fullText, while an accented letter in
     * searchText must match exactly.
     *
     * @param {string} fullText - text to search in
     * @param {string} searchText - text to look for; an empty string always matches
     * @returns {boolean} true when searchText occurs in fullText under the rules above
     */
    containsDiacritiqueText : function (fullText, searchText) {
        var needleLength = searchText.length;
        if (needleLength === 0) {
            return true;
        }

        // Negative when fullText is shorter than searchText, so the loop is skipped.
        var lastStart = fullText.length - needleLength;
        for (var start = 0; start <= lastStart; start++) {
            var matched = true;
            for (var offset = 0; offset < needleLength; offset++) {
                var textChar = fullText.charAt(start + offset).toLocaleLowerCase();
                var searchChar = searchText.charAt(offset).toLocaleLowerCase();
                if (textChar === searchChar) {
                    continue;
                }
                // Different characters still match when they share a base letter
                // and the searched one is the plain (unaccented) Latin letter.
                var sameBase = this.localeCharCompare(textChar, searchChar) === 0;
                if (!sameBase || !this.isLatinLetter(searchChar)) {
                    matched = false;
                    break;
                }
            }
            if (matched) {
                return true;
            }
        }
        return false;
    }
};

var localeHelper = new LocaleHelper();
To make the translation of Java strings a less hellish process, you could use the software localization platform https://poeditor.com/ to collaboratively translate the strings.
ReplyDelete