जावा, 3416 बाइट्स, 62%
यह मेरा समाधान है। मैं दिए गए शब्दों की सूची का विश्लेषण करता हूं और प्रत्येक भाषा के लिए 60 सबसे आम बाइग्राम और ट्राइग्राम ढूंढता हूं। फिर मैं जाँचता हूं कि इनमें से कौन-से n-ग्राम शब्द में मौजूद हैं, और उस भाषा को चुनता हूं जिसके सबसे अधिक n-ग्राम शब्द में मिलते हैं।
/**
 * Guesses the language of a word by counting how many of each language's
 * frequent bigrams and trigrams occur in it.
 *
 * <p>Language ids are 1-based row indices into {@link #triGr} / {@link #biGr}.
 * Row 4 is treated as Hungarian by the short-word fallback; rows 1-3 look like
 * English, German and Italian respectively (TODO confirm against the word list).
 */
public class Classificator {
    /** Frequent trigrams, one row per language. Shared by all instances. */
    static final String[][] triGr = {
        {"ing","ion","ent","tio","ted","nce","ter","res","ati","con","ess","ate","pro","ain","est","ons","men","ect","red","rea","com","ere","ers","nte","ine","her","ble","ist","tin","for","per","der","ear","str","ght","pre","ver","int","nde","the","igh","ive","sta","ure","end","enc","ned","ste","dis","ous","all","and","anc","ant","oun","ten","tra","are","sed","cti"},
        {"sch","che","ver","gen","ten","cht","ich","ein","ste","ter","hen","nde","nge","ach","ere","ung","den","sse","ers","and","eit","ier","ren","sen","ges","ang","ben","rei","est","nen","nte","men","aus","der","ent","hei","her","lle","ern","ert","uch","ine","ehe","auf","lie","tte","ige","ing","hte","mme","end","wei","len","hre","rau","ite","bes","ken","cha","ebe"},
        {"ent","are","ato","nte","ett","ere","ion","chi","con","one","men","nti","gli","pre","ess","att","tto","par","per","sta","tra","zio","and","iam","end","ter","res","est","nto","tta","acc","sci","cia","ver","ndo","amo","ant","str","tro","ssi","pro","era","eri","nta","der","ate","ort","com","man","tor","rat","ell","ale","gio","ont","col","tti","ano","ore","ist"},
        {"sze","ere","meg","ett","gye","ele","ond","egy","enn","ott","tte","ete","unk","ban","tem","agy","zer","esz","tet","ara","nek","hal","dol","mon","art","ala","ato","szt","len","men","ben","kap","ent","min","ndo","eze","sza","isz","fog","kez","ind","ten","tam","nak","fel","ene","all","asz","gon","mar","zem","szo","tek","zet","elm","het","eve","ssz","hat","ell"}
    };

    /** Frequent bigrams, one row per language, in the same row order as {@link #triGr}. */
    static final String[][] biGr = {
        {"in","ed","re","er","es","en","on","te","ng","st","nt","ti","ar","le","an","se","de","at","ea","co","ri","ce","or","io","al","is","it","ne","ra","ro","ou","ve","me","nd","el","li","he","ly","si","pr","ur","th","di","pe","la","ta","ss","ns","nc","ll","ec","tr","as","ai","ic","il","us","ch","un","ct"},
        {"en","er","ch","te","ge","ei","st","an","re","in","he","ie","be","sc","de","es","le","au","se","ne","el","ng","nd","un","ra","ar","nt","ve","ic","et","me","ri","li","ss","it","ht","ha","la","is","al","eh","ll","we","or","ke","fe","us","rt","ig","on","ma","ti","nn","ac","rs","at","eg","ta","ck","ol"},
        {"re","er","to","ar","en","te","ta","at","an","nt","ra","ri","co","on","ti","ia","or","io","in","st","tt","ca","es","ro","ci","di","li","no","ma","al","am","ne","me","le","sc","ve","sa","si","tr","nd","se","pa","ss","et","ic","na","pe","de","pr","ol","mo","do","so","it","la","ce","ie","is","mi","cc"},
        {"el","en","sz","te","et","er","an","me","ta","on","al","ar","ha","le","gy","eg","re","ze","em","ol","at","ek","es","tt","ke","ni","la","ra","ne","ve","nd","ak","ka","in","am","ad","ye","is","ok","ba","na","ma","ed","to","mi","do","om","be","se","ag","as","ez","ot","ko","or","cs","he","ll","nn","ny"}
    };

    /** Points awarded for each listed trigram found in the word. */
    private static final int TRIGRAM_WEIGHT = 2;

    /** Points awarded for each listed bigram found in the word. */
    private static final int BIGRAM_WEIGHT = 1;

    /** Id returned for words shorter than three characters (the Hungarian row). */
    private static final int SHORT_WORD_LANGUAGE = 4;

    /**
     * Id returned when no listed n-gram occurs in the word. It lies outside
     * the 1..4 range of real language ids.
     * NOTE(review): the original computed this as {@code Math.round(4)+1};
     * confirm whether an out-of-range id (always scored as a miss) is intended.
     */
    private static final int NO_MATCH = 5;

    /**
     * Scores the word against every language and returns the best one.
     *
     * <p>Each table entry that occurs anywhere in the word counts once,
     * regardless of how many times it occurs. On a tie the language with the
     * lowest id wins.
     *
     * @param word the word to classify; must not be null
     * @return a language id in 1..4, or 5 when the word has at least three
     *         characters but contains none of the listed n-grams
     * @throws NullPointerException if {@code word} is null
     */
    public int guess(String word) {
        if (word.length() < 3) {
            // Words of fewer than 3 characters on the list are mostly Hungarian.
            return SHORT_WORD_LANGUAGE;
        }
        int bestLanguage = -1;
        int bestScore = 0;
        for (int lang = 0; lang < triGr.length; lang++) {
            int score = TRIGRAM_WEIGHT * countContained(word, triGr[lang])
                    + BIGRAM_WEIGHT * countContained(word, biGr[lang]);
            // Strict comparison keeps the earliest language on ties and
            // leaves bestLanguage unset when every score is zero.
            if (score > bestScore) {
                bestScore = score;
                bestLanguage = lang;
            }
        }
        return bestLanguage < 0 ? NO_MATCH : bestLanguage + 1;
    }

    /** Counts how many of the given n-grams occur as substrings of the word. */
    private static int countContained(String word, String[] grams) {
        int hits = 0;
        for (String gram : grams) {
            if (word.contains(gram)) {
                hits++;
            }
        }
        return hits;
    }
}
और यह मेरा परीक्षण (टेस्ट) कोड है
/**
 * Accuracy harness for {@code Classificator}: loads labelled words from
 * {@code list.txt}, classifies each distinct word and prints how many
 * guesses matched one of the word's labels.
 */
public class Test {
    /** Maps each word from the list to every language id it was labelled with. */
    Map<String, List<Integer>> words = new HashMap<String, List<Integer>>();

    /**
     * Checks whether a guessed language is among the labels recorded for a word.
     *
     * @param word the word that was classified
     * @param lang the guessed language id
     * @return true if {@code lang} is one of the labels stored for {@code word};
     *         false otherwise, including when the word was never loaded
     */
    boolean validate(String word, Integer lang) {
        List<Integer> langs = words.get(word);
        return langs != null && langs.contains(lang);
    }

    /**
     * Reads {@code list.txt} from the working directory, runs the classifier
     * over every distinct word and prints
     * {@code <correct> <total> <percentage>} to standard output.
     *
     * @param args unused
     * @throws FileNotFoundException if {@code list.txt} cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        Classificator cl = new Classificator();
        Test test = new Test();
        // try-with-resources so the file handle is released even if a line fails to parse.
        try (BufferedReader buf = new BufferedReader(new FileReader("list.txt"))) {
            buf.lines().forEach(test::process);
        } catch (FileNotFoundException e) {
            throw e;
        } catch (java.io.IOException e) {
            // Only close() can land here; surfaced unchecked to keep the declared signature.
            throw new java.io.UncheckedIOException(e);
        }
        int correct = 0;
        int total = 0;
        for (String word : test.words.keySet()) {
            int lang = cl.guess(word);
            if (lang == 0) {
                // A guess of 0 is treated as "no answer" and excluded from the totals.
                continue;
            }
            total++;
            if (test.validate(word, lang)) {
                correct++;
            }
        }
        System.out.println(correct + " " + total + " " + (correct * 100f / total));
    }

    /**
     * Parses one line of the word list, {@code <word> <languageId>} separated
     * by whitespace, and records the label for the word. Blank lines are ignored.
     *
     * @param x a raw line from the list
     * @throws IllegalArgumentException if a non-blank line has no language column
     * @throws NumberFormatException if the language column is not an integer
     */
    private void process(String x) {
        String line = x.trim();
        if (line.isEmpty()) {
            return;
        }
        String[] arr = line.split("\\s+");
        if (arr.length < 2) {
            throw new IllegalArgumentException("Expected '<word> <language>' but got: " + x);
        }
        words.computeIfAbsent(arr[0], k -> new ArrayList<Integer>())
                .add(Integer.parseInt(arr[1]));
    }
}