363. Bengali.AI Handwritten Grapheme Classification | bengaliai-cv19

我没有分别预测字符的各个组成部分。换句话说，我所有的模型都是直接针对 14784（168 × 11 × 8）个类别进行分类的。因此，我需要知道每一种标签组合对应哪一个 Grapheme（字素）。
我根据给定训练数据的标签，推断出了标签组合与 Grapheme 之间的对应关系，并编写了以下代码。
# Load the component table; only its 'component_type' and 'component'
# columns are used below.
class_map = pd.read_csv('../input/bengaliai-cv19/class_map.csv')

# One sub-frame per component type, selected with a boolean mask.
grapheme_root = class_map.loc[class_map['component_type'].eq('grapheme_root')]
vowel_diacritic = class_map.loc[class_map['component_type'].eq('vowel_diacritic')]
consonant_diacritic = class_map.loc[class_map['component_type'].eq('consonant_diacritic')]

# Component strings as plain lists, in row order within each type.
# label_to_grapheme indexes these lists directly with the label values.
grapheme_root_list = grapheme_root['component'].tolist()
vowel_diacritic_list = vowel_diacritic['component'].tolist()
consonant_diacritic_list = consonant_diacritic['component'].tolist()
def label_to_grapheme(grapheme_root, vowel_diacritic, consonant_diacritic):
if consonant_diacritic == 0:
if vowel_diacritic == 0:
return grapheme_root_list[grapheme_root]
else:
return grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 1:
if vowel_diacritic == 0:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
else:
return grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic] + consonant_diacritic_list[consonant_diacritic]
elif consonant_diacritic == 2:
if vowel_diacritic == 0:
return consonant_diacritic_list[consonant_diacritic] + grapheme_root_list[grapheme_root]
else:
return consonant_diacritic_list[consonant_diacritic] + grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 3:
if vowel_diacritic == 0:
return consonant_diacritic_list[consonant_diacritic][:2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic][1:]
else:
return consonant_diacritic_list[consonant_diacritic][:2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic][1:] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 4:
if vowel_diacritic == 0:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
else:
if grapheme_root == 123 and vowel_diacritic == 1:
return grapheme_root_list[grapheme_root] + '\u200d' + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 5:
if vowel_diacritic == 0:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
else:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 6:
if vowel_diacritic == 0:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
else:
return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
elif consonant_diacritic == 7:
if vowel_diacritic == 0:
return consonant_diacritic_list[2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[2][