返回列表

1st place solution w/ code

363. Bengali.AI Handwritten Grapheme Classification | bengaliai-cv19

开始: 2019-12-19 结束: 2020-03-16 计算机视觉 数据算法赛
第一名解决方案 --- 基于CycleGAN的零样本学习
作者:deoxy (Master) | 排名:第1名 | 发布时间:2020-03-17

第一名解决方案 --- 基于CycleGAN的零样本学习

类别与标签

我没有对字符的各个组成部分分别进行推断。换句话说，我所有的模型都是直接针对 14784（168 × 11 × 8，即字根 × 元音变音符 × 辅音变音符）个类别进行分类的。因此，我需要知道每一种标签组合对应的是哪个 Grapheme（字素）。

我根据给定训练数据的标签，推断出了标签组合与 Grapheme 之间的对应关系，并据此编写了以下代码。

# Read the class map CSV from the competition input directory.
class_map = pd.read_csv('../input/bengaliai-cv19/class_map.csv')

# Slice the table into one sub-frame per component type using a boolean
# row mask on the 'component_type' column.
grapheme_root = class_map.loc[class_map['component_type'].eq('grapheme_root')]
vowel_diacritic = class_map.loc[class_map['component_type'].eq('vowel_diacritic')]
consonant_diacritic = class_map.loc[class_map['component_type'].eq('consonant_diacritic')]

# Flatten each sub-frame's 'component' column into a plain Python list,
# preserving row order; label_to_grapheme indexes these lists by position.
grapheme_root_list = grapheme_root['component'].to_list()
vowel_diacritic_list = vowel_diacritic['component'].to_list()
consonant_diacritic_list = consonant_diacritic['component'].to_list()

def label_to_grapheme(grapheme_root, vowel_diacritic, consonant_diacritic):
    if consonant_diacritic == 0:
        if vowel_diacritic == 0:
            return grapheme_root_list[grapheme_root]
        else:
            return grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 1:
        if vowel_diacritic == 0:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
        else:
            return grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic] + consonant_diacritic_list[consonant_diacritic]
    elif consonant_diacritic == 2:
        if vowel_diacritic == 0:
            return consonant_diacritic_list[consonant_diacritic] + grapheme_root_list[grapheme_root]
        else:
            return consonant_diacritic_list[consonant_diacritic] + grapheme_root_list[grapheme_root] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 3:
        if vowel_diacritic == 0:
            return consonant_diacritic_list[consonant_diacritic][:2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic][1:]
        else:
            return consonant_diacritic_list[consonant_diacritic][:2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic][1:] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 4:
        if vowel_diacritic == 0:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
        else:
            if grapheme_root == 123 and vowel_diacritic == 1:
                return grapheme_root_list[grapheme_root] + '\u200d' + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
            return grapheme_root_list[grapheme_root]  + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 5:
        if vowel_diacritic == 0:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
        else:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 6:
        if vowel_diacritic == 0:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic]
        else:
            return grapheme_root_list[grapheme_root] + consonant_diacritic_list[consonant_diacritic] + vowel_diacritic_list[vowel_diacritic]
    elif consonant_diacritic == 7:
        if vowel_diacritic == 0:
            return consonant_diacritic_list[2] + grapheme_root_list[grapheme_root] + consonant_diacritic_list[2][