数据集:
cfilt/iwn_wordlists
我们提供来自 IndoWordNet（IWN）知识库的不重复的词语列表。
# Example: load the word list for one language from cfilt/iwn_wordlists.
from datasets import load_dataset

# Language to load; passed to load_dataset as its second argument.
# Supported languages: assamese, bengali, bodo, gujarati, hindi, kannada,
# kashmiri, konkani, malayalam, manipuri, marathi, meitei, nepali, oriya,
# punjabi, sanskrit, tamil, telugu, urdu.
language = "hindi"
words = load_dataset("cfilt/iwn_wordlists", language)
# Take the "word" column of the "train" split.
word_list = words["train"]["word"]
@inproceedings{bhattacharyya2010indowordnet,
title={IndoWordNet},
author={Bhattacharyya, Pushpak},
booktitle={Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)},
year={2010}
}