数据集:
cfilt/iwn_wordlists
 
 我们提供来自 IndoWordNet (IWN) 知识库的不重复词语列表。
from datasets import load_dataset

# Usage example: load the word list for one language from the
# "cfilt/iwn_wordlists" dataset.
#
# Supported languages: assamese, bengali, bodo, gujarati, hindi, kannada,
# kashmiri, konkani, malayalam, manipuri, marathi, meitei, nepali, oriya,
# punjabi, sanskrit, tamil, telugu, urdu.
language = "hindi"

# The language name is passed as the second argument to load_dataset.
words = load_dataset("cfilt/iwn_wordlists", language)

# Read the "word" column of the "train" split.
word_list = words["train"]["word"]
 @inproceedings{bhattacharyya2010indowordnet,
  title={IndoWordNet},
  author={Bhattacharyya, Pushpak},
  booktitle={Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)},
  year={2010}
}