[ Skip to the content ]

Institute of Formal and Applied Linguistics Wiki


[ Back to the navigation ]

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision Both sides next revision
user:zeman:treebanks:hi [2012/10/02 16:13]
zeman HTB 0.5 for the 2012 shared task.
user:zeman:treebanks:hi [2012/10/02 16:25]
zeman HPST 2012 sample.
Line 599: Line 599:
 | <nowiki>36</nowiki> | <nowiki>करेंगे</nowiki> | <nowiki>कर</nowiki> | <nowiki>VM</nowiki> | <nowiki>v</nowiki> | <nowiki>lex-kara|cat-v|gend-m|num-pl|pers-3|case-|vib-gA|tam-gA|posn-360|name-kareMge|chunkId-VGF|chunkType-head:VGF</nowiki> | <nowiki>0</nowiki> | <nowiki>main</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | <nowiki>36</nowiki> | <nowiki>करेंगे</nowiki> | <nowiki>कर</nowiki> | <nowiki>VM</nowiki> | <nowiki>v</nowiki> | <nowiki>lex-kara|cat-v|gend-m|num-pl|pers-3|case-|vib-gA|tam-gA|posn-360|name-kareMge|chunkId-VGF|chunkType-head:VGF</nowiki> | <nowiki>0</nowiki> | <nowiki>main</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 | <nowiki>37</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>SYM</nowiki> | <nowiki>punc</nowiki> | <nowiki>lex-.|cat-punc|gend-|num-|pers-|case-|vib-|tam-|posn-370|chunkType-child:VGF|name-.</nowiki> | <nowiki>36</nowiki> | <nowiki>rsym</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | <nowiki>37</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>SYM</nowiki> | <nowiki>punc</nowiki> | <nowiki>lex-.|cat-punc|gend-|num-|pers-|case-|vib-|tam-|posn-370|chunkType-child:VGF|name-.</nowiki> | <nowiki>36</nowiki> | <nowiki>rsym</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +
 +The first sentence of the HPST 2012 training data in UTF-8 CoNLL format, with gold-standard morphology:
 +
 +| 1 | <nowiki>गुजरात</nowiki> | <nowiki>गुजरात</nowiki> | NNP | n | <nowiki>lex-गुजरात|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_का|tam-0|chunkId-NP|chunkType-head|stype-|voicetype-</nowiki> | 3 | r6 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 2 | <nowiki>के</nowiki> | <nowiki>का</nowiki> | PSP | psp | <nowiki>lex-का|cat-psp|gen-m|num-sg|pers-|case-o|vib-|tam-|chunkId-NP|chunkType-child|stype-|voicetype-</nowiki> | 1 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 3 | <nowiki>मुख्यमंत्री</nowiki> | <nowiki>मुख्यमंत्री</nowiki> | NNP | n | <nowiki>lex-मुख्यमंत्री|cat-n|gen-m|num-sg|pers-3|case-o|vib-0|tam-0|chunkId-NP2|chunkType-head|stype-|voicetype-</nowiki> | 5 | nmod | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 4 | <nowiki>नरेंद्र</nowiki> | <nowiki>नरेंद्र</nowiki> | NNPC | n | <nowiki>lex-नरेंद्र|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP3|chunkType-child|stype-|voicetype-</nowiki> | 5 | <nowiki>pof__cn</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 5 | <nowiki>मोदी</nowiki> | <nowiki>मोदी</nowiki> | NNP | n | <nowiki>lex-मोदी|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_ने|tam-0|chunkId-NP3|chunkType-head|stype-|voicetype-</nowiki> | 32 | k1 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 6 | <nowiki>ने</nowiki> | <nowiki>ने</nowiki> | PSP | psp | <nowiki>lex-ने|cat-psp|gen-|num-|pers-|case-|vib-|tam-|chunkId-NP3|chunkType-child|stype-|voicetype-</nowiki> | 5 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 7 | <nowiki>मंगलवार</nowiki> | <nowiki>मंगलवार</nowiki> | NNP | n | <nowiki>lex-मंगलवार|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_को|tam-0|chunkId-NP4|chunkType-head|stype-|voicetype-</nowiki> | 32 | k7t | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 8 | <nowiki>को</nowiki> | <nowiki>को</nowiki> | PSP | psp | <nowiki>lex-को|cat-psp|gen-|num-|pers-|case-|vib-|tam-|chunkId-NP4|chunkType-child|stype-|voicetype-</nowiki> | 7 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 9 | <nowiki>गृह</nowiki> | <nowiki>गृह</nowiki> | NNPC | n | <nowiki>lex-गृह|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP5|chunkType-child|stype-|voicetype-</nowiki> | 10 | <nowiki>pof__cn</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 10 | <nowiki>मंत्री</nowiki> | <nowiki>मंत्री</nowiki> | NNP | n | <nowiki>lex-मंत्री|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP5|chunkType-child|stype-|voicetype-</nowiki> | 12 | <nowiki>nmod__adj</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 11 | <nowiki>शिवराज</nowiki> | <nowiki>शिवराज</nowiki> | NNPC | n | <nowiki>lex-शिवराज|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP5|chunkType-child|stype-|voicetype-</nowiki> | 12 | <nowiki>pof__cn</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 12 | <nowiki>पाटिल</nowiki> | <nowiki>पाटिल</nowiki> | NNP | n | <nowiki>lex-पाटिल|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_से|tam-0|chunkId-NP5|chunkType-head|stype-|voicetype-</nowiki> | 32 | k4 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 13 | <nowiki>से</nowiki> | <nowiki>से</nowiki> | PSP | psp | <nowiki>lex-से|cat-psp|gen-|num-|pers-|case-|vib-|tam-|chunkId-NP5|chunkType-child|stype-|voicetype-</nowiki> | 12 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 14 | <nowiki>मुलाकात</nowiki> | <nowiki>मुलाकात</nowiki> | NN | n | <nowiki>lex-मुलाकात|cat-n|gen-f|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP6|chunkType-head|stype-|voicetype-</nowiki> | 15 | pof | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 15 | कर | कर | VM | v | <nowiki>lex-कर|cat-v|gen-any|num-any|pers-any|case-|vib-0|tam-0|chunkId-VGNF|chunkType-head|stype-|voicetype-</nowiki> | 32 | vmod | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 16 | आईएएस | आईएएस | NNP | n | <nowiki>lex-आईएएस|cat-n|gen-m|num-sg|pers-3|case-o|vib-0|tam-0|chunkId-NP7|chunkType-head|stype-|voicetype-</nowiki> | 17 | ccof | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 17 | और | और | CC | avy | <nowiki>lex-और|cat-avy|gen-|num-|pers-|case-|vib-|tam-|chunkId-CCP|chunkType-head|stype-|voicetype-</nowiki> | 20 | r6 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 18 | <nowiki>आईपीएस</nowiki> | <nowiki>आईपीएस</nowiki> | NNP | n | <nowiki>lex-आईपीएस|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_का|tam-0|chunkId-NP8|chunkType-head|stype-|voicetype-</nowiki> | 17 | ccof | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 19 | <nowiki>की</nowiki> | <nowiki>का</nowiki> | PSP | psp | <nowiki>lex-का|cat-psp|gen-f|num-sg|pers-|case-o|vib-|tam-|chunkId-NP8|chunkType-child|stype-|voicetype-</nowiki> | 18 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 20 | <nowiki>तर्ज</nowiki> | <nowiki>तर्ज</nowiki> | NN | n | <nowiki>lex-तर्ज|cat-n|gen-f|num-sg|pers-3|case-o|vib-0_पर|tam-0|chunkId-NP9|chunkType-head|stype-|voicetype-</nowiki> | 32 | k7 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 21 | पर | पर | PSP | psp | <nowiki>lex-पर|cat-psp|gen-|num-|pers-|case-|vib-|tam-|chunkId-NP9|chunkType-child|stype-|voicetype-</nowiki> | 20 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 22 | <nowiki>राष्ट्रीय</nowiki> | <nowiki>राष्ट्रीय</nowiki> | JJ | adj | <nowiki>lex-राष्ट्रीय|cat-adj|gen-any|num-any|pers-|case-o|vib-|tam-|chunkId-NP10|chunkType-child|stype-|voicetype-</nowiki> | 23 | <nowiki>nmod__adj</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 23 | <nowiki>स्तर</nowiki> | <nowiki>स्तर</nowiki> | NN | n | <nowiki>lex-स्तर|cat-n|gen-m|num-sg|pers-3|case-o|vib-0_पर|tam-0|chunkId-NP10|chunkType-head|stype-|voicetype-</nowiki> | 32 | k7 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 24 | पर | पर | PSP | psp | <nowiki>lex-पर|cat-psp|gen-|num-|pers-|case-|vib-|tam-|chunkId-NP10|chunkType-child|stype-|voicetype-</nowiki> | 23 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 25 | एक | एक | QC | num | <nowiki>lex-एक|cat-num|gen-any|num-any|pers-|case-any|vib-|tam-|chunkId-NP11|chunkType-child|stype-|voicetype-</nowiki> | 27 | <nowiki>nmod__adj</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 26 | <nowiki>खुफिया</nowiki> | <nowiki>खुफिया</nowiki> | JJ | adj | <nowiki>lex-खुफिया|cat-adj|gen-any|num-any|pers-|case-d|vib-|tam-|chunkId-NP11|chunkType-child|stype-|voicetype-</nowiki> | 27 | <nowiki>nmod__adj</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 27 | <nowiki>सेवा</nowiki> | <nowiki>सेवा</nowiki> | NN | n | <nowiki>lex-सेवा|cat-n|gen-f|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP11|chunkType-head|stype-|voicetype-</nowiki> | 29 | k2 | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 28 | <nowiki>शुरू</nowiki> | <nowiki>शुरू</nowiki> | NN | n | <nowiki>lex-शुरू|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP12|chunkType-head|stype-|voicetype-</nowiki> | 29 | pof | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 29 | <nowiki>करने</nowiki> | कर | VM | v | <nowiki>lex-कर|cat-v|gen-any|num-sg|pers-any|case-o|vib-ना_का|tam-nA|chunkId-VGNN|chunkType-head|stype-|voicetype-</nowiki> | 31 | <nowiki>r6-k2</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 30 | <nowiki>का</nowiki> | <nowiki>का</nowiki> | PSP | psp | <nowiki>lex-का|cat-psp|gen-m|num-sg|pers-|case-d|vib-|tam-|chunkId-VGNN|chunkType-child|stype-|voicetype-</nowiki> | 29 | <nowiki>lwg__psp</nowiki> | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 31 | <nowiki>अनुरोध</nowiki> | <nowiki>अनुरोध</nowiki> | NN | n | <nowiki>lex-अनुरोध|cat-n|gen-m|num-sg|pers-3|case-d|vib-0|tam-0|chunkId-NP13|chunkType-head|stype-|voicetype-</nowiki> | 32 | pof | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 32 | <nowiki>किया</nowiki> | कर | VM | v | <nowiki>lex-कर|cat-v|gen-m|num-sg|pers-any|case-|vib-या|tam-yA|chunkId-VGF|chunkType-head|stype-declarative|voicetype-active</nowiki> | 0 | main | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
 +| 33 | <nowiki>।</nowiki> | <nowiki>।</nowiki> | SYM | punc | <nowiki>lex-।|cat-punc|gen-|num-|pers-|case-|vib-|tam-|chunkId-BLK|chunkType-head|stype-|voicetype-</nowiki> | 32 | rsym | <nowiki>_</nowiki> | <nowiki>_</nowiki> |
  
 ==== Parsing ==== ==== Parsing ====

[ Back to the navigation ] [ Back to the content ]