Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
user:zeman:treebanks:sk [2014/03/03 11:50] zeman Size. |
user:zeman:treebanks:sk [2014/03/03 12:21] zeman |
||
---|---|---|---|
Line 39: | Line 39: | ||
==== Inside ==== | ==== Inside ==== | ||
- | The original morphosyntactic tags have been converted to fit into the three columns | + | The syntactic annotation scheme has been taken from the analytical layer of the (Czech) Prague Dependency Treebank 2.0. The set of syntactic tags (dependency relation labels) is identical to the set of analytical functions (afuns) in PDT. The morphosyntactic tagset is that of the Slovak National Corpus. Use [[http:// |
- | The morphological analysis includes lemmas. The morphosyntactic tags have been assigned | + | A significant part of the treebank (but not all) has been syntactically annotated in parallel by two independent annotators. (In the data we have for HamleDT, these parallel annotations have not been merged.) |
+ | |||
+ | The morphological analysis includes lemmas. The morphosyntactic tags and lemmas | ||
==== Sample ==== | ==== Sample ==== | ||
- | The first sentence | + | Beginning |
+ | |||
+ | <code xml><? | ||
+ | |||
+ | <wdata xmlns=" | ||
+ | < | ||
+ | <schema href=" | ||
+ | </ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
- | <code xml> | + | The same sentence on the m-layer: |
- | < | + | |
- | <div type=" | + | |
- | <div type=" | + | |
- | <p id=" | + | |
- | <s id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <c id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <c id=" | + | |
- | </ | + | |
- | The first sentence of the CoNLL 2006 training data: | + | <code xml><? |
- | | 1 | Bil | biti | Verb | <nowiki>Verb-copula</nowiki> | <nowiki>VForm=participle|Tense=past|Number=singular|Gender=masculine|Voice=active</nowiki> | 8 | Pred | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <mdata xmlns=" |
- | | 2 | je | biti | Verb | <nowiki>Verb-copula</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=singular|Negative=no</nowiki> | 1 | AuxV | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <head> |
- | | 3 | jasen | jasen | Adjective | <nowiki>Adjective-qualificative</nowiki> | <nowiki>Degree=positive|Gender=masculine|Number=singular|Case=nominative|Definiteness=no</nowiki> | 4 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <schema href=" |
- | | 4 | <nowiki>,</nowiki> | <nowiki>,</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 7 | Coord | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 5 | mrzel | mrzel | Adjective | <nowiki>Adjective-qualificative</nowiki> | <nowiki>Degree=positive|Gender=masculine|Number=singular|Case=nominative|Definiteness=no</nowiki> | 4 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 6 | aprilski | aprilski | Adjective | <nowiki>Adjective-ordinal</nowiki> | <nowiki>Degree=positive|Gender=masculine|Number=singular|Case=nominative</nowiki> | 7 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | </ |
- | | 7 | dan | dan | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=masculine|Number=singular|Case=nominative</nowiki> | 1 | Sb | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | </ |
- | | 8 | in | in | Conjunction | <nowiki>Conjunction-coordinating</nowiki> | <nowiki>Formation=simple</nowiki> | 0 | Coord | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 9 | ure | ura | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=feminine|Number=plural|Case=nominative</nowiki> | 11 | Sb | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 10 | so | biti | Verb | <nowiki>Verb-copula</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=plural|Negative=no</nowiki> | 11 | AuxV | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | </ |
- | | 11 | bile | biti | Verb | <nowiki>Verb-main</nowiki> | <nowiki>VForm=participle|Tense=past|Number=plural|Gender=feminine|Voice=active</nowiki> | 8 | Pred | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <s id=" |
- | | 12 | trinajst | trinajst | Numeral | <nowiki>Numeral-cardinal</nowiki> | <nowiki>Gender=neuter|Number=plural|Case=nominative|Form=letter</nowiki> | 11 | Obj | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <m id=" |
- | | 13 | <nowiki>.</nowiki> | <nowiki>.</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 0 | AuxK | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
+ | <w.rf>w#w-.b.1</w.rf> | ||
+ | <form>:</form> | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | <src.rf> | ||
+ | < | ||
+ | <form>ORWELL</form> | ||
+ | <lemma>orwell</lemma> | ||
+ | <tag>SSms1:r</tag> | ||
+ | </ | ||
+ | <m id=" | ||
+ | <src.rf>manual</ | ||
+ | < | ||
+ | <form>GEORGE</form> | ||
+ | <lemma>george</lemma> | ||
+ | <tag>SSms1:r</tag> | ||
+ | </ | ||
+ | <m id=" | ||
+ | <src.rf>manual</src.rf> | ||
+ | <w.rf>w#w-.b.4</w.rf> | ||
+ | <form>deväťsto</form> | ||
+ | <lemma>deväťsto</lemma> | ||
+ | <tag>NX</tag> | ||
+ | </ | ||
+ | <m id=" | ||
+ | <src.rf>manual</ | ||
+ | < | ||
+ | <form>Tisíc</form> | ||
+ | <lemma>tisíc</lemma> | ||
+ | <tag>NX</tag> | ||
+ | </ | ||
+ | <m id=" | ||
+ | <src.rf>manual</ | ||
+ | < | ||
+ | <form>osemdesiatštyri</ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | <w.rf>w#w-.c.3</w.rf> | ||
+ | <form>Vojtek</form> | ||
+ | | ||
+ | < | ||
+ | </ | ||
+ | <m id="m-.c.4"> | ||
+ | < | ||
+ | <w.rf>w# | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <lemma>i</lemma> | ||
+ | | ||
+ | | ||
+ | <m id=" | ||
+ | < | ||
+ | <w.rf>w#w-.d.2</w.rf> | ||
+ | <form>.</form> | ||
+ | | ||
+ | <tag>Z</ | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | <w.rf>w#w-.e.1</w.rf> | ||
+ | <form>1</form> | ||
+ | | ||
+ | <tag>0</ | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | <w.rf>w#w-.f.4</w.rf> | ||
+ | <form>aprílový</form> | ||
+ | | ||
+ | < | ||
+ | </ | ||
+ | <m id="m-.f.5"> | ||
+ | < | ||
+ | <w.rf>w# | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <lemma>chladný</lemma> | ||
+ | | ||
+ | </ | ||
+ | <m id="m-.f.9"> | ||
+ | < | ||
+ | <w.rf>w# | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <lemma>trinásty</lemma> | ||
+ | | ||
+ | </ | ||
+ | <m id="m-.f.12"> | ||
+ | < | ||
+ | <w.rf>w#w-.f.12</w.rf> | ||
+ | <form>.</form> | ||
+ | <lemma>.</lemma> | ||
+ | <tag> | ||
+ | </ | ||
+ | </ | ||
- | The first sentence of the CoNLL 2006 test data: | + | <code xml><? |
- | | 1 | Na | na | Adposition | <nowiki>Adposition-preposition</nowiki> | <nowiki>Formation=simple|Case=locative</nowiki> | 5 | AuxP | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <adata xmlns=" |
- | | 2 | hrbtu | hrbet | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=masculine|Number=singular|Case=locative</nowiki> | 1 | Adv | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <head> |
- | | 3 | je | biti | Verb | <nowiki>Verb-copula</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=singular|Negative=no</nowiki> | 5 | AuxV | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <schema href=" |
- | | 4 | lahko | lahko | Adverb | <nowiki>Adverb-general</nowiki> | <nowiki>Degree=positive</nowiki> | 5 | AuxY | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 5 | čutil | čutiti | Verb | <nowiki>Verb-main</nowiki> | <nowiki>VForm=participle|Tense=past|Number=singular|Gender=masculine|Voice=active</nowiki> | 0 | Pred | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 6 | <nowiki>,</nowiki> | <nowiki>,</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 7 | AuxX | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <reffile id=" |
- | | 7 | da | da | Conjunction | <nowiki>Conjunction-subordinating</nowiki> | <nowiki>Formation=simple</nowiki> | 5 | AuxC | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 8 | vsi | ves | Pronoun | <nowiki>Pronoun-general</nowiki> | <nowiki>Gender=masculine|Number=plural|Case=nominative|Syntactic-Type=nominal</nowiki> | 9 | Sb | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | </head> |
- | | 9 | upirajo | upirati | Verb | <nowiki>Verb-main</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=plural|Negative=no</nowiki> | 7 | Obj | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <trees> |
- | | 10 | oči | oči | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=feminine|Number=plural|Case=accusative</nowiki> | 9 | Obj | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 11 | v | v | Adposition | <nowiki>Adposition-preposition</nowiki> | <nowiki>Formation=simple|Case=accusative</nowiki> | 9 | AuxP | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 12 | njegov | njegov | Pronoun | <nowiki>Pronoun-possessive</nowiki> | <nowiki>Person=third|Gender=masculine|Number=singular|Case=accusative|Owner-Number=singular|Owner-Gender=masculine|Syntactic-Type=adjectival|Animate=no</nowiki> | 14 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <ord>0</ord> |
- | | 13 | modri | moder | Adjective | <nowiki>Adjective-qualificative</nowiki> | <nowiki>Degree=positive|Gender=masculine|Number=singular|Case=accusative|Definiteness=yes|Animate=no</nowiki> | 14 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 14 | kombinezon | kombinezon | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=masculine|Number=singular|Case=accusative|Animate=no</nowiki> | 11 | Adv | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <LM id=" |
- | | 15 | <nowiki>.</nowiki> | <nowiki>.</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 0 | AuxK | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <m.rf>m#m-.b.1</m.rf> |
+ | <afun>Coord</afun> | ||
+ | <ord>3</ord> | ||
+ | < | ||
+ | <LM id="a-.b.2"> | ||
+ | < | ||
+ | <afun>ExD</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Atr</afun> | ||
+ | <ord>1</ord> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id=" | ||
+ | <m.rf>m#m-.b.4</m.rf> | ||
+ | <afun>ExD</afun> | ||
+ | <is_member>1</is_member> | ||
+ | <ord>5</ord> | ||
+ | < | ||
+ | <LM id=" | ||
+ | <m.rf>m#m-.b.5</m.rf> | ||
+ | <afun>Atr</afun> | ||
+ | <ord>4</ord> | ||
+ | | ||
+ | | ||
+ | < | ||
+ | < | ||
+ | <ord>6</ord> | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | </children> | ||
+ | </LM> | ||
+ | <LM id="a-.c"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Coord</afun> | ||
+ | <ord>2</ord> | ||
+ | | ||
+ | <LM id="a-.c.2"> | ||
+ | < | ||
+ | <afun>ExD</ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <LM id="a-.c.3"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Atr</afun> | ||
+ | <ord>3</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.d"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>AuxG</afun> | ||
+ | <ord>2</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.e"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>ExD</afun> | ||
+ | <ord>1</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.f"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Coord</afun> | ||
+ | <ord>8</ord> | ||
+ | | ||
+ | <LM id="a-.f.2"> | ||
+ | < | ||
+ | <afun>Pred</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.3"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.4"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.5"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <ord>2</ord> | ||
+ | </LM> | ||
+ | <LM id="a-.f.7"> | ||
+ | < | ||
+ | <afun>AuxX</ | ||
+ | < | ||
+ | </ | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <ord>5</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.f.9"> | ||
+ | < | ||
+ | <afun>Pred</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | <m.rf> | ||
+ | <afun>AuxK</afun> | ||
+ | <ord>12</ord> | ||
+ | | ||
+ | </ | ||
+ | </LM></code> | ||
==== Parsing ==== | ==== Parsing ==== |