Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
user:zeman:treebanks:sk [2014/03/03 11:50] zeman Size. |
user:zeman:treebanks:sk [2014/06/08 08:58] (current) zeman |
||
---|---|---|---|
Line 39: | Line 39: | ||
==== Inside ==== | ==== Inside ==== | ||
- | The original morphosyntactic tags have been converted to fit into the three columns | + | The syntactic annotation scheme has been taken from the analytical layer of the (Czech) Prague Dependency Treebank 2.0. The set of syntactic tags (dependency relation labels) is identical to the set of analytical functions (afuns) in PDT. The morphosyntactic tagset is that of the Slovak National Corpus. Use [[http:// |
- | The morphological analysis includes lemmas. The morphosyntactic tags have been assigned | + | A significant part of the treebank (but not all) has been syntactically annotated in parallel by two independent annotators. (In the data we have for HamleDT these parallel annotations have not been merged.) |
+ | |||
+ | The morphological analysis includes lemmas. The morphosyntactic tags and lemmas | ||
==== Sample ==== | ==== Sample ==== | ||
- | The first sentence | + | Beginning |
+ | |||
+ | <code xml><? | ||
+ | |||
+ | <wdata xmlns=" | ||
+ | < | ||
+ | <schema href=" | ||
+ | </ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | <w id=" | ||
+ | < | ||
+ | </ | ||
+ | |||
+ | The same sentence on the m-layer: | ||
- | <code xml> | + | <code xml><?xml version="1.0" |
- | < | + | |
- | <div type=" | + | |
- | <div type=" | + | |
- | <p id="Osl.1.2.2"> | + | |
- | <s id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <c id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <w id=" | + | |
- | <c id=" | + | |
- | </ | + | |
- | The first sentence of the CoNLL 2006 training data: | + | <mdata xmlns=" |
+ | < | ||
+ | <schema href=" | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <s id=" | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <m id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
- | | 1 | Bil | biti | Verb | < | + | The same sentence on the a-layer: |
- | | 2 | je | biti | Verb | < | + | |
- | | 3 | jasen | jasen | Adjective | < | + | |
- | | 4 | < | + | |
- | | 5 | mrzel | mrzel | Adjective | < | + | |
- | | 6 | aprilski | aprilski | Adjective | < | + | |
- | | 7 | dan | dan | Noun | < | + | |
- | | 8 | in | in | Conjunction | < | + | |
- | | 9 | ure | ura | Noun | < | + | |
- | | 10 | so | biti | Verb | < | + | |
- | | 11 | bile | biti | Verb | < | + | |
- | | 12 | trinajst | trinajst | Numeral | < | + | |
- | | 13 | < | + | |
- | The first sentence of the CoNLL 2006 test data: | + | <code xml><? |
- | | 1 | Na | na | Adposition | <nowiki>Adposition-preposition</nowiki> | <nowiki>Formation=simple|Case=locative</nowiki> | 5 | AuxP | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <adata xmlns=" |
- | | 2 | hrbtu | hrbet | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=masculine|Number=singular|Case=locative</nowiki> | 1 | Adv | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <head> |
- | | 3 | je | biti | Verb | <nowiki>Verb-copula</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=singular|Negative=no</nowiki> | 5 | AuxV | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <schema href=" |
- | | 4 | lahko | lahko | Adverb | <nowiki>Adverb-general</nowiki> | <nowiki>Degree=positive</nowiki> | 5 | AuxY | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 5 | čutil | čutiti | Verb | <nowiki>Verb-main</nowiki> | <nowiki>VForm=participle|Tense=past|Number=singular|Gender=masculine|Voice=active</nowiki> | 0 | Pred | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 6 | <nowiki>,</nowiki> | <nowiki>,</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 7 | AuxX | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <reffile id=" |
- | | 7 | da | da | Conjunction | <nowiki>Conjunction-subordinating</nowiki> | <nowiki>Formation=simple</nowiki> | 5 | AuxC | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 8 | vsi | ves | Pronoun | <nowiki>Pronoun-general</nowiki> | <nowiki>Gender=masculine|Number=plural|Case=nominative|Syntactic-Type=nominal</nowiki> | 9 | Sb | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | </head> |
- | | 9 | upirajo | upirati | Verb | <nowiki>Verb-main</nowiki> | <nowiki>VForm=indicative|Tense=present|Person=third|Number=plural|Negative=no</nowiki> | 7 | Obj | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <trees> |
- | | 10 | oči | oči | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=feminine|Number=plural|Case=accusative</nowiki> | 9 | Obj | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 11 | v | v | Adposition | <nowiki>Adposition-preposition</nowiki> | <nowiki>Formation=simple|Case=accusative</nowiki> | 9 | AuxP | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | |
- | | 12 | njegov | njegov | Pronoun | <nowiki>Pronoun-possessive</nowiki> | <nowiki>Person=third|Gender=masculine|Number=singular|Case=accusative|Owner-Number=singular|Owner-Gender=masculine|Syntactic-Type=adjectival|Animate=no</nowiki> | 14 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <ord>0</ord> |
- | | 13 | modri | moder | Adjective | <nowiki>Adjective-qualificative</nowiki> | <nowiki>Degree=positive|Gender=masculine|Number=singular|Case=accusative|Definiteness=yes|Animate=no</nowiki> | 14 | Atr | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | < |
- | | 14 | kombinezon | kombinezon | Noun | <nowiki>Noun-common</nowiki> | <nowiki>Gender=masculine|Number=singular|Case=accusative|Animate=no</nowiki> | 11 | Adv | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <LM id=" |
- | | 15 | <nowiki>.</nowiki> | <nowiki>.</nowiki> | PUNC | PUNC | <nowiki>_</nowiki> | 0 | AuxK | <nowiki>_</nowiki> | <nowiki>_</nowiki> | | + | <m.rf>m#m-.b.1</m.rf> |
+ | <afun>Coord</afun> | ||
+ | <ord>3</ord> | ||
+ | < | ||
+ | <LM id="a-.b.2"> | ||
+ | < | ||
+ | <afun>ExD</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Atr</afun> | ||
+ | <ord>1</ord> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id=" | ||
+ | <m.rf>m#m-.b.4</m.rf> | ||
+ | <afun>ExD</afun> | ||
+ | <is_member>1</is_member> | ||
+ | <ord>5</ord> | ||
+ | < | ||
+ | <LM id=" | ||
+ | <m.rf>m#m-.b.5</m.rf> | ||
+ | <afun>Atr</afun> | ||
+ | <ord>4</ord> | ||
+ | | ||
+ | | ||
+ | < | ||
+ | < | ||
+ | <ord>6</ord> | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | </children> | ||
+ | </LM> | ||
+ | <LM id="a-.c"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Coord</afun> | ||
+ | <ord>2</ord> | ||
+ | | ||
+ | <LM id="a-.c.2"> | ||
+ | < | ||
+ | <afun>ExD</ | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <LM id="a-.c.3"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Atr</afun> | ||
+ | <ord>3</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.d"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>AuxG</afun> | ||
+ | <ord>2</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.e"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>ExD</afun> | ||
+ | <ord>1</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.f"> | ||
+ | < | ||
+ | <ord>0</ | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | <afun>Coord</afun> | ||
+ | <ord>8</ord> | ||
+ | | ||
+ | <LM id="a-.f.2"> | ||
+ | < | ||
+ | <afun>Pred</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.3"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.4"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id="a-.f.5"> | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <ord>2</ord> | ||
+ | </LM> | ||
+ | <LM id="a-.f.7"> | ||
+ | < | ||
+ | <afun>AuxX</ | ||
+ | < | ||
+ | </ | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <ord>5</ord> | ||
+ | </LM> | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | <LM id="a-.f.9"> | ||
+ | < | ||
+ | <afun>Pred</ | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | </ | ||
+ | <LM id=" | ||
+ | < | ||
+ | < | ||
+ | < | ||
+ | | ||
+ | </children> | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | <m.rf> | ||
+ | <afun>AuxK</afun> | ||
+ | <ord>12</ord> | ||
+ | | ||
+ | </ | ||
+ | </LM></code> | ||
==== Parsing ==== | ==== Parsing ==== | ||
- | Nonprojectivities... | + | Nonprojectivities |
- | Parsing results... | + | Parsing results: we obtained a UAS of 80.73& |