Both sides previous revision
Previous revision
|
Last revision
Both sides next revision
|
user:zeman:treebanks:fi [2011/12/05 14:37] zeman Size. |
user:zeman:treebanks:fi [2011/12/05 14:46] zeman Sample. |
==== Sample ==== | ==== Sample ==== |
| |
The first sentence of the corpus in the TIGER-XML format: | The first two sentences of the corpus in its native XML format: |
| |
<code xml><s id="ratsep-13" ref="ratsep-1" source="id=ratsep-1" forest="1/1" text="Peeter aerutas üle väina saarele puhkama"> | <code xml><treeset name="http://ranneliike.net/blogi.php?nick=Aboa Kirjoitettu: 02.02.2010, 15:41:06"> |
<graph root="ratsep-13_501"> | <sentence txt="Kävelyreitti III"> |
<terminals> | <token charOff="0-12"> |
<t id="ratsep-13_1" word="Peeter" lemma="Peeter+0" pos="prop" morph="prop,sg,nom,.cap"/> | <posreading CG="true" baseform="kävely#reitti" rawtags="N NOM SG &lt;up&gt;" /> |
<t id="ratsep-13_2" word="aerutas" lemma="aeruta+s" pos="v-fin" morph="main,indic,impf,ps3,sg,ps,af,.FinV"/> | </token> |
<t id="ratsep-13_3" word="üle" lemma="üle+0" pos="prp" morph="pre,.gen"/> | <token charOff="13-16"> |
<t id="ratsep-13_4" word="väina" lemma="väin+0" pos="n" morph="com,sg,gen"/> | <posreading CG="true" baseform="III" rawtags="&lt;roman&gt; ABBR NOM SG &lt;up&gt;" /> |
<t id="ratsep-13_5" word="saarele" lemma="saar+le" pos="n" morph="com,sg,all"/> | <posreading CG="true" baseform="iii" rawtags="ABBR &lt;up&gt;" /> |
<t id="ratsep-13_6" word="puhkama" lemma="puhka+ma" pos="v-inf" morph="main,sup,ps,ill,.Part"/> | <posreading CG="true" baseform="iii" rawtags="&lt;roman&gt; ABBR NOM SG &lt;up&gt;" /> |
<t id="ratsep-13_7" word="." lemma="." pos="punc" morph="Fst"/> | </token> |
</terminals> | <dep dep="1" gov="0" type="num" /> |
| </sentence> |
| <sentence txt="Jäällä kävely avaa aina hauskoja ja erikoisia näkökulmia kaupunkiin."> |
| <token charOff="0-6"> |
| <posreading CG="true" baseform="jää" rawtags="N ADE SG &lt;up&gt;" /> |
| </token> |
| <token charOff="7-13"> |
| <posreading CG="true" baseform="kävely" rawtags="DV-U N NOM SG" /> |
| </token> |
| <token charOff="14-18"> |
| <posreading CG="true" baseform="avata" rawtags="V PRES ACT SG3" /> |
| <posreading CG="false" baseform="avata" rawtags="V PRES ACT NEG" /> |
| <posreading CG="false" baseform="avata" rawtags="V IMPV ACT SG2" /> |
| <posreading CG="false" baseform="avata" rawtags="V IMPV ACT NEG" /> |
| </token> |
| <token charOff="19-23"> |
| <posreading CG="true" baseform="aina" rawtags="ADV" /> |
| </token> |
| <token charOff="24-32"> |
| <posreading CG="true" baseform="hauska" rawtags="A POS PTV PL" /> |
| </token> |
| <token charOff="33-35"> |
| <posreading CG="true" baseform="ja" rawtags="COORD C" /> |
| </token> |
| <token charOff="36-45"> |
| <posreading CG="true" baseform="erikoinen" rawtags="A POS PTV PL" /> |
| </token> |
| <token charOff="46-56"> |
| <posreading CG="true" baseform="näkö#kulma" rawtags="N PTV PL" /> |
| </token> |
| <token charOff="57-67"> |
| <posreading CG="true" baseform="kaupunki" rawtags="N ILL SG" /> |
| </token> |
| <token charOff="67-68"> |
| <posreading CG="true" baseform="." rawtags="PUNCT" /> |
| </token> |
| <dep dep="0" gov="1" type="nommod" /> |
| <dep dep="1" gov="2" type="nsubj" /> |
| <dep dep="3" gov="2" type="advmod" /> |
| <dep dep="7" gov="2" type="dobj" /> |
| <dep dep="9" gov="2" type="punct" /> |
| <dep dep="5" gov="4" type="cc" /> |
| <dep dep="6" gov="4" type="conj" /> |
| <dep dep="4" gov="7" type="amod" /> |
| <dep dep="8" gov="7" type="nommod" /> |
| </sentence></code> |
| |
<nonterminals> | The same two sentences in the CoNLL format: |
<nt id="ratsep-13_501" cat="VROOT"> | |
<edge label="STA" idref="ratsep-13_502"/> | | # b101.d.xml/1 |||||||||| |
</nt> | | 1 | Kävelyreitti | kävely<nowiki>|</nowiki>reitti | NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 0 | ROOT | _ | _ | |
<nt id="ratsep-13_502" cat="fcl"> | | 2 | III | III | roman<nowiki>|</nowiki>NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>ABBR | roman<nowiki>|</nowiki>NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>ABBR | _ | 1 | num | _ | _ | |
<edge label="S" idref="ratsep-13_1"/> | | |||||||||| |
<edge label="P" idref="ratsep-13_2"/> | | # b101.d.xml/2 |||||||||| |
<edge label="A" idref="ratsep-13_503"/> | | 1 | Jäällä | jää | ADE<nowiki>|</nowiki>SG<nowiki>|</nowiki>up<nowiki>|</nowiki>N | ADE<nowiki>|</nowiki>SG<nowiki>|</nowiki>up<nowiki>|</nowiki>N | _ | 2 | nommod | _ | _ | |
<edge label="A" idref="ratsep-13_5"/> | | 2 | kävely | kävely | DV-U<nowiki>|</nowiki>NOM<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | DV-U<nowiki>|</nowiki>NOM<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 3 | nsubj | _ | _ | |
<edge label="A" idref="ratsep-13_6"/> | | 3 | avaa | avata | SG3<nowiki>|</nowiki>ACT<nowiki>|</nowiki>PRES<nowiki>|</nowiki>V | SG3<nowiki>|</nowiki>ACT<nowiki>|</nowiki>PRES<nowiki>|</nowiki>V | _ | 0 | ROOT | _ | _ | |
<edge label="FST" idref="ratsep-13_7"/> | | 4 | aina | aina | ADV | ADV | _ | 3 | advmod | _ | _ | |
</nt> | | 5 | hauskoja | hauska | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | _ | 8 | amod | _ | _ | |
<nt id="ratsep-13_503" cat="pp"> | | 6 | ja | ja | C<nowiki>|</nowiki>COORD | C<nowiki>|</nowiki>COORD | _ | 5 | cc | _ | _ | |
<edge label="H" idref="ratsep-13_3"/> | | 7 | erikoisia | erikoinen | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | _ | 5 | conj | _ | _ | |
<edge label="D" idref="ratsep-13_4"/> | | 8 | näkökulmia | näkö<nowiki>|</nowiki>kulma | PTV<nowiki>|</nowiki>PL<nowiki>|</nowiki>N | PTV<nowiki>|</nowiki>PL<nowiki>|</nowiki>N | _ | 3 | dobj | _ | _ | |
</nt> | | 9 | kaupunkiin | kaupunki | ILL<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | ILL<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 8 | nommod | _ | _ | |
</nonterminals> | | 10 | . | . | PUNCT | PUNCT | _ | 3 | punct | _ | _ | |
</graph> | |
</s></code> | |
| |
==== Parsing ==== | ==== Parsing ==== |