[ Skip to the content ]

Institute of Formal and Applied Linguistics Wiki

[ Back to the navigation ]


This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Last revision Both sides next revision
user:zeman:treebanks:fi [2011/12/05 14:37]
zeman Size.
user:zeman:treebanks:fi [2011/12/05 14:46]
zeman Sample.
Line 48: Line 48:
 ==== Sample ==== ==== Sample ====
-The first sentence of the corpus in the TIGER-XML format:+The first two sentences of the corpus in its native XML format:
-<code xml><s id="ratsep-13" ref="ratsep-1" source="id=ratsep-1" forest="1/1" text="Peeter aerutas üle väina saarele puhkama"> +<code xml><treeset name="http://ranneliike.net/blogi.php?nick=Aboa Kirjoitettu: 02.02.2010, 15:41:06"> 
- <graph root="ratsep-13_501"> +  <sentence txt="Kävelyreitti III"> 
- <terminals> +    <token charOff="0-12"> 
- <t id="ratsep-13_1" word="Peeter" lemma="Peeter+0" pos="prop" morph="prop,sg,nom,.cap"/> +      <posreading CG="true" baseform="kävely#reitti" rawtags="N NOM SG &lt;up&gt;" /> 
- <t id="ratsep-13_2" word="aerutas" lemma="aeruta+s" pos="v-fin" morph="main,indic,impf,ps3,sg,ps,af,.FinV"/> +    </token> 
- <t id="ratsep-13_3" word="üle" lemma="üle+0" pos="prp" morph="pre,.gen"/> +    <token charOff="13-16"> 
- <t id="ratsep-13_4" word="väina" lemma="väin+0" pos="n" morph="com,sg,gen"/> +      <posreading CG="true" baseform="III" rawtags="&lt;roman&gt; ABBR NOM SG &lt;up&gt;" /> 
- <t id="ratsep-13_5" word="saarele" lemma="saar+le" pos="n" morph="com,sg,all"/> +      <posreading CG="true" baseform="iii" rawtags="ABBR &lt;up&gt;" /> 
- <t id="ratsep-13_6" word="puhkama" lemma="puhka+ma" pos="v-inf" morph="main,sup,ps,ill,.Part"/> +      <posreading CG="true" baseform="iii" rawtags="&lt;roman&gt; ABBR NOM SG &lt;up&gt;" /> 
- <t id="ratsep-13_7" word="." lemma="." pos="punc" morph="Fst"/> +    </token> 
- </terminals> +    <dep dep="1" gov="0" type="num" /> 
 +  </sentence> 
 +  <sentence txt="Jäällä kävely avaa aina hauskoja ja erikoisia näkökulmia kaupunkiin."> 
 +    <token charOff="0-6"> 
 +      <posreading CG="true" baseform="jää" rawtags="N ADE SG &lt;up&gt;" /> 
 +    </token> 
 +    <token charOff="7-13"> 
 +      <posreading CG="true" baseform="kävely" rawtags="DV-U N NOM SG" /> 
 +    </token> 
 +    <token charOff="14-18"> 
 +      <posreading CG="true" baseform="avata" rawtags="V PRES ACT SG3" /> 
 +      <posreading CG="false" baseform="avata" rawtags="V PRES ACT NEG" /> 
 +      <posreading CG="false" baseform="avata" rawtags="V IMPV ACT SG2" /> 
 +      <posreading CG="false" baseform="avata" rawtags="V IMPV ACT NEG" /> 
 +    </token> 
 +    <token charOff="19-23"> 
 +      <posreading CG="true" baseform="aina" rawtags="ADV" /> 
 +    </token> 
 +    <token charOff="24-32"> 
 +      <posreading CG="true" baseform="hauska" rawtags="A POS PTV PL" /> 
 +    </token> 
 +    <token charOff="33-35"> 
 +      <posreading CG="true" baseform="ja" rawtags="COORD C" /> 
 +    </token> 
 +    <token charOff="36-45"> 
 +      <posreading CG="true" baseform="erikoinen" rawtags="A POS PTV PL" /> 
 +    </token> 
 +    <token charOff="46-56"> 
 +      <posreading CG="true" baseform="näkö#kulma" rawtags="N PTV PL" /> 
 +    </token> 
 +    <token charOff="57-67"> 
 +      <posreading CG="true" baseform="kaupunki" rawtags="N ILL SG" /> 
 +    </token> 
 +    <token charOff="67-68"> 
 +      <posreading CG="true" baseform="." rawtags="PUNCT" /> 
 +    </token> 
 +    <dep dep="0" gov="1" type="nommod" /> 
 +    <dep dep="1" gov="2" type="nsubj" /> 
 +    <dep dep="3" gov="2" type="advmod" /> 
 +    <dep dep="7" gov="2" type="dobj" /> 
 +    <dep dep="9" gov="2" type="punct" /> 
 +    <dep dep="5" gov="4" type="cc" /> 
 +    <dep dep="6" gov="4" type="conj" /> 
 +    <dep dep="4" gov="7" type="amod" /> 
 +    <dep dep="8" gov="7" type="nommod" /> 
 +  </sentence></code>
- <nonterminals> +The same two sentences in the CoNLL format: 
- <nt id="ratsep-13_501" cat="VROOT"> + 
- <edge label="STA" idref="ratsep-13_502"/> +| # b101.d.xml/1 |||||||||| 
- </nt> +| 1 | Kävelyreitti | kävely<nowiki>|</nowiki>reitti | NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 0 | ROOT | _ | _ | 
- <nt id="ratsep-13_502" cat="fcl"> +| 2 | III | III | roman<nowiki>|</nowiki>NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>ABBR | roman<nowiki>|</nowiki>NOM<nowiki>|</nowiki>up<nowiki>|</nowiki>SG<nowiki>|</nowiki>ABBR | _ | 1 | num | _ | _ | 
- <edge label="S" idref="ratsep-13_1"/> +| |||||||||| 
- <edge label="P" idref="ratsep-13_2"/> +| # b101.d.xml/2 |||||||||| 
- <edge label="A" idref="ratsep-13_503"/> +| 1 | Jäällä | jää | ADE<nowiki>|</nowiki>SG<nowiki>|</nowiki>up<nowiki>|</nowiki>N | ADE<nowiki>|</nowiki>SG<nowiki>|</nowiki>up<nowiki>|</nowiki>N | _ | 2 | nommod | _ | _ | 
- <edge label="A" idref="ratsep-13_5"/> +| 2 | kävely | kävely | DV-U<nowiki>|</nowiki>NOM<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | DV-U<nowiki>|</nowiki>NOM<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 3 | nsubj | _ | _ | 
- <edge label="A" idref="ratsep-13_6"/> +| 3 | avaa | avata | SG3<nowiki>|</nowiki>ACT<nowiki>|</nowiki>PRES<nowiki>|</nowiki>V | SG3<nowiki>|</nowiki>ACT<nowiki>|</nowiki>PRES<nowiki>|</nowiki>V | _ | 0 | ROOT | _ | _ | 
- <edge label="FST" idref="ratsep-13_7"/> +| 4 | aina | aina | ADV | ADV | _ | 3 | advmod | _ | _ | 
- </nt> +| 5 | hauskoja | hauska | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | _ | 8 | amod | _ | _ | 
- <nt id="ratsep-13_503" cat="pp"> +| 6 | ja | ja | C<nowiki>|</nowiki>COORD | C<nowiki>|</nowiki>COORD | _ | 5 | cc | _ | _ | 
- <edge label="H" idref="ratsep-13_3"/> +| 7 | erikoisia | erikoinen | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | A<nowiki>|</nowiki>PTV<nowiki>|</nowiki>POS<nowiki>|</nowiki>PL | _ | 5 | conj | _ | _ | 
- <edge label="D" idref="ratsep-13_4"/> +| 8 | näkökulmia | näkö<nowiki>|</nowiki>kulma | PTV<nowiki>|</nowiki>PL<nowiki>|</nowiki>N | PTV<nowiki>|</nowiki>PL<nowiki>|</nowiki>N | _ | 3 | dobj | _ | _ | 
- </nt> +| 9 | kaupunkiin | kaupunki | ILL<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | ILL<nowiki>|</nowiki>SG<nowiki>|</nowiki>N | _ | 8 | nommod | _ | _ | 
- </nonterminals> +| 10 | . | . | PUNCT | PUNCT | _ | 3 | punct | _ | _ |
- </graph> +
 ==== Parsing ==== ==== Parsing ====

[ Back to the navigation ] [ Back to the content ]