[ Skip to the content ]

Institute of Formal and Applied Linguistics Wiki


[ Back to the navigation ]

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision Both sides next revision
user:zeman:treebanks:ja [2012/01/04 09:25]
zeman Domain and size.
user:zeman:treebanks:ja [2012/01/04 09:34]
zeman Sample.
Line 50: Line 50:
 ==== Sample ==== ==== Sample ====
  
-The first sentence of DDT 1.0 in the DTAG format:+The first three sentences of the CoNLL 2006 training data:
  
-<code xml><tei.2> +| 1 | kasahara | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki> | 2 | HD | 2 | HD | 
-  <teiHeader type=text> +| 2 | arisa | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki> | 3 | COMP | 3 | COMP | 
-    <fileDesc> +| 3 | desu | <nowiki>_</nowiki> | PV | PVfin | u | 0 | ROOT | 0 | ROOT | 
-      <titleStmt> +| 4 | <nowiki>.</nowiki> | <nowiki>_</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>_</nowiki> | 3 | PUNCT | 3 | PUNCT | 
-        <title>Tagged sample of: 'Jeltsins skæbnetime'</title> +| |||||||||| 
-      </titleStmt> +| 1 | kadowaki | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki> | 2 | HD | 2 | HD | 
-      <extent words=158>158 running words</extent> +| 2 | masakazu | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki> | 3 | COMP | 3 | COMP | 
-      <publicationStmt> +| 3 | desu | <nowiki>_</nowiki> | PV | PVfin | u | 0 | ROOT | 0 | ROOT | 
-         <distributor>PAROLE-DK</distributor> +| 4 | <nowiki>.</nowiki> | <nowiki>_</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>_</nowiki> | 3 | PUNCT | 3 | PUNCT | 
-         <address><addrline>Christians Brygge 1,1., DK-1219 Copenhagen K.</address> +| |||||||||| 
-         <date>1998-06-02</date> +| 1 | kadowaki | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki> | 2 | COMP | 2 | COMP | 
-         <availability status=restricted><p>by agreement with distributor</availability> +| 2 | saN | <nowiki>_</nowiki> | P | PNsf | <nowiki>_</nowiki> | 0 | ROOT | 0 | ROOT | 
-      </publicationStmt> +| 3 | omatase | <nowiki>_</nowiki> | N | VN | <nowiki>_</nowiki> | 4 | COMP | 4 | COMP | 
-      <sourceDesc> +| 4 | shimashita | <nowiki>_</nowiki> | VS | VSfin | ta | 0 | ROOT | 0 | ROOT | 
-        <biblStruct> +| 5 | <nowiki>.</nowiki> | <nowiki>_</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>_</nowiki> | 4 | PUNCT | 4 | PUNCT |
-          <analytic> +
-            <title>Jeltsins skæbnetime</title> +
-            <author gender=m born=1925>Nikulin, Leon</author> +
-          </analytic> +
-          <monogr> +
-            <imprint><pubPlace>Denmark</pubPlace> +
-              <publisher>Det Fri Aktuelt</publisher> +
-              <date>1992-12-01</date> +
-            </imprint> +
-          </monogr> +
-        </biblStruct> +
-      </sourceDesc> +
-    </fileDesc> +
-    <profileDesc> +
-      <creation>1992-12-01</creation> +
-      <langUsage><language>Danish</langUsage> +
-      <textClass> +
-        <catRef target="P.M2"> +
-        <catRef target="P.G4.8"> +
-        <catRef target="P.T9.3"> +
-      </textClass> +
-    </profileDesc> +
-  </teiHeader> +
-<text id=AJK> +
-<body> +
-<div1 type=main> +
-<p> +
-<s> +
-<W lemma="to" msd="AC---U=--" in="9:subj" out="1:mod|2:mod|3:nobj|5:appr">To</W> +
-<W lemma="kendt" msd="ANP[CN]PU=[DI]U" in="-1:mod" out="">kendte</W> +
-<W lemma="russisk" msd="ANP[CN]PU=[DI]U" in="-2:mod" out="">russiske</W> +
-<W lemma="historiker" msd="NCCPU==I" in="-3:nobj" out="">historikere</W> +
-<W lemma="Andronik" msd="NP--U==-" in="1:namef" out="">Andronik</W> +
-<W lemma="Mirganjan" msd="NP--U==-" in="-5:appr" out="-1:namef|1:coord">Mirganjan</W> +
-<W lemma="og" msd="CC" in="-1:coord" out="2:conj">og</W> +
-<W lemma="Igor" msd="NP--U==-" in="1:namef" out="">Igor</W> +
-<W lemma="Klamkin" msd="NP--U==-" in="-2:conj" out="-1:namef">Klamkin</W> +
-<W lemma="tro" msd="VADR=----A-" in="" out="-9:subj|1:mod|2:pnct|3:dobj|12:pnct">tror</W> +
-<W lemma="ikke" msd="RGU" in="-1:mod" out="">ikke</W> +
-<W lemma="," msd="XP" in="-2:pnct" out="">,</W> +
-<W lemma="at" msd="CS" in="-3:dobj" out="2:vobj">at</W> +
-<W lemma="Rusland" msd="NP--U==-" in="1:subj|2:[subj]" out="">Rusland</W> +
-<W lemma="kunne" msd="VADR=----A-" in="-2:vobj" out="-1:subj|1:vobj|2:mod">kan</W> +
-<W lemma="udvikle" msd="VAF-=----P-" in="-1:vobj" out="-2:[subj]">udvikles</W> +
-<W lemma="uden" msd="SP" in="-2:mod" out="1:nobj">uden</W> +
-<W lemma="en" msd="PI-CSU--U" in="-1:nobj" out="2:nobj">en</W> +
-<W lemma="&quot;" msd="XP" in="1:pnct" out="">"</W> +
-<W lemma="jernnæve" msd="NCCSU==I" in="-2:nobj" out="-1:pnct|1:pnct">jernnæve</W> +
-<W lemma="&quot;" msd="XP" in="-1:pnct" out="">"</W> +
-<W lemma="." msd="XP" in="-12:pnct" out="">.</W> +
-</s></code>+
  
-The first sentence of the CoNLL 2006 training data:+The first three sentences of the CoNLL 2006 test data:
  
-| 1 | Samme _ | A | AN | degree=pos<nowiki>|</nowiki>gender=common/neuter<nowiki>|</nowiki>number=sing/plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=def/indef<nowiki>|</nowiki>transcat=unmarked ROOT +| 1 | tashiro | <nowiki>_</nowiki>NAME NAMEper | <nowiki>_</nowiki>HD HD 
-| 2 | cifre _ | N | NC | gender=neuter<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=indef nobj +| 2 | yasuko | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki>COMP COMP 
-| 3 | | _ | XP pnct +| 3 | desu <nowiki>_</nowiki> PV PVfin ROOT ROOT 
-| 4 | de | _ | P | PD | gender=common/neuter<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>register=unmarked | 7 | subj | _ | _ | +| 4 | <nowiki>.</nowiki> | <nowiki>_</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>_</nowiki>PUNCT PUNCT 
-| 5 | norske | _ | A | AN degree=pos<nowiki>|</nowiki>gender=common/neuter<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=def/indef<nowiki>|</nowiki>transcat=unmarked mod +| |||||||||| 
-piger NC gender=common<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=indef nobj +| 1 | hayakawa | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki>HD HD 
-7 | tabte | _ | V VA mood=indic<nowiki>|</nowiki>tense=past<nowiki>|</nowiki>voice=active rel +ryou | <nowiki>_</nowiki> | NAME | NAMEper | <nowiki>_</nowiki>COMP COMP 
-med | _ | SP SP pobj +desu <nowiki>_</nowiki> PV PVfin ROOT ROOT 
-i_lørdags | _ | RG | RG | degree=unmarked | 7 | mod | _ | _ | +<nowiki>.</nowiki> <nowiki>_</nowiki> | <nowiki>.</nowiki> | <nowiki>.</nowiki> | <nowiki>_</nowiki>PUNCT PUNCT 
-| 10 | mod | _ | SP | SP | _ | 7 | pobj | _ | _ | +| |||||||||| 
-| 11 | VMs | _ | N | NP | case=gen | 10 | nobj | _ | _ | +| 1 | hayakawa <nowiki>_</nowiki> NAME NAMEper <nowiki>_</nowiki> COMP COMP 
-| 12 | værtsnation | _ | N | NC | gender=common<nowiki>|</nowiki>number=sing<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=indef 11 possd +| 2 | saN | <nowiki>_</nowiki>PNsf | <nowiki>_</nowiki>ADJ ADJ 
-13 XP pnct | +| 3 | ima | <nowiki>_</nowiki>Ntmp | <nowiki>_</nowiki>ADJ ADJ 
- +| 4 | chotto | <nowiki>_</nowiki> | ADV | ADV | <nowiki>_</nowiki>ADJ ADJ 
-The first sentence of the CoNLL 2006 test data: +| 5 | ojikaN <nowiki>_</nowiki> | N | NN <nowiki>_</nowiki> | 6 | SBJ SBJ 
- +| 6 | yoroshii <nowiki>_</nowiki> ADJ ADJifin <nowiki>_</nowiki> COMP COMP 
-| 1 | To | _ | AC case=unmarked 10 subj +| 7 | desu <nowiki>_</nowiki> PV PVfin ROOT ROOT 
-| 2 | kendte _ | A | AN | degree=pos<nowiki>|</nowiki>gender=common/neuter<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=def/indef<nowiki>|</nowiki>transcat=unmarked mod +| 8 | ka <nowiki>_</nowiki> PS PSE <nowiki>_</nowiki> MRK MRK 
-| 3 | russiske _ | A | AN | degree=pos<nowiki>|</nowiki>gender=common/neuter<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=def/indef<nowiki>|</nowiki>transcat=unmarked mod +| 9 | chotto <nowiki>_</nowiki> ADV ADV <nowiki>_</nowiki> 16 ADJ 16 ADJ 
-| 4 | historikere _ | N | NC | gender=common<nowiki>|</nowiki>number=plur<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=indef nobj +| 10 | doitsu | <nowiki>_</nowiki> | NAME | NAMEloc | <nowiki>_</nowiki>11 COMP 11 COMP 
-| 5 | Andronik | _ | N | NP case=unmarked | 6 | namef +| 11 | no <nowiki>_</nowiki> Pgen <nowiki>_</nowiki> 12 ADJ 12 ADJ 
-| 6 | Mirganjan | _ | NP case=unmarked appr +| 12 | shucchou <nowiki>_</nowiki> VN <nowiki>_</nowiki> 13 COMP 13 COMP 
-| 7 | og | _ | CC coord +| 13 | no <nowiki>_</nowiki> Pgen <nowiki>_</nowiki> 14 COMP 14 COMP 
-| 8 | Igor | _ | NP case=unmarked namef +| 14 | koto <nowiki>_</nowiki> | N | NF <nowiki>_</nowiki> | 15 | COMP 15 COMP 
-| 9 | Klamkin | _ | NP case=unmarked conj +| 15 | de | <nowiki>_</nowiki> | P | P | <nowiki>_</nowiki>16 ADJ 16 ADJ 
-| 10 | tror _ | V | VA | mood=indic<nowiki>|</nowiki>tense=present<nowiki>|</nowiki>voice=active ROOT +| 16 | gosoudaN <nowiki>_</nowiki> VN | <nowiki>_</nowiki>17 COMP 17 COMP 
-| 11 | ikke | _ | RG RG degree=unmarked 10 mod +| 17 | shitai <nowiki>_</nowiki> VADJ VADJi <nowiki>_</nowiki> 18 COMP 18 COMP 
-| 12 | | _ | XP | _ | 10 pnct +| 18 | no | <nowiki>_</nowiki> | N | NF | <nowiki>_</nowiki>19 COMP 19 COMP 
-| 13 | at | _ | CS | _ | 10 dobj +| 19 | desu <nowiki>_</nowiki> PV PVfin | 20 | COMP 20 COMP 
-| 14 | Rusland | _ | N | NP case=unmarked | 15 | subj +| 20 | ga | <nowiki>_</nowiki> | PS | PSSb | <nowiki>_</nowiki>ROOT ROOT 
-| 15 | kan | _ | V | VA mood=indic<nowiki>|</nowiki>tense=present<nowiki>|</nowiki>voice=active 13 vobj +| 21 | <nowiki>.</nowiki> <nowiki>_</nowiki> <nowiki>.</nowiki> <nowiki>.</nowiki> <nowiki>_</nowiki> | 20 | PUNCT 20 PUNCT |
-| 16 | udvikles | _ | VA mood=infin<nowiki>|</nowiki>voice=passive 15 vobj +
-| 17 | uden | _ | SP SP | _ | 15 mod +
-| 18 | en | _ | P | PI gender=common<nowiki>|</nowiki>number=sing<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>register=unmarked 17 nobj +
-| 19 | | _ | XP | 20 | pnct +
-| 20 | jernnæve _ | N | NC | gender=common<nowiki>|</nowiki>number=sing<nowiki>|</nowiki>case=unmarked<nowiki>|</nowiki>def=indef 18 nobj +
-| 21 | | _ | XP | _ | 20 | pnct _ | +
-| 22 | . | _ | X | XP | _ | 10 | pnct | _ | _ |+
  
 ==== Parsing ==== ==== Parsing ====

[ Back to the navigation ] [ Back to the content ]