Dataset Open Access

INEL Kamas Corpus

Gusev, Valentin; Klooster, Tiina; Wagner-Nagy, Beáta


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nmm##2200000uu#4500</leader>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="o">oai:fdr.uni-hamburg.de:13882</subfield>
    <subfield code="p">user-inel</subfield>
    <subfield code="p">user-adwhh</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="a">11022/0000-0007-FC25-4</subfield>
    <subfield code="i">isCitedBy</subfield>
    <subfield code="n">handle</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="a">10.25592/uhhfdm.9740</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="n">doi</subfield>
  </datafield>
  <controlfield tag="005">20250922131412.0</controlfield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution Non Commercial Share Alike 4.0 International</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2023-12-29</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">xas</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">endangered language</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">indigenous language</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">L1 data</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">language contact</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">language documentation</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">INEL</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">folklore</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">narrative</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">monologue</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">annotated</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">morphological glossing</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">borrowings</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">code-switching</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">semantic roles</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">syntactic functions</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">information status</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">English translation</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">German translation</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Russian translation</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">INEL Kamas Corpus</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">229232</subfield>
    <subfield code="u">https://www.fdr.uni-hamburg.de/record/13882/files/kamas-2.0-documentation.pdf</subfield>
    <subfield code="z">md5:be551320e8e3f9f09ff95843c8da92d8</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">492709288</subfield>
    <subfield code="u">https://www.fdr.uni-hamburg.de/record/13882/files/kamas-2.0-mp3only.zip</subfield>
    <subfield code="z">md5:35631d0a5c5ecdb7f186829f5e87c6fd</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">84942035</subfield>
    <subfield code="u">https://www.fdr.uni-hamburg.de/record/13882/files/kamas-2.0-noaudio.zip</subfield>
    <subfield code="z">md5:145417dbd05f5304f9fc5a487352f95c</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">4100274255</subfield>
    <subfield code="u">https://www.fdr.uni-hamburg.de/record/13882/files/kamas-2.0.zip</subfield>
    <subfield code="z">md5:d09850583132ebe49983c98957c3c4cd</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;&lt;strong&gt;Corpus Citation&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Gusev, Valentin; Klooster, Tiina; Wagner-Nagy, Be&amp;aacute;ta.&lt;/em&gt; 2023. &amp;ldquo;INEL Kamas Corpus.&amp;rdquo; Version 2.0. Publication date 2023-12-31. &lt;a href="http://hdl.handle.net/11022/0000-0007-FC25-4"&gt;http://hdl.handle.net/11022/0000-0007-FC25-4&lt;/a&gt;. Archived at Universit&amp;auml;t Hamburg. In: The INEL corpora of indigenous Northern Eurasian languages. &lt;a href="https://hdl.handle.net/11022/0000-0007-F45A-1"&gt;https://hdl.handle.net/11022/0000-0007-F45A-1&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Corpus Description&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The INEL Kamas corpus has been created within the long-term INEL project (&amp;quot;Grammatical Descriptions, Corpora and Language Technology for Indigenous Northern Eurasian Languages&amp;quot;), 2016&amp;ndash;2033. The corpus makes possible typologically aware corpus-based grammatical research on the Kamas language and expands the documentation of the lesser described indigenous languages of Northern Eurasia.&lt;/p&gt;

&lt;p&gt;The INEL Kamas corpus consists of two parts: folklore texts collected by Kai Donner in 1912&amp;ndash;1914, and transcribed audio recordings of the last speaker of Kamas, Klavdiya Plotnikova, made between 1964 and 1970.&lt;/p&gt;

&lt;p&gt;Each text in the corpus is provided with morphological glossing, translation into English, Russian and German, as well as annotation of syntactic functions, semantic roles, Russian borrowings and code-switching. Some texts also have annotations for information status.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;New in release 2.0&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;In texts from Donner&amp;rsquo;s collection, phonetic transcription according to Klumpp&amp;rsquo;s edition of Donner&amp;rsquo;s manuscripts has been added&amp;nbsp;(as stl tier)&lt;/li&gt;
	&lt;li&gt;Five texts which were originally split between different tapes have been merged, as well as respective parts of recordings. Sentences in each resulting text are numbered throughout
	&lt;ul&gt;
		&lt;li&gt;PKZ_196X_Alenushka_flk + PKZ_196X_Alenushka_continuation_flk &amp;gt; PKZ_196X_Alenushka_flk&lt;/li&gt;
		&lt;li&gt;End of PKZ_196X_SU0226 starting from PKZ_196X_SU0226.203 (210) + PKZ_196X_Alenushka2_continuation_flk &amp;gt; PKZ_196X_Alenushka2_flk&lt;/li&gt;
		&lt;li&gt;PKZ_196X_BlacksmithAndMerchant_flk + PKZ_196X_BlacksmithAndMerchant_cont_flk &amp;gt; PKZ_196X_BlacksmithAndMerchant_flk&lt;/li&gt;
		&lt;li&gt;PKZ_196X_Finist_flk + PKZ_196X_Finist_continuation_flk&amp;nbsp;&amp;gt;&amp;nbsp;PKZ_196X_Finist_flk&lt;/li&gt;
		&lt;li&gt;PKZ_196X_StupidWolf_flk + PKZ_196X_StupidWolf_continuation_flk &amp;gt; PKZ_196X_StupidWolf_flk&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;Part of the texts are now annotated for existential, locative and possessive predication (ExLocPoss tier, by C.L.&amp;nbsp;D&amp;auml;britz)&lt;/li&gt;
	&lt;li&gt;Numerous corrections in glosses, other annotations and transcriptions, including:
	&lt;ul&gt;
		&lt;li&gt;Fuller and more consistent transcription, glossing and annotations of borrowings&lt;/li&gt;
		&lt;li&gt;Vowel length is marked in mp tier in &lt;em&gt;baːzoʔ&lt;/em&gt; &amp;lsquo;again&amp;rsquo;, &lt;em&gt;b&amp;uuml;ːzʼe&lt;/em&gt; &amp;lsquo;man&amp;rsquo; and &lt;em&gt;saːgər&lt;/em&gt; &amp;lsquo;black&amp;rsquo;&lt;/li&gt;
		&lt;li&gt;Corrections in disambiguation of polysemous or homonymous morphemes:&amp;nbsp;&lt;br&gt;
		-ziʔ&amp;nbsp;&amp;quot;INS&amp;quot;/&amp;quot;COM&amp;quot;, -də &amp;quot;LAT&amp;quot;/&amp;quot;3SG&amp;quot;, mo- &amp;quot;can/become/want | мочь/стать/хотеть&amp;quot;&lt;/li&gt;
		&lt;li&gt;Possessive suffix unmarked for case: &amp;quot;NOM/GEN/ACC&amp;quot; &amp;gt; &amp;quot;POSS&amp;quot;&lt;/li&gt;
		&lt;li&gt;Glosses for personal pronouns were changed to uniform labels: &amp;quot;I | я&amp;quot; &amp;gt; &amp;quot;PRO1SG&amp;quot;, &amp;quot;we | мы&amp;quot; &amp;gt; &amp;quot;PRO1PL&amp;quot;, &amp;quot;you | ты&amp;quot;&amp;nbsp;&amp;gt;&amp;nbsp;&amp;quot;PRO2SG&amp;quot;, &amp;quot;you.PL | вы&amp;quot; &amp;gt; &amp;quot;PRO2PL&amp;quot;&lt;/li&gt;
		&lt;li&gt;Fuller annotations of code-switching and calques (CS tier)&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;Added ELAN *.eaf as a supplementary end-user file format for all transcripts&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;&lt;strong&gt;Funding&lt;/strong&gt;&lt;/p&gt;

&lt;p&gt;The corpus has been produced in the context of the joint research funding of the German Federal Government and Federal States in the Academies&amp;rsquo; Programme, with funding from the Federal Ministry of Education and Research and the Free and Hanseatic City of Hamburg. The Academies&amp;rsquo; Programme is coordinated by the Union of the German Academies of Sciences and Humanities.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Contributions/Acknowledgements&lt;/strong&gt;&lt;/p&gt;

&lt;ul&gt;
	&lt;li&gt;
	&lt;p&gt;Recordings of Kamas speech made by Ago K&amp;uuml;nnap in Abalakovo and by Tiit-Rein Viitso in Tartu provided by the Archive of Estonian Dialects and Kindred Languages of the University of Tartu, Estonia (AEDKL, or T&amp;Uuml;EMSA).&lt;/p&gt;
	&lt;/li&gt;
	&lt;li&gt;
	&lt;p&gt;Recordings of Klavdiya Plotnikova made by Jaakko Yli-Paavola in Tallinn in 1970 provided by the Institute for the Languages of Finland archive, Helsinki (KOTUS).&lt;/p&gt;
	&lt;/li&gt;
	&lt;li&gt;
	&lt;p&gt;Scanned pages from the Kai Donners Kamassisches W&amp;ouml;rterbuch (Joki 1944) containing texts collected by Kai Donner published online courtesy of the Finno-Ugrian Society.&lt;/p&gt;
	&lt;/li&gt;
	&lt;li&gt;
	&lt;p&gt;The web-based search interface is using the Tsakonian Corpus platform developed by Dr. Timofey Arkhangelskiy.&lt;/p&gt;
	&lt;/li&gt;
&lt;/ul&gt;</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">dataset</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-adwhh</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-inel</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Klooster, Tiina</subfield>
    <subfield code="u">Universität Hamburg</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Wagner-Nagy, Beáta</subfield>
    <subfield code="u">Universität Hamburg</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Wagner-Nagy, Beáta</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">res</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Arkhipov, Alexandre</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">res</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Gusev, Valentin</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">res</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Klooster, Tiina</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">res</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Ferger, Anne</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">dtm</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Jettka, Daniel</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">dtm</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="a">Lehmberg, Timm</subfield>
    <subfield code="u">Universität Hamburg</subfield>
    <subfield code="4">dtm</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.25592/uhhfdm.13882</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="a">Gusev, Valentin</subfield>
    <subfield code="u">Universität Hamburg</subfield>
  </datafield>
  <controlfield tag="001">13882</controlfield>
</record>

Cite record as