RegistriesOverlap/data/re3dataRecords/r3d100013391.xml

88 lines
7.0 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!--re3data.org Schema for the Description of Research Data Repositories. Version 2.2, December 2014. doi:10.2312/re3.006-->
<r3d:re3data xmlns:r3d="http://www.re3data.org/schema/2-2"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://www.re3data.org/schema/2-2 http://schema.re3data.org/2-2/re3dataV2-2.xsd">
  <!-- Registry record r3d100013391: the PELIC learner corpus, published as a GitHub repository. -->
  <r3d:repository>
    <!-- Identification and general description -->
    <r3d:re3data.orgIdentifier>r3d100013391</r3d:re3data.orgIdentifier>
    <r3d:repositoryName language="eng">The University of Pittsburgh English Language Institute Corpus</r3d:repositoryName>
    <r3d:additionalName language="eng">PELIC</r3d:additionalName>
    <r3d:repositoryURL>https://github.com/ELI-Data-Mining-Group/PELIC-dataset</r3d:repositoryURL>
    <r3d:description language="eng">The University of Pittsburgh English Language Institute Corpus (PELIC) is a 4.2-million-word learner corpus of written texts. These texts were collected in an English for Academic Purposes (EAP) context over seven years in the University of Pittsburgh's Intensive English Program, and were produced by over 1100 students with a wide range of linguistic backgrounds and proficiency levels. PELIC is longitudinal, offering greater opportunities for tracking development in a natural classroom setting.</r3d:description>
    <r3d:repositoryContact>bnaismith@pitt.edu</r3d:repositoryContact>
    <r3d:type>other</r3d:type>
    <r3d:size updated="2020-08-21">4.2 million words</r3d:size>
    <r3d:startDate>2020-08-17</r3d:startDate>
    <r3d:endDate></r3d:endDate>
    <r3d:repositoryLanguage>eng</r3d:repositoryLanguage>

    <!-- Subject classification (DFG scheme), content types and keywords -->
    <r3d:subject subjectScheme="DFG">1 Humanities and Social Sciences</r3d:subject>
    <r3d:subject subjectScheme="DFG">104 Linguistics</r3d:subject>
    <r3d:subject subjectScheme="DFG">10401 General and Applied Linguistics</r3d:subject>
    <r3d:subject subjectScheme="DFG">11 Humanities</r3d:subject>
    <r3d:missionStatementURL></r3d:missionStatementURL>
    <r3d:contentType contentTypeScheme="parse">Archived data</r3d:contentType>
    <r3d:contentType contentTypeScheme="parse">Images</r3d:contentType>
    <r3d:contentType contentTypeScheme="parse">Source code</r3d:contentType>
    <r3d:contentType contentTypeScheme="parse">Standard office documents</r3d:contentType>
    <r3d:providerType>dataProvider</r3d:providerType>
    <r3d:keyword>English for Academic Purposes</r3d:keyword>
    <r3d:keyword>TESOL</r3d:keyword>
    <r3d:keyword>learner corpus</r3d:keyword>
    <r3d:keyword>second language acquisition</r3d:keyword>

    <!-- Institutions, one per responsibility: technical (hosting), funding, general -->
    <r3d:institution>
      <r3d:institutionName language="eng">GitHub, Inc.</r3d:institutionName>
      <r3d:institutionCountry>USA</r3d:institutionCountry>
      <r3d:responsibilityType>technical</r3d:responsibilityType>
      <!-- GitHub, Inc. is a for-profit company, hence "commercial" rather than "non-profit". -->
      <r3d:institutionType>commercial</r3d:institutionType>
      <r3d:institutionURL>https://github.com/</r3d:institutionURL>
      <r3d:responsibilityStartDate></r3d:responsibilityStartDate>
      <r3d:responsibilityEndDate></r3d:responsibilityEndDate>
    </r3d:institution>
    <r3d:institution>
      <r3d:institutionName language="eng">National Science Foundation</r3d:institutionName>
      <r3d:institutionAdditionalName language="eng">NSF</r3d:institutionAdditionalName>
      <r3d:institutionCountry>USA</r3d:institutionCountry>
      <r3d:responsibilityType>funding</r3d:responsibilityType>
      <r3d:institutionType>non-profit</r3d:institutionType>
      <r3d:institutionURL>https://www.nsf.gov/</r3d:institutionURL>
      <r3d:institutionIdentifier>ROR:021nxhr62</r3d:institutionIdentifier>
      <r3d:responsibilityStartDate></r3d:responsibilityStartDate>
      <r3d:responsibilityEndDate></r3d:responsibilityEndDate>
    </r3d:institution>
    <r3d:institution>
      <r3d:institutionName language="eng">University of Pittsburgh, English Language Institute</r3d:institutionName>
      <r3d:institutionCountry>USA</r3d:institutionCountry>
      <r3d:responsibilityType>general</r3d:responsibilityType>
      <r3d:institutionType>non-profit</r3d:institutionType>
      <r3d:institutionURL>https://www.eli.pitt.edu</r3d:institutionURL>
      <r3d:responsibilityStartDate></r3d:responsibilityStartDate>
      <r3d:responsibilityEndDate></r3d:responsibilityEndDate>
    </r3d:institution>

    <!-- Access, licensing and upload policy -->
    <r3d:databaseAccess>
      <r3d:databaseAccessType>open</r3d:databaseAccessType>
    </r3d:databaseAccess>
    <r3d:databaseLicense>
      <r3d:databaseLicenseName>CC</r3d:databaseLicenseName>
      <r3d:databaseLicenseURL>https://creativecommons.org/licenses/by-nc-nd/4.0/</r3d:databaseLicenseURL>
    </r3d:databaseLicense>
    <r3d:dataAccess>
      <r3d:dataAccessType>open</r3d:dataAccessType>
    </r3d:dataAccess>
    <r3d:dataLicense>
      <r3d:dataLicenseName>CC</r3d:dataLicenseName>
      <r3d:dataLicenseURL>https://creativecommons.org/licenses/by-nc-nd/4.0/</r3d:dataLicenseURL>
    </r3d:dataLicense>
    <r3d:dataUpload>
      <r3d:dataUploadType>restricted</r3d:dataUploadType>
      <r3d:dataUploadRestriction>other</r3d:dataUploadRestriction>
    </r3d:dataUpload>

    <!-- Technical and quality characteristics -->
    <r3d:software>
      <r3d:softwareName>other</r3d:softwareName>
    </r3d:software>
    <r3d:versioning>yes</r3d:versioning>
    <r3d:citationGuidelineURL>https://github.com/ELI-Data-Mining-Group/PELIC-dataset</r3d:citationGuidelineURL>
    <r3d:enhancedPublication>yes</r3d:enhancedPublication>
    <r3d:qualityManagement>unknown</r3d:qualityManagement>
    <r3d:remarks>A small subset of these files which are annotated in CHAT/CLAN and a list of published research is available at Talkbank.org. https://slabank.talkbank.org/access/English/Vercellotti.html</r3d:remarks>

    <!-- Record bookkeeping dates -->
    <r3d:entryDate>2020-08-20</r3d:entryDate>
    <r3d:lastUpdate>2020-09-15</r3d:lastUpdate>
  </r3d:repository>
</r3d:re3data>