1
0
Fork 0

Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop

This commit is contained in:
sandro 2020-10-20 16:09:49 +02:00
commit 271b4db450
4 changed files with 244 additions and 45 deletions

View File

@ -304,9 +304,40 @@ public class MappersTest {
assertValidId(d.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertEquals(1, d.getAuthor().size());
assertEquals(0, d.getSubject().size());
assertEquals(1, d.getSubject().size());
assertEquals(1, d.getInstance().size());
assertEquals(1, d.getPid().size());
assertNotNull(d.getInstance().get(0).getUrl());
}
/**
 * Maps the {@code oaf_claim_crossref.xml} fixture with {@code OafToOafMapper} and checks the
 * first produced entity: it is cast to {@code Publication}, its id and the key of its first
 * collectedfrom entry must pass {@code assertValidId}, and its first title must be non-blank.
 *
 * @throws IOException if the fixture cannot be read or the result cannot be serialized to JSON
 */
@Test
void testClaimFromCrossref() throws IOException {
    final String xml;
    // The fixture declares encoding="UTF-8": decode it explicitly instead of relying on the
    // platform default charset (the charset-less IOUtils.toString overload is deprecated for
    // that reason), and close the classpath stream once it has been consumed.
    // JDK types are fully qualified so this method does not depend on additional imports.
    try (final java.io.InputStream in = getClass().getResourceAsStream("oaf_claim_crossref.xml")) {
        xml = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
    }

    final List<Oaf> list = new OafToOafMapper(vocs, false).processMdRecord(xml);

    // Dump the mapped entities as JSON to ease debugging of a failing run.
    System.out.println("***************");
    System.out.println(new ObjectMapper().writeValueAsString(list));
    System.out.println("***************");

    final Publication p = (Publication) list.get(0);
    assertValidId(p.getId());
    assertValidId(p.getCollectedfrom().get(0).getKey());
    System.out.println(p.getTitle().get(0).getValue());
    assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
}
/**
 * Maps the {@code odf_record.xml} fixture with {@code OdfToOafMapper} and checks the first
 * produced entity: it is cast to {@code Dataset}, its id and the key of its first
 * collectedfrom entry must pass {@code assertValidId}, and its first title must be non-blank.
 *
 * @throws IOException if the fixture cannot be read or the result cannot be serialized to JSON
 */
@Test
void testODFRecord() throws IOException {
    final String xml;
    // The fixture is UTF-8 and may contain non-ASCII text: decode it explicitly instead of
    // relying on the platform default charset (the charset-less IOUtils.toString overload is
    // deprecated for that reason), and close the classpath stream once it has been consumed.
    // JDK types are fully qualified so this method does not depend on additional imports.
    try (final java.io.InputStream in = getClass().getResourceAsStream("odf_record.xml")) {
        xml = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
    }

    final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);

    // Dump the mapped entities as JSON to ease debugging of a failing run.
    System.out.println("***************");
    System.out.println(new ObjectMapper().writeValueAsString(list));
    System.out.println("***************");

    final Dataset p = (Dataset) list.get(0);
    assertValidId(p.getId());
    assertValidId(p.getCollectedfrom().get(0).getKey());
    System.out.println(p.getTitle().get(0).getValue());
    assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
}
private void assertValidId(final String id) {

View File

@ -0,0 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header xmlns="http://namespace.openaire.eu/">
<dri:objIdentifier>userclaim___::7f0f7807f17db50e5c2b5c452ccaf06d</dri:objIdentifier>
<dri:recordIdentifier>userclaim___::7f0f7807f17db50e5c2b5c452ccaf06d</dri:recordIdentifier>
<dri:dateOfCollection>2020-08-06T07:04:09.62Z</dri:dateOfCollection>
<dri:mdFormat/>
<dri:mdFormatInterpretation/>
<dri:repositoryId/>
<dr:objectIdentifier/>
<dr:dateOfCollection/>
<dr:dateOfTransformation>2020-08-06T07:20:57.911Z</dr:dateOfTransformation>
<oaf:datasourceprefix>openaire____</oaf:datasourceprefix>
</header>
<metadata xmlns="http://namespace.openaire.eu/">
<dc:title>A case report of serious haemolysis in a glucose-6-phosphate dehydrogenase-deficient COVID-19 patient receiving hydroxychloroquine</dc:title>
<dc:creator>Maillart, E.</dc:creator>
<dc:creator>Leemans, S.</dc:creator>
<dc:creator>Van Noten, H.</dc:creator>
<dc:creator>Vandergraesen, T.</dc:creator>
<dc:creator>Mahadeb, B.</dc:creator>
<dc:creator>Salaouatchi, M. T.</dc:creator>
<dc:creator>De Bels, D.</dc:creator>
<dc:creator>Clevenbergh, P.</dc:creator>
<dc:date/>
<dc:identifier>http://dx.doi.org/10.1080/23744235.2020.1774644</dc:identifier>
<dc:language/>
<dc:publisher>Informa UK Limited</dc:publisher>
<dc:source>Crossref</dc:source>
<dc:source>Infectious Diseases</dc:source>
<dc:subject>Microbiology (medical)</dc:subject>
<dc:subject>General Immunology and Microbiology</dc:subject>
<dc:subject>Infectious Diseases</dc:subject>
<dc:subject>General Medicine</dc:subject>
<dc:type>journal-article</dc:type>
<dr:CobjCategory type="publication">0001</dr:CobjCategory>
<oaf:dateAccepted>2020-06-04</oaf:dateAccepted>
<oaf:projectid/>
<oaf:accessrights>UNKNOWN</oaf:accessrights>
<oaf:hostedBy
id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" name="Unknown Repository"/>
<oaf:collectedFrom id="openaire____::crossref" name="Crossref"/>
<oaf:identifier identifierType="doi">10.1080/23744235.2020.1774644</oaf:identifier>
<oaf:journal eissn="2374-4243" ep="3" iss="" issn="2374-4235" sp="1" vol="">Infectious Diseases</oaf:journal>
</metadata>
<about xmlns:oai="http://www.openarchives.org/OAI/2.0/">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2020-08-06T07:04:09.62Z">
<baseURL>file%3A%2F%2F%2Fsrv%2Fclaims%2Frecords%2Fpublication%2Fcrossref</baseURL>
<identifier/>
<datestamp/>
<metadataNamespace/>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="user:claim" classname="user:claim"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>

View File

@ -1,77 +1,75 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<dri:objIdentifier>r3f5b9831893::cca7367159bc3ff90cd2f75bf9dc21c4</dri:objIdentifier>
<dri:recordIdentifier>oai:nakala.fr:hdl_11280_847e01df</dri:recordIdentifier>
<dri:dateOfCollection>2020-08-01T00:16:24.742Z</dri:dateOfCollection>
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<dri:objIdentifier>r3f5b9831893::01a497c6c6b44289c52dcdf22b6c0fc0</dri:objIdentifier>
<dri:recordIdentifier>oai:nakala.fr:hdl_11280_50f302c6</dri:recordIdentifier>
<dri:dateOfCollection>2020-10-03T06:06:52.228Z</dri:dateOfCollection>
<oaf:datasourceprefix>r3f5b9831893</oaf:datasourceprefix>
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:nakala.fr:hdl_11280_847e01df</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2020-06-08T01:01:38Z</datestamp>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_2b09fc10</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_c1bc48d0</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_57c8db3a</setSpec>
<dr:dateOfTransformation>2020-08-01T00:31:35.625Z</dr:dateOfTransformation>
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:nakala.fr:hdl_11280_50f302c6</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2020-09-19T23:56:08Z</datestamp>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_96355742</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_26914437</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11280_86561837</setSpec>
<dr:dateOfTransformation>2020-10-19T15:39:52.151Z</dr:dateOfTransformation>
</oai:header>
<metadata>
<datacite:resource xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 https://schema.datacite.org/meta/kernel-4/metadata.xsd">
<datacite:alternateIdentifier identifierType="URL" xmlns:datacite="http://datacite.org/schema/kernel-4/">277</datacite:alternateIdentifier>
<datacite:identifier identifierType="Handle" xmlns:datacite="http://datacite.org/schema/kernel-4/">http://hdl.handle.net/11280/847e01df</datacite:identifier>
<alternateIdentifiers>
<alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/http://hdl.handle.net/11280/847e01df</alternateIdentifier>
</alternateIdentifiers>
<datacite:alternateIdentifier identifierType="URL" xmlns:datacite="http://datacite.org/schema/kernel-4/">http://nakala.fr/data/11280/847e01df</datacite:alternateIdentifier>
<datacite:creators xmlns:datacite="http://datacite.org/schema/kernel-4/">
<alternateIdentifier alternateIdentifierType="URL" xmlns="http://datacite.org/schema/kernel-4">http://nakala.fr/data/11280/50f302c6</alternateIdentifier>
<identifier identifierType="Handle" xmlns="http://datacite.org/schema/kernel-4">11280/50f302c6</identifier>
<datacite:creators>
<datacite:creator>
<datacite:creatorName>DHAAP</datacite:creatorName>
<datacite:creatorName>Desbrosse, Xavier</datacite:creatorName>
</datacite:creator>
</datacite:creators>
<datacite:titles xmlns:datacite="http://datacite.org/schema/kernel-4/">
<datacite:title>CVP_Notice277-1 place du Docteur Antoine Béclère _PHO02.jpg</datacite:title>
<datacite:titles>
<datacite:title>Les rues Stalingrad en France (1945-2013)</datacite:title>
</datacite:titles>
<datacite:descriptions xmlns:datacite="http://datacite.org/schema/kernel-4/">
<datacite:description descriptionType="Abstract">Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris.</datacite:description>
<datacite:description descriptionType="Abstract">Nfa_1146</datacite:description>
<datacite:description descriptionType="Abstract">Hôpital Saint-Antoine. Fragment de dalle funéraire trouvée en décembre 1932. Paris (XIIème arr.). Photographie d'Albert Citerne (1876-1970). Plaque de verre, 1932. Département Histoire de l'Architecture et Archéologie de Paris.</datacite:description>
<datacite:subjects>
<datacite:subject>Rues Noms -- France</datacite:subject>
</datacite:subjects>
<datacite:descriptions>
<datacite:description descriptionType="Abstract">Cette carte appartient à la collection « Guerre froide vue d'en bas » élaborée dans le cadre de l'enquête 2009-2013 du réseau des correspondants départementaux de l'IHTP « La Guerre froide vue d'en bas : 1947-1967 », enquête conduite sous la direction de Philippe Buton Professeur d'Histoire contemporaine à l'Université de Reims, d'Olivier Büttner Ingénieur de Recherche IHTP-CNRS et de Michel Hastings, Professeur de Science politique à l'Institut d'Etudes Politiques de Lille.</datacite:description>
</datacite:descriptions>
<datacite:publisher xmlns:datacite="http://datacite.org/schema/kernel-4/">Nakala by Huma-Num</datacite:publisher>
<datacite:contributors xmlns:datacite="http://datacite.org/schema/kernel-4/">
<datacite:publisher>IHTP-CNRS</datacite:publisher>
<datacite:contributors>
<datacite:contributor contributorType="Other">
<datacite:contributorName>DHAAP, Pôle Archéologique</datacite:contributorName>
<datacite:contributorName>(CNRS), Institut d'Histoire du Temps Présent (IHTP) - Centre National de la Recherche Scientifique </datacite:contributorName>
</datacite:contributor>
</datacite:contributors>
<datacite:dates xmlns:datacite="http://datacite.org/schema/kernel-4/">
<datacite:date dateType="Created">1932</datacite:date>
<datacite:dates>
<datacite:date dateType="Created">2013</datacite:date>
</datacite:dates>
<datacite:resourceType resourceTypeGeneral="Image" xmlns:datacite="http://datacite.org/schema/kernel-4/">StillImage</datacite:resourceType>
<datacite:rightsList xmlns:datacite="http://datacite.org/schema/kernel-4/">
<datacite:rights rightsURI="info:eu-repo/semantics/openAccess"/>
</datacite:rightsList>
<datacite:resourceType resourceTypeGeneral="Image">Carte</datacite:resourceType>
<datacite:geoLocations>
<datacite:geoLocation>
<datacite:geoLocationPlace>France</datacite:geoLocationPlace>
</datacite:geoLocation>
</datacite:geoLocations>
</datacite:resource>
<oaf:identifier identifierType="handle">http://hdl.handle.net/11280/847e01df</oaf:identifier>
<oaf:identifier identifierType="handle">11280/50f302c6</oaf:identifier>
<oaf:concept id="dariah"/>
<dr:CobjCategory type="dataset">0025</dr:CobjCategory>
<oaf:dateAccepted/>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:accessrights>UNKNOWN</oaf:accessrights>
<oaf:language>und</oaf:language>
<oaf:hostedBy id="re3data_____::r3d100012102" name="NAKALA"/>
<oaf:collectedFrom id="re3data_____::r3d100012102" name="NAKALA"/>
</metadata>
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2020-08-01T00:16:24.742Z">
<baseURL>https%3A%2F%2Fwww.nakala.fr%2Foai_oa%2F11280%2F8892ab4b</baseURL>
<identifier>oai:nakala.fr:hdl_11280_847e01df</identifier>
<datestamp>2020-06-08T01:01:38Z</datestamp>
<originDescription altered="true" harvestDate="2020-10-03T06:06:52.228Z">
<baseURL>https%3A%2F%2Fwww.nakala.fr%2Foai_oa%2F11280%2F92c4d30b</baseURL>
<identifier>oai:nakala.fr:hdl_11280_50f302c6</identifier>
<datestamp>2020-09-19T23:56:08Z</datestamp>
<metadataNamespace/>
</originDescription>
</provenance>

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<oai:header xmlns="http://namespace.openaire.eu/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<dri:objIdentifier>r3a507cdacc5::03b31980d9bb3c4609e6005c4a3baba6</dri:objIdentifier>
<dri:recordIdentifier>oai:lindat.mff.cuni.cz:11372/LRT-1844</dri:recordIdentifier>
<dri:dateOfCollection>2020-09-04T14:36:48.411Z</dri:dateOfCollection>
<oaf:datasourceprefix>r3a507cdacc5</oaf:datasourceprefix>
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:lindat.mff.cuni.cz:11372/LRT-1844</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-12-07T11:10:30Z</datestamp>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11858_00-097C-0000-0007-710A-A</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_11858_00-097C-0000-0007-710B-8</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">openaire_data</setSpec>
<dr:dateOfTransformation>2020-09-04T14:39:16.458Z</dr:dateOfTransformation>
</oai:header>
<metadata>
<resource xmlns="http://datacite.org/schema/kernel-4"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<identifier identifierType="Handle">11372/LRT-1844</identifier>
<alternateIdentifiers>
<alternateIdentifier alternateIdentifierType="URL">http://hdl.handle.net/11372/LRT-1844</alternateIdentifier>
</alternateIdentifiers>
<creators>
<creator>
<creatorName>Hercig, Tomáš</creatorName>
</creator>
<creator>
<creatorName>Brychcín, Tomáš</creatorName>
</creator>
<creator>
<creatorName>Svoboda, Lukáš</creatorName>
</creator>
<creator>
<creatorName>Konkol, Michal</creatorName>
</creator>
<creator>
<creatorName>Steinberger, Josef</creatorName>
</creator>
</creators>
<titles>
<title>Restaurant Reviews CZ ABSA corpus v2</title>
</titles>
<publisher>University of West Bohemia, Department of Computer Science and Engineering</publisher>
<publicationYear>2016</publicationYear>
<contributors>
<contributor contributorType="Funder">
<contributorName>European Commission</contributorName>
<nameIdentifier nameIdentifierScheme="info">info:eu-repo/grantAgreement/EC/FP7/630786</nameIdentifier>
</contributor>
</contributors>
<dates>
<date dateType="Issued">2016</date>
<date dateType="Accepted">2016-12-07T11:10:30Z</date>
<date dateType="Available">2016-12-07T11:10:30Z</date>
</dates>
<resourceType resourceTypeGeneral="Dataset">corpus</resourceType>
<rightsList>
<rights rightsURI="info:eu-repo/semantics/openAccess"/>
<rights rightsURI="http://creativecommons.org/licenses/by-nc-sa/4.0/"/>
</rightsList>
<descriptions>
<description descriptionType="Abstract">Restaurant Reviews CZ ABSA - 2.15k reviews with their related target and category
The work done is described in the paper: https://doi.org/10.13053/CyS-20-3-2469</description>
</descriptions>
</resource>
<oaf:identifier identifierType="handle">11372/LRT-1844</oaf:identifier>
<oaf:embargoenddate>2016-12-07</oaf:embargoenddate>
<dr:CobjCategory type="dataset">0021</dr:CobjCategory>
<oaf:dateAccepted>2016-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:license>http://creativecommons.org/licenses/by-nc-sa/4.0/</oaf:license>
<oaf:language>und</oaf:language>
<oaf:projectid>corda_______::630786</oaf:projectid>
<oaf:hostedBy id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
<oaf:collectedFrom id="re3data_____::r3d100010386" name="LINDAT/CLARIN repository"/>
</metadata>
<about xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance">
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
<originDescription altered="true" harvestDate="2020-09-04T14:36:48.411Z">
<baseURL>https%3A%2F%2Flindat.mff.cuni.cz%2Frepository%2Foai%2Fopenaire_data</baseURL>
<identifier>oai:lindat.mff.cuni.cz:11372/LRT-1844</identifier>
<datestamp>2016-12-07T11:10:30Z</datestamp>
<metadataNamespace/>
</originDescription>
</provenance>
<oaf:datainfo>
<oaf:inferred>false</oaf:inferred>
<oaf:deletedbyinference>false</oaf:deletedbyinference>
<oaf:trust>0.9</oaf:trust>
<oaf:inferenceprovenance/>
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
classname="sysimport:crosswalk:datasetarchive"
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
</oaf:datainfo>
</about>
</record>