test records for Solr indexing

This commit is contained in:
Alessia Bardi 2023-06-06 14:34:33 +02:00
parent 654ffcba60
commit 5befd93d7d
4 changed files with 236 additions and 0 deletions

View File

@ -1023,6 +1023,23 @@ class MappersTest {
}
/**
 * Maps the {@code leiden.xml} test record with {@link OdfToOafMapper} and checks that the first
 * mapped entity is a {@link Publication} with a valid id, a non-blank first title and a
 * non-blank first URL on its first instance.
 *
 * @throws IOException if the test record cannot be read from the classpath
 */
@Test
void testLeiden() throws IOException {
	final String xml;
	// Close the classpath stream once read, and decode explicitly as UTF-8 instead of relying on
	// the platform default charset.
	try (java.io.InputStream in = Objects.requireNonNull(getClass().getResourceAsStream("leiden.xml"))) {
		xml = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
	}

	final List<Oaf> actual = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);

	assertNotNull(actual);
	assertFalse(actual.isEmpty());

	// Dump the mapped entities as JSON to ease manual inspection of the mapping result.
	System.out.println("***************");
	System.out.println(new ObjectMapper().writeValueAsString(actual));
	System.out.println("***************");

	// Fail with an assertion error rather than a ClassCastException if the mapper emits another type.
	assertTrue(actual.get(0) instanceof Publication);
	final Publication d = (Publication) actual.get(0);

	assertValidId(d.getId());
	assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
	assertTrue(StringUtils.isNotBlank(d.getInstance().get(0).getUrl().get(0)));
}
private void assertValidId(final String id) {
// System.out.println(id);

View File

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns:oaire="http://namespace.openaire.eu/schema/oaire/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<header xmlns="http://www.openarchives.org/OAI/2.0/">
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:scholarlypublications.universiteitleiden.nl:item_2870593</identifier>
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2021-11-03T14:09:07Z</datestamp>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_1887_4540</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_1887_4539</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_1887_26883</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">hdl_1887_20765</setSpec>
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">open_access</setSpec>
<dr:dateOfTransformation>2023-05-18T01:24:03.623Z</dr:dateOfTransformation>
<dri:objIdentifier>od_______202::0032acf47e4939f8ae28554dfd1240de</dri:objIdentifier>
<dri:recordIdentifier>1887/9526</dri:recordIdentifier>
<dri:dateOfCollection>2023-03-17T13:30:02.026+01:00</dri:dateOfCollection>
<oaf:datasourceprefix>od_______202</oaf:datasourceprefix>
</header>
<metadata>
<datacite:resource>
<datacite:identifier identifierType="Handle">1887/9526</datacite:identifier>
<datacite:alternateIdentifiers>
<datacite:alternateIdentifier alternateIdentifierType="URN">urn:nbn:nl:ui:26-1887/9526</datacite:alternateIdentifier>
<datacite:alternateIdentifier alternateIdentifierType="lucris-id">22146427</datacite:alternateIdentifier>
</datacite:alternateIdentifiers>
<datacite:relatedIdentifiers/>
<datacite:resourceType xs:anyURI="http://purl.org/coar/resource_type/c_3e5a">contribution to journal</datacite:resourceType>
<datacite:version>NA</datacite:version>
<datacite:rightsList>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_abf2">open access</datacite:rights>
<datacite:rights rightsURI="https://hdl.handle.net/1887/license:3">Leiden University Non-exclusive license</datacite:rights>
</datacite:rightsList>
<datacite:titles>
<datacite:title>Tweesporenbeleid bij ontwikkeling Afrikaanse curricula: Afrika-Studiecentrum reageert op Peter Crossman</datacite:title>
</datacite:titles>
<datacite:language>nl</datacite:language>
<datacite:formats>
<datacite:format>application/pdf</datacite:format>
</datacite:formats>
<datacite:creators>
<datacite:creator>
<datacite:creatorName nameType="Personal">Hesseling, G.S.C.M.</datacite:creatorName>
<datacite:givenName>G.S.C.M.</datacite:givenName>
<datacite:familyName>Hesseling</datacite:familyName>
<datacite:nameIdentifier nameIdentifierScheme="DAI"
schemeURI="http://purl.org/info:eu-repo/dai/nl">069053138</datacite:nameIdentifier>
<datacite:affiliation>Afrika Studiecentrum</datacite:affiliation>
</datacite:creator>
</datacite:creators>
<datacite:dates>
<datacite:date dateType="Issued">1999</datacite:date>
</datacite:dates>
<datacite:subjects>
<datacite:subject>Africa</datacite:subject>
<datacite:subject>universities</datacite:subject>
</datacite:subjects>
</datacite:resource>
<oaf:identifier identifierType="Handle">1887/9526</oaf:identifier>
<oaf:identifier identifierType="URN">urn:nbn:nl:ui:26-1887/9526</oaf:identifier>
<dr:CobjCategory type="publication">0038</dr:CobjCategory>
<oaf:dateAccepted>1999-01-01</oaf:dateAccepted>
<oaf:accessrights>OPEN</oaf:accessrights>
<oaf:language>dut/nld</oaf:language>
<oaf:hostedBy name="Leiden University Scholarly Publications Repository"
id="opendoar____::202"/>
<oaf:collectedFrom name="Leiden University Scholarly Publications Repository"
id="opendoar____::202"/>
</metadata>
</record>

View File

@ -128,6 +128,12 @@ public class IndexRecordTransformerTest {
testRecordTransformation(record);
}
/**
 * Runs {@code testRecordTransformation} on the {@code eosc-future/sentinel.xml} test record.
 *
 * @throws IOException if the test record cannot be read from the classpath
 * @throws TransformerException propagated from {@code testRecordTransformation}
 */
@Test
public void testForEOSCFutureSentinel() throws IOException, TransformerException {
	final String record;
	// Fail with a descriptive message when the fixture is missing, close the stream once read,
	// and decode explicitly as UTF-8 instead of relying on the platform default charset.
	try (java.io.InputStream in = java.util.Objects
		.requireNonNull(
			getClass().getResourceAsStream("eosc-future/sentinel.xml"),
			"test resource not found: eosc-future/sentinel.xml")) {
		record = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
	}
	testRecordTransformation(record);
}
@Test
public void testForEdithDemo() throws IOException, TransformerException {
final String record = IOUtils.toString(getClass().getResourceAsStream("edith-demo/10.1098-rsta.2020.0257.xml"));

View File

@ -0,0 +1,138 @@
<record>
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header>
<dri:objIdentifier>doi_dedup___::10a910f4a66b7f4bce8407d7a486a80a</dri:objIdentifier>
<dri:dateOfCollection>2023-04-05T00:36:27+0000</dri:dateOfCollection>
<dri:dateOfTransformation>2023-04-05T07:33:52.185Z</dri:dateOfTransformation>
</header>
<metadata>
<oaf:entity xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://namespace.openaire.eu/oaf http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.2/oaf-0.2.xsd">
<oaf:result>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<originalId>50|datacite____::10a910f4a66b7f4bce8407d7a486a80a</originalId>
<originalId>10.5281/zenodo.6967373</originalId>
<originalId>50|datacite____::172969c66c312a9656fc745f0ec62ce5</originalId>
<originalId>10.5281/zenodo.6969999</originalId>
<originalId>50|datacite____::4fa8f1c89ff11e8e99f9ded870ade80d</originalId>
<originalId>10.5281/zenodo.6967372</originalId>
<originalId>50|datacite____::a466b6173773d742b7a5881682748a8c</originalId>
<originalId>10.5281/zenodo.6970067</originalId>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967373</pid>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6969999</pid>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967372</pid>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6970067</pid>
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Sentinel-3 NDVI ARD and Long Term Statistics (1999-2019) from the Copernicus Global Land Service over Lombardia</title>
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
<creator rank="1">Marasco Pier Lorenzo</creator>
<dateofacceptance>2022-08-05</dateofacceptance>
<description>Sentinel-3 NDVI Analysis Ready Data (ARD) (C_GLS_NDVI_20220101_20220701_Lombardia_S3_2.nc) product provided by the Copernicus Global Land Service [3]. The file C_GLS_NDVI_20220101_20220701_Lombardia_S3_2_masked.nc is derived from C_GLS_NDVI_20220101_20220701_Lombardia_S3_2.nc but values have been scaled (raw_value * ( 1/250) - 0.08) and values lower then -0.08 and greater than 0.92 have been removed (set to missing values). The original dataset can also be discovered through the OpenEO API[5] from the CGLS distributor VITO [4]. Access is free of charge but an EGI registration is needed. The file called Italy.geojson has been created using the Global Administrative Unit Layers GAUL G2015_2014 provided by FAO-UN (see Documentation). It only contains information related to Italy. Further info about drought indexes can be found in the Integrated Drought Management Programme [5] [1] Application of vegetation index and brightness temperature for drought detection [2] NDVI [3] Copernicus Global Land Service [4] Vito [5] OpenEO [5] Integrated Drought Management</description>
<description>These datasets are used for training purposes. See https://pangeo-data.github.io/foss4g-2022/intro.html</description>
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">NDVI</subject>
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">vegetaion</subject>
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">Copernicus Global Land Service</subject>
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">pangeo</subject>
<language classid="eng" classname="English" schemeid="dnet:languages" schemename="dnet:languages" />
<relevantdate classid="issued" classname="issued" schemeid="dnet:dataCite_date" schemename="dnet:dataCite_date">2022-08-05</relevantdate>
<publisher>Zenodo</publisher>
<resulttype classid="dataset" classname="dataset" schemeid="dnet:result_typologies" schemename="dnet:result_typologies" />
<resourcetype classid="UNKNOWN" classname="Unknown" schemeid="dnet:dataCite_resource" schemename="dnet:dataCite_resource" />
<eoscifguidelines code="EOSC::Jupyter Notebook" label="EOSC::Jupyter Notebook" semanticrelation="compliesWith" />
<datainfo>
<inferred>true</inferred>
<deletedbyinference>false</deletedbyinference>
<trust>0.8</trust>
<inferenceprovenance>dedup-result-decisiontree-v3</inferenceprovenance>
<provenanceaction classid="sysimport:dedup" classname="Inferred by OpenAIRE" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions" />
</datainfo>
<rels></rels>
<children>
<result objidentifier="doi_________::4fa8f1c89ff11e8e99f9ded870ade80d">
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967372</pid>
<dateofacceptance>2022-08-05</dateofacceptance>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Sentinel-3 NDVI ARD and Long Term Statistics (1999-2019) from the Copernicus Global Land Service over Lombardia</title>
</result>
<result objidentifier="doi_________::a466b6173773d742b7a5881682748a8c">
<publisher>Zenodo</publisher>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6970067</pid>
<dateofacceptance>2022-08-05</dateofacceptance>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Sentinel-3 NDVI ARD and Long Term Statistics (1999-2019) from the Copernicus Global Land Service over Lombardia</title>
</result>
<result objidentifier="doi_________::172969c66c312a9656fc745f0ec62ce5">
<publisher>Zenodo</publisher>
<dateofacceptance>2022-08-05</dateofacceptance>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6969999</pid>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Sentinel-3 NDVI ARD and Long Term Statistics (1999-2019) from the Copernicus Global Land Service over Lombardia</title>
</result>
<result objidentifier="doi_________::10a910f4a66b7f4bce8407d7a486a80a">
<publisher>Zenodo</publisher>
<dateofacceptance>2022-08-05</dateofacceptance>
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Sentinel-3 NDVI ARD and Long Term Statistics (1999-2019) from the Copernicus Global Land Service over Lombardia</title>
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967373</pid>
</result>
<instance>
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69" />
<dateofacceptance>2022-08-05</dateofacceptance>
<instancetype classid="0021" classname="Dataset" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967373</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels" schemename="dnet:review_levels" />
<license>https://creativecommons.org/licenses/by/4.0/legalcode</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.6967373</url>
</webresource>
</instance>
<instance>
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69" />
<dateofacceptance>2022-08-05</dateofacceptance>
<instancetype classid="0021" classname="Dataset" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6970067</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels" schemename="dnet:review_levels" />
<license>https://creativecommons.org/licenses/by/4.0/legalcode</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.6970067</url>
</webresource>
</instance>
<instance>
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69" />
<dateofacceptance>2022-08-05</dateofacceptance>
<instancetype classid="0021" classname="Dataset" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6969999</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels" schemename="dnet:review_levels" />
<license>https://creativecommons.org/licenses/by/4.0/legalcode</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.6969999</url>
</webresource>
</instance>
<instance>
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" />
<collectedfrom name="Datacite" id="openaire____::9e3be59865b2c1c335d32dae2fe7b254" />
<hostedby name="ZENODO" id="opendoar____::358aee4cc897452c00244351e4d91f69" />
<dateofacceptance>2022-08-05</dateofacceptance>
<instancetype classid="0021" classname="Dataset" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" />
<pid classid="doi" classname="Digital Object Identifier" schemeid="dnet:pid_types" schemename="dnet:pid_types" inferred="false" provenanceaction="sysimport:actionset" trust="0.9">10.5281/zenodo.6967372</pid>
<refereed classid="0000" classname="UNKNOWN" schemeid="dnet:review_levels" schemename="dnet:review_levels" />
<license>https://creativecommons.org/licenses/by/4.0/legalcode</license>
<webresource>
<url>https://doi.org/10.5281/zenodo.6967372</url>
</webresource>
</instance>
</children>
</oaf:result>
</oaf:entity>
</metadata>
</result>
</record>