From 7177a32d75fbc2badc0cb1fbe9bc806ca24dd52e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 10 Jun 2020 10:04:00 +0200 Subject: [PATCH 1/2] import of invisible stores --- .../raw/AbstractMdRecordToOafMapper.java | 13 ++++++---- .../raw/GenerateEntitiesApplication.java | 12 ++++++---- .../raw/MigrateMongoMdstoresApplication.java | 5 ++-- .../dhp/oa/graph/raw/OafToOafMapper.java | 4 ++-- .../dhp/oa/graph/raw/OdfToOafMapper.java | 4 ++-- .../oa/graph/raw_all/oozie_app/workflow.xml | 17 +++++++++++++ .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 24 ++++++++++++++++--- 7 files changed, 60 insertions(+), 19 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index ab1e89187..fc77950d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -63,6 +63,8 @@ public abstract class AbstractMdRecordToOafMapper { protected final VocabularyGroup vocs; + private final boolean invisible; + protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4"; protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3"; protected static final Qualifier ORCID_PID_TYPE = qualifier( @@ -85,8 +87,9 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier( "main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title"); - protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs) { + protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible) { this.vocs = vocs; + this.invisible = invisible; } public List processMdRecord(final String xml) { @@ -112,7 +115,7 @@ public abstract class AbstractMdRecordToOafMapper { return null; } - final DataInfo info = prepareDataInfo(doc); + final DataInfo info = prepareDataInfo(doc, invisible); final long lastUpdateTimestamp = new Date().getTime(); return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp); @@ -510,11 +513,11 @@ public abstract class AbstractMdRecordToOafMapper { return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate); } - protected DataInfo prepareDataInfo(final Document doc) { + protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) { final Node n = doc.selectSingleNode("//oaf:datainfo"); if (n == null) { - return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); + return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9"); } final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); @@ -528,7 +531,7 @@ public abstract class AbstractMdRecordToOafMapper { final String trust = n.valueOf("./oaf:trust"); return dataInfo( - deletedbyinference, inferenceprovenance, inferred, false, + deletedbyinference, inferenceprovenance, inferred, invisible, qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 8e5ba9cd1..4262732a5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -137,10 +137,14 @@ public class GenerateEntitiesApplication { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { - case "native_oaf": - return new OafToOafMapper(vocs).processMdRecord(s); - case "native_odf": - return new OdfToOafMapper(vocs).processMdRecord(s); + case "oaf-store-cleaned": + return new OafToOafMapper(vocs, false).processMdRecord(s); + case "odf-store-cleaned": + return new OdfToOafMapper(vocs, false).processMdRecord(s); + case "oaf-store-intersection": + return new OafToOafMapper(vocs, true).processMdRecord(s); + case "odf-store-intersection": + return new OdfToOafMapper(vocs, true).processMdRecord(s); case "datasource": return Arrays.asList(convertFromJson(s, Datasource.class)); case "organization": diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 00c1dc4bb..e7703bf72 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -26,8 +26,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio IOUtils .toString( MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/migrate_mongo_mstores_parameters.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_mongo_mstores_parameters.json"))); parser.parseArgument(args); final String mongoBaseUrl = parser.get("mongoBaseUrl"); @@ -60,7 +59,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { - emit(xml, "native_" + format); + emit(xml, String.format("%s-%s-%s", format, layout, interpretation)); } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 6f91ce733..e44f830df 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -37,8 +37,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class OafToOafMapper extends AbstractMdRecordToOafMapper { - public OafToOafMapper(final VocabularyGroup vocs) { - super(vocs); + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible) { + super(vocs, invisible); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index bbd9442e1..e6a744fc0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -44,8 +44,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/"; - public OdfToOafMapper(final VocabularyGroup vocs) { - super(vocs); + public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible) { + super(vocs, invisible); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index c2bea9f8a..8c58bf39a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -210,6 +210,23 @@ --mdLayoutstore --mdInterpretationcleaned + + + + + + + + + + eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication + --hdfsPath${contentPath}/oaf_records_invisible + --mongoBaseUrl${mongoURL} + --mongoDb${mongoDb} + --mdFormatOAF + --mdLayoutstore + --mdInterpretationintersection + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index dad427ce4..9bd20303f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.anyString; @@ -55,7 +56,7 @@ public class MappersTest { final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); - final List list = new OafToOafMapper(vocs).processMdRecord(xml); + final List list = new OafToOafMapper(vocs, false).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Publication); @@ -69,6 +70,7 @@ public class MappersTest { assertValidId(p.getId()); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); + assertFalse(p.getDataInfo().getInvisible()); assertTrue(p.getAuthor().size() > 0); final Optional author = p @@ -134,11 +136,27 @@ public class MappersTest { // System.out.println(new ObjectMapper().writeValueAsString(r2)); } + @Test + void testPublicationInvisible() throws IOException { + + final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + + final List list = new OafToOafMapper(vocs, true).processMdRecord(xml); + + assertTrue(list.size() > 0); + assertTrue(list.get(0) instanceof Publication); + + final Publication p = (Publication) list.get(0); + + assertTrue(p.getDataInfo().getInvisible()); + + } + @Test void testDataset() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); - final List list = new OdfToOafMapper(vocs).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); assertEquals(3, list.size()); assertTrue(list.get(0) instanceof Dataset); @@ -220,7 +238,7 @@ public class MappersTest { void testSoftware() throws IOException { final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); - final List list = new OdfToOafMapper(vocs).processMdRecord(xml); + final List list = new OdfToOafMapper(vocs, false).processMdRecord(xml); assertEquals(1, list.size()); assertTrue(list.get(0) instanceof Software); From c08e66e01e2c5ff0726e835f214699be8bb68ab5 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 10 Jun 2020 10:11:56 +0200 Subject: [PATCH 2/2] fixed a workflow parameter --- .../eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 8c58bf39a..9a7e36570 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -254,7 +254,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims + --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims,${contentPath}/oaf_records_invisible --targetPath${workingDir}/entities_claim --isLookupUrl${isLookupUrl}