From b6a7ff3a99e0688f64e24e5f69c887245cece7bb Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 2 May 2022 15:52:33 +0200 Subject: [PATCH] EOSC Services - removed fields from mapping, testing preparation --- .../raw/MigrateDbEntitiesApplication.java | 13 ++-- .../oa/graph/raw_db/oozie_app/workflow.xml | 64 ++++++++++++++++++- .../dhp/oa/graph/sql/queryServices.sql | 1 - .../raw/MigrateDbEntitiesApplicationTest.java | 7 +- .../graph/raw/services_resultset_entry.json | 17 ----- .../dnetlib/dhp/oa/provision/datasource.json | 19 ------ 6 files changed, 68 insertions(+), 53 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 924d53593..c40f9b392 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -148,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("Processing Organizations..."); smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix); - log.info("Processing relationsNoRemoval ds <-> orgs ..."); + log.info("Processing relations services <-> orgs ..."); smdbe .execute( - "queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization, + "queryServiceOrganization.sql", smdbe::processServiceOrganization, verifyNamespacePrefix); log.info("Processing projects <-> orgs ..."); @@ -268,13 +268,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setOdpolicies(field(rs.getString("odpolicies"), info)); ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info)); ds.setLanguages(listValues(rs.getArray("languages"))); - ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info)); ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info)); ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info)); ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info)); ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info)); - ds.setDataprovider(field(rs.getBoolean("dataprovider"), info)); - ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info)); ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info)); ds.setDatauploadtype(field(rs.getString("datauploadtype"), info)); ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info)); @@ -293,10 +290,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i rs.getString("issnLinking"), info)); // Journal ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes"))); - ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes"))); ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); ds.setThematic(rs.getBoolean("thematic")); - ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl")); ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl")); @@ -434,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - public List processDatasourceOrganization(final ResultSet rs) { + public List processServiceOrganization(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); final String orgId = createOpenaireId(20, rs.getString("organization"), true); - final String dsId = createOpenaireId(10, rs.getString("datasource"), true); + final String dsId = createOpenaireId(10, rs.getString("service"), true); final List collectedFrom = listKeyValues( createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname")); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 52462adb4..31b726f39 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -30,6 +30,11 @@ a blacklist of nsprefixes (comma separeted) + + reuseContent + false + reuse content in the aggregator database + sparkDriverMemory memory for driver process @@ -85,12 +90,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('reuseContent') eq false} + ${wf:conf('reuseContent') eq true} + + + + @@ -125,6 +138,55 @@ --actionclaims --nsPrefixBlacklist${nsPrefixBlacklist} + + + + + + + yarn + cluster + GenerateEntities + eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePaths${contentPath}/db_records,${contentPath}/db_claims + --targetPath${workingDir}/entities + --isLookupUrl${isLookupUrl} + --shouldHashIdtrue + + + + + + + + yarn + cluster + GenerateGraph + eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=7680 + + --sourcePath${workingDir}/entities + --graphRawPath${workingDir}/graph_aggregator + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql index f83e077a3..eb70d39e0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryServices.sql @@ -109,7 +109,6 @@ SELECT d.lastconsenttermsofusedate AS lastconsenttermsofusedate, d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, d.thematic AS thematic, - -- REMOVED ???: d.knowledge_graph AS knowledgegraph, array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies, nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl, nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl, diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 69552c4dc..948dbfa50 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -129,9 +129,6 @@ public class MigrateDbEntitiesApplicationTest { getValueAsList("odlanguages", fields), ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList())); assertEquals(getValueAsList("languages", fields), ds.getLanguages()); - assertEquals( - getValueAsList("odcontenttypes", fields), - ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList())); assertEquals( getValueAsList("accessinfopackage", fields), ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList())); @@ -155,13 +152,11 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("certificates", fields), ds.getCertificates()); assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes()); - assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes()); assertEquals("National", ds.getJurisdiction().getClassid()); assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); assertTrue(ds.getThematic()); - assertTrue(ds.getKnowledgegraph()); HashSet cpSchemeId = ds .getContentpolicies() @@ -246,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest { public void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); - final List list = app.processDatasourceOrganization(rs); + final List list = app.processServiceOrganization(rs); assertEquals(2, list.size()); verifyMocks(fields); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json index 70fad3323..445334de5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/services_resultset_entry.json @@ -140,13 +140,6 @@ "Swedish" ] }, - { - "field": "odcontenttypes", - "type": "array", - "value": [ - "Journal articles" - ] - }, { "field": "accessinfopackage", "type": "array", @@ -169,16 +162,6 @@ "type": "string", "value": null }, - { - "field": "dataprovider", - "type": "boolean", - "value": null - }, - { - "field": "serviceprovider", - "type": "boolean", - "value": null - }, { "field": "databaseaccesstype", "type": "string", diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json index 8a23b4e6a..ce6b10826 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/datasource.json @@ -254,24 +254,6 @@ } ], "languages" : [ "English", "German" ], - "odcontenttypes": [ - { - "value": "Journal articles", - "dataInfo": { - "invisible": false, - "inferred": false, - "deletedbyinference": false, - "trust": "0.900", - "inferenceprovenance": null, - "provenanceaction": { - "classid": "sysimport:crosswalk:entityregistry", - "classname": "sysimport:crosswalk:entityregistry", - "schemeid": "dnet:provenanceActions", - "schemename": "dnet:provenanceActions" - } - } - } - ], "accessinfopackage": [ { "value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai", @@ -387,7 +369,6 @@ "schemename": "eosc:jurisdictions" }, "thematic": true, - "knowledgegraph": true, "contentpolicies": [ { "classid": "Journal article",