From adb798faa5cb4c96f639fd0fde2c126cc0a9768b Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Fri, 29 May 2020 12:03:51 +0200 Subject: [PATCH 1/5] import from db using is vocabularies --- .../raw/GenerateEntitiesApplication.java | 40 +------------- .../raw/MigrateDbEntitiesApplication.java | 49 ++++++++++++----- .../oa/graph/raw/common/VocabularyGroup.java | 41 +++++++++++++- .../graph/migrate_db_entities_parameters.json | 6 +++ .../oa/graph/raw_all/oozie_app/workflow.xml | 1 + .../oa/graph/raw_step1/oozie_app/workflow.xml | 5 ++ .../graph/sql/queryDatasourceOrganization.sql | 27 +++++----- .../dhp/oa/graph/sql/queryDatasources.sql | 53 ++++++------------- .../dhp/oa/graph/sql/queryOrganizations.sql | 5 +- .../sql/queryOrganizationsFromOpenOrgsDB.sql | 8 +-- .../oa/graph/sql/queryProjectOrganization.sql | 4 +- .../dhp/oa/graph/sql/queryProjects.sql | 17 +++--- .../oa/graph/sql/queryProjects_production.sql | 23 +++----- .../raw/MigrateDbEntitiesApplicationTest.java | 36 ++++++++----- ...atasourceorganization_resultset_entry.json | 2 +- .../raw/datasources_resultset_entry.json | 8 +-- .../raw/organizations_resultset_entry.json | 4 +- .../projectorganization_resultset_entry.json | 4 +- .../graph/raw/projects_resultset_entry.json | 2 +- 19 files changed, 173 insertions(+), 162 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 3becdec448..d6ddc1fddb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -39,9 +39,6 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class GenerateEntitiesApplication { @@ -68,13 +65,9 @@ public class GenerateEntitiesApplication { final String sourcePaths = parser.get("sourcePaths"); final String targetPath = parser.get("targetPath"); - // final String dbUrl = parser.get("postgresUrl"); - // final String dbUser = parser.get("postgresUser"); - // final String dbPassword = parser.get("postgresPassword"); + final String isLookupUrl = parser.get("islookup"); - final String isLookupUrl = parser.get("isLookupUrl"); - - final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc + final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { @@ -165,35 +158,6 @@ public class GenerateEntitiesApplication { } } - private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { - final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - - final String xquery = IOUtils - .toString( - GenerateEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); - - final VocabularyGroup vocs = new VocabularyGroup(); - - for (final String s : isLookUpService.quickSearchProfile(xquery)) { - final String[] arr = s.split("@=@"); - if (arr.length == 4) { - final String vocId = arr[0].trim(); - final String vocName = arr[1].trim(); - final String termId = arr[2].trim(); - final String termName = arr[3].trim(); - - if (!vocs.vocabularyExists(vocId)) { - vocs.addVocabulary(vocId, vocName); - } - - vocs.addTerm(vocId, termId, termName); - } - } - - return vocs; - } - private static Oaf convertFromJson(final String s, final Class clazz) { try { return OBJECT_MAPPER.readValue(s, clazz); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 5b8296c19b..899ce6b948 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -10,7 +10,28 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; import java.io.Closeable; import java.io.IOException; @@ -32,6 +53,7 @@ import org.apache.commons.logging.LogFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -61,6 +83,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private final long lastUpdateTimestamp; + private final VocabularyGroup vocs; + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -73,13 +97,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final String dbUrl = parser.get("postgresUrl"); final String dbUser = parser.get("postgresUser"); final String dbPassword = parser.get("postgresPassword"); + final String isLookupUrl = parser.get("islookup"); final String hdfsPath = parser.get("hdfsPath"); final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, - dbPassword)) { + dbPassword, isLookupUrl)) { if (processClaims) { log.info("Processing claims..."); smdbe.execute("queryClaims.sql", smdbe::processClaims); @@ -103,18 +128,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST + protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST super(); this.dbClient = null; this.lastUpdateTimestamp = new Date().getTime(); + this.vocs = vocs; } public MigrateDbEntitiesApplication( - final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) + final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword, + final String isLookupUrl) throws Exception { super(hdfsPath); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.lastUpdateTimestamp = new Date().getTime(); + this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); } public void execute(final String sqlFile, final Function> producer) @@ -453,12 +481,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Boolean inferred = rs.getBoolean("inferred"); final String trust = rs.getString("trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - ENTITYREGISTRY_PROVENANCE_ACTION, - trust); + deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); } private Qualifier prepareQualifierSplitting(final String s) { @@ -466,7 +489,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return null; } final String[] arr = s.split("@@@"); - return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; + return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null; } private List> prepareListFields(final Array array, final DataInfo info) { @@ -485,8 +508,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i if (parts.length == 2) { final String value = parts[0]; final String[] arr = parts[1].split("@@@"); - if (arr.length == 4) { - return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); + if (arr.length == 2) { + return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo); } } return null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java index 127f73e220..9991fa4cf4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -1,13 +1,50 @@ package eu.dnetlib.dhp.oa.graph.raw.common; +import java.io.IOException; import java.util.HashMap; import java.util.Map; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; + +import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class VocabularyGroup { + public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); + + final String xquery = IOUtils + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + + final VocabularyGroup vocs = new VocabularyGroup(); + + for (final String s : isLookUpService.quickSearchProfile(xquery)) { + final String[] arr = s.split("@=@"); + if (arr.length == 4) { + final String vocId = arr[0].trim(); + final String vocName = arr[1].trim(); + final String termId = arr[2].trim(); + final String termName = arr[3].trim(); + + if (!vocs.vocabularyExists(vocId)) { + vocs.addVocabulary(vocId, vocName); + } + + vocs.addTerm(vocId, termId, termName); + } + } + + return vocs; + } + private final Map vocs = new HashMap<>(); public void addVocabulary(final String id, final String name) { @@ -29,7 +66,9 @@ public class VocabularyGroup { } public Qualifier getTermAsQualifier(final String vocId, final String id) { - if (termExists(vocId, id)) { + if (StringUtils.isBlank(id)) { + return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId); + } else if (termExists(vocId, id)) { final Vocabulary v = vocs.get(vocId.toLowerCase()); final VocabularyTerm t = v.getTerm(id); return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json index cb13ff0242..edecb97b72 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json @@ -28,5 +28,11 @@ "paramLongName": "action", "paramDescription": "process claims", "paramRequired": false + }, + { + "paramName": "islookup", + "paramLongName": "islookup", + "paramDescription": "the url of the ISLookupService", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index f8426c35f2..5eee923427 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -173,6 +173,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml index f16e22f957..8684181526 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml @@ -24,6 +24,10 @@ mongoDb mongo database + + isLookupUrl + the address of the lookUp service + sparkDriverMemory memory for driver process @@ -62,6 +66,7 @@ -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} + -islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql index 745f83971a..687377aa48 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql @@ -1,17 +1,16 @@ SELECT - dor.datasource AS datasource, - dor.organization AS organization, - NULL AS startdate, - NULL AS enddate, - false AS inferred, - false AS deletedbyinference, - 0.9 AS trust, - NULL AS inferenceprovenance, - dc.id AS collectedfromid, - dc.officialname AS collectedfromname, - 'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics, - d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction - + dor.datasource AS datasource, + dor.organization AS organization, + NULL AS startdate, + NULL AS enddate, + false AS inferred, + false AS deletedbyinference, + 0.9 AS trust, + NULL AS inferenceprovenance, + dc.id AS collectedfromid, + dc.officialname AS collectedfromname, + 'providedBy@@@dnet:datasources_organizations_typologies' AS semantics, + d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction FROM dsm_datasource_organization dor - LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) + LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index ff1178c711..43b0f8f4b1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -7,36 +7,36 @@ SELECT CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1']) THEN - 'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - 'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN - 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN - 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) THEN - 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE - 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END AS openairecompatibility, d.websiteurl AS websiteurl, d.logourl AS logourl, @@ -47,7 +47,7 @@ SELECT NULL AS odnumberofitems, NULL AS odnumberofitemsdate, - (SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies') + (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies') FROM UNNEST( ARRAY( SELECT trim(s) @@ -83,32 +83,9 @@ SELECT ARRAY[]::text[] AS policies, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - d.typology || '@@@' || CASE - WHEN (d.typology = 'crissystem') THEN 'CRIS System' - WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository' - WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator' - WHEN (d.typology = 'infospace') THEN 'Information Space' - WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository' - WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator' - WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal' - WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher' - WHEN (d.typology = 'pubsrepository::mock') THEN 'Other' - WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue' - WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository' - WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator' - WHEN (d.typology = 'entityregistry') THEN 'Registry' - WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure' - WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository' - WHEN (d.typology = 'websource') THEN 'Web Source' - WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database' - WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories' - WHEN (d.typology = 'softwarerepository') THEN 'Software Repository' - WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator' - WHEN (d.typology = 'orprepository') THEN 'Repository' - ELSE 'Other' - END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, - CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal + d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, + d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal FROM dsm_datasources d diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index d13bd43425..3e5de80711 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -22,13 +22,12 @@ SELECT '' AS inferenceprovenance, d.id AS collectedfromid, d.officialname AS collectedfromname, - o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, ARRAY[]::text[] AS pid FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) - LEFT OUTER JOIN class cntr ON (cntr.code = o.country) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql index 99c8e04b4b..3396f365c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql @@ -11,8 +11,8 @@ SELECT '' AS inferenceprovenance, 'openaire____::openorgs' AS collectedfromid, 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM organizations o LEFT OUTER JOIN acronyms a ON (a.id = o.id) @@ -40,8 +40,8 @@ SELECT '' AS inferenceprovenance, 'openaire____::openorgs' AS collectedfromid, 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM other_names n LEFT OUTER JOIN organizations o ON (n.id = o.id) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql index 4c06ca5b9a..13cfca8715 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql @@ -11,8 +11,8 @@ SELECT NULL AS inferenceprovenance, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction + po.semanticclass || '@@@dnet:project_organization_relations' AS semantics, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction FROM project_organization po LEFT OUTER JOIN projects p ON (p.id = po.project) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql index 685b57ab65..db0da83f77 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql @@ -31,17 +31,14 @@ SELECT p.fundedamount AS fundedamount, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype, - pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, + p.contracttype || '@@@' || p.contracttypescheme AS contracttype, + p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, - array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, + array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects, array_agg(DISTINCT fp.path) AS fundingtree FROM projects p - LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass) - LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme) - LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) @@ -53,9 +50,6 @@ SELECT LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN subjects s ON (s.id = ps.subject) - LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) - LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) - GROUP BY p.id, p.code, @@ -85,5 +79,6 @@ SELECT p.fundedamount, dc.id, dc.officialname, - pac.code, pac.name, pas.code, pas.name, - p.contracttype , p.contracttypename, p.contracttypescheme; \ No newline at end of file + p.contracttype, + p.contracttypescheme; + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql index 6cff188756..234bb7c3eb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql @@ -28,18 +28,15 @@ SELECT p.summary AS summary, p.currency AS currency, p.totalcost AS totalcost, - p.fundedamount AS fundedamount, + p.fundedamount AS fundedamount, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype, - pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, + p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype, + p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, - array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, + array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects, array_agg(DISTINCT fp.path) AS fundingtree FROM projects p - LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass) - LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme) - LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) @@ -51,12 +48,6 @@ SELECT LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN subjects s ON (s.id = ps.subject) - LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) - LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) - - LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass) - LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme) - GROUP BY p.id, p.code, @@ -85,6 +76,6 @@ SELECT p.totalcost, p.fundedamount, dc.id, - dc.officialname, - pac.code, pac.name, pas.code, pas.name, - ctc.code, ctc.name, cts.code, cts.name; \ No newline at end of file + dc.officialname + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 1bbe57ee83..22fcb36c98 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.lenient; import java.io.IOException; import java.sql.Array; @@ -25,6 +27,8 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; @@ -40,9 +44,22 @@ public class MigrateDbEntitiesApplicationTest { @Mock private ResultSet rs; + @Mock + private VocabularyGroup vocs; + @BeforeEach public void setUp() { - this.app = new MigrateDbEntitiesApplication(); + lenient() + .when(vocs.getTermAsQualifier(anyString(), anyString())) + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); + + lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); + + this.app = new MigrateDbEntitiesApplication(vocs); } @Test @@ -61,8 +78,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); - assertEquals( - ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test @@ -78,8 +94,7 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); - assertEquals( - p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test @@ -99,13 +114,10 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]); - assertEquals( - o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[1]); - assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[2]); - assertEquals( - o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]); - assertEquals( - o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]); + assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]); + assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]); + assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json index 3a0318ed7b..2baf7c8f17 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json @@ -52,7 +52,7 @@ { "field": "semantics", "type": "not_used", - "value": "providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies" + "value": "providedBy@@@dnet:datasources_organizations_typologies" }, { "field": "provenanceaction", diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index 71e84954f6..f4c5f97ed1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -30,7 +30,7 @@ { "field": "openairecompatibility", "type": "string", - "value": "hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel" + "value": "hostedBy@@@dnet:datasourceCompatibilityLevel" }, { "field": "websiteurl", @@ -219,16 +219,16 @@ { "field": "datasourcetype", "type": "string", - "value": "pubsrepository::journal@@@Journal@@@dnet:datasource_typologies@@@dnet:datasource_typologies" + "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" }, { "field": "journal", "type": "string", - "value": "2579-5449@@@2597-6540@@@" + "value": "2579-5449 @@@ 2597-6540 @@@ " } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json index f766246bcc..38657a1e1f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json @@ -117,11 +117,11 @@ { "field": "country", "type": "string", - "value": "US@@@US@@@dnet:countries@@@dnet:countries" + "value": "US@@@dnet:countries" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json index 855e1a4839..4311086e74 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json @@ -62,11 +62,11 @@ { "field": "semantics", "type": "not_used", - "value": "coordinator@@@coordinator@@@dnet:project_organization_relations@@@dnet:project_organization_relations" + "value": "coordinator@@@dnet:project_organization_relations" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json index 7d6ebffbee..d6109cac1f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json @@ -167,7 +167,7 @@ { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@Harvested@@@dnet:provenanceActions@@@dnet:provenanceActions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenanceActions" }, { "field": "pid", From 1577bd5b8b446e63b825b5a956c0ce7726910fa1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 29 May 2020 16:18:16 +0200 Subject: [PATCH 2/5] added IsLookupUrl to the raw_db workflow parameters --- .../eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 05b85a5615..93018e6741 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -16,6 +16,10 @@ postgresPassword the password postgres + + isLookupUrl + the address of the lookUp service + sparkDriverMemory @@ -88,6 +92,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --islookup${isLookupUrl} @@ -103,6 +108,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --islookup${isLookupUrl} --actionclaims From 54ca8ed6c3c9c34bd39e36af02ca492c8fe25b19 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 29 May 2020 18:17:30 +0200 Subject: [PATCH 3/5] uniformed param name (isLookupUrl), Vocab model classes defined as Serializable --- .../graph/raw/GenerateEntitiesApplication.java | 8 ++++++-- .../graph/raw/MigrateDbEntitiesApplication.java | 17 +++++++++++++---- .../dhp/oa/graph/raw/common/Vocabulary.java | 3 ++- .../oa/graph/raw/common/VocabularyGroup.java | 3 ++- .../dhp/oa/graph/raw/common/VocabularyTerm.java | 4 +++- .../oa/graph/generate_entities_parameters.json | 4 ++-- .../graph/migrate_db_entities_parameters.json | 4 ++-- .../dhp/oa/graph/raw_all/oozie_app/workflow.xml | 7 ++++--- .../dhp/oa/graph/raw_db/oozie_app/workflow.xml | 4 ++-- 9 files changed, 36 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index d6ddc1fddb..8e5ba9cd11 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -63,9 +63,13 @@ public class GenerateEntitiesApplication { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String sourcePaths = parser.get("sourcePaths"); - final String targetPath = parser.get("targetPath"); + log.info("sourcePaths: {}", sourcePaths); - final String isLookupUrl = parser.get("islookup"); + final String targetPath = parser.get("targetPath"); + log.info("targetPath: {}", targetPath); + + final String isLookupUrl = parser.get("isLookupUrl"); + log.info("isLookupUrl: {}", isLookupUrl); final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 899ce6b948..d42754fdcc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -47,8 +47,6 @@ import java.util.function.Function; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; @@ -71,10 +69,12 @@ import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { - private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); + private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); public static final String SOURCE_TYPE = "source_type"; public static final String TARGET_TYPE = "target_type"; @@ -95,13 +95,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i parser.parseArgument(args); final String dbUrl = parser.get("postgresUrl"); + log.info("postgresUrl: {}", dbUrl); + final String dbUser = parser.get("postgresUser"); + log.info("postgresUser: {}", dbUser); + final String dbPassword = parser.get("postgresPassword"); - final String isLookupUrl = parser.get("islookup"); + log.info("postgresPassword: xxx"); + + final String isLookupUrl = parser.get("isLookupUrl"); + log.info("isLookupUrl: {}", isLookupUrl); final String hdfsPath = parser.get("hdfsPath"); + log.info("hdfsPath: {}", hdfsPath); final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); + log.info("processClaims: {}", processClaims); try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, dbPassword, isLookupUrl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java index 7714f6d90c..c955ee6404 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.raw.common; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; -public class Vocabulary { +public class Vocabulary implements Serializable { private final String id; private final String name; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java index 9991fa4cf4..43ed7f2d9e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.graph.raw.common; import java.io.IOException; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; @@ -14,7 +15,7 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -public class VocabularyGroup { +public class VocabularyGroup implements Serializable { public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java index b3c7859231..1aa1b82539 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.oa.graph.raw.common; -public class VocabularyTerm { +import java.io.Serializable; + +public class VocabularyTerm implements Serializable { private final String id; private final String name; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index 9e3992bcfb..8342dde959 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -18,8 +18,8 @@ "paramRequired": true }, { - "paramName": "islookup", - "paramLongName": "islookup", + "paramName": "isu", + "paramLongName": "isLookupUrl", "paramDescription": "the url of the ISLookupService", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json index edecb97b72..4e838561d5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json @@ -30,8 +30,8 @@ "paramRequired": false }, { - "paramName": "islookup", - "paramLongName": "islookup", + "paramName": "isu", + "paramLongName": "isLookupUrl", "paramDescription": "the url of the ISLookupService", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index 5eee923427..c2bea9f8a2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -123,6 +123,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --isLookupUrl${isLookupUrl} --actionclaims @@ -173,7 +174,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} @@ -238,7 +239,7 @@ --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims --targetPath${workingDir}/entities_claim - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} @@ -285,7 +286,7 @@ --sourcePaths${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records --targetPath${workingDir}/entities - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 93018e6741..575f9229ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -92,7 +92,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} @@ -108,7 +108,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} --actionclaims From 5e23fb3a748cf85e8bad0a9f9155003b987d829a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Sat, 30 May 2020 10:52:56 +0200 Subject: [PATCH 4/5] code formatting --- .../raw/MigrateDbEntitiesApplication.java | 4 +- .../oa/provision/XmlRecordFactoryTest.java | 48 ++++++++++--------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index d42754fdcc..653027039a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -47,6 +47,8 @@ import java.util.function.Function; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; @@ -69,8 +71,6 @@ import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index f485ea6804..885c7c425b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,9 +1,11 @@ + package eu.dnetlib.dhp.oa.provision; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; -import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.IOException; +import java.io.StringReader; + import org.apache.commons.io.IOUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -11,37 +13,39 @@ import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.io.StringReader; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; +import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; public class XmlRecordFactoryTest { - private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; + private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + @Test + public void testXMLRecordFactory() throws IOException, DocumentException { - String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json")); + String json = IOUtils.toString(getClass().getResourceAsStream("joined_entity.json")); - assertNotNull(json); - JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); - assertNotNull(je); + assertNotNull(json); + JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class); + assertNotNull(je); - ContextMapper contextMapper = new ContextMapper(); + ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId); + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + otherDsTypeId); - String xml = xmlRecordFactory.build(je); + String xml = xmlRecordFactory.build(je); - assertNotNull(xml); + assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + Document doc = new SAXReader().read(new StringReader(xml)); - assertNotNull(doc); + assertNotNull(doc); - System.out.println(doc.asXML()); + System.out.println(doc.asXML()); - } + } } From 97177d7f7b67f03a40c19939f123ec126b670d91 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Thu, 4 Jun 2020 10:26:34 +0200 Subject: [PATCH 5/5] partial refactoring --- .../broker/oa/GenerateEventsApplication.java | 54 ++++++++++++------- .../dhp/broker/oa/util/BrokerConstants.java | 16 +++++- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index d5e577972d..045104720c 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -57,7 +57,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -87,25 +86,32 @@ public class GenerateEventsApplication { private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = + new EnrichMissingPublicationIsReferencedBy(); private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = + new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = + new EnrichMissingPublicationIsSupplementedBy(); - private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); - private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = + new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = + new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = + new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = + new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = + new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString(GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -128,10 +134,13 @@ public class GenerateEventsApplication { final JavaRDD eventsRdd = sc.emptyRDD(); - eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class)); + for (final Class r1 : BrokerConstants.RESULT_CLASSES) { + eventsRdd.union(generateSimpleEvents(spark, graphPath, r1)); + + for (final Class r2 : BrokerConstants.RESULT_CLASSES) { + eventsRdd.union(generateRelationEvents(spark, graphPath, r1, r2)); + } + } eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class); }); @@ -146,9 +155,8 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = readPath( - spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + .filter(r -> r.getDataInfo().getDeletedbyinference()); final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); @@ -169,6 +177,14 @@ public class GenerateEventsApplication { } + private static JavaRDD generateRelationEvents(final SparkSession spark, + final String graphPath, + final Class sourceClass, + final Class targetClass) { + // TODO Auto-generated method stub + return null; + } + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 8e97192bfb..06a46b7dab 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -1,9 +1,21 @@ package eu.dnetlib.dhp.broker.oa.util; +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + public class BrokerConstants { - public final static String OPEN_ACCESS = "OPEN"; - public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + public static final String OPEN_ACCESS = "OPEN"; + public static final String IS_MERGED_IN_CLASS = "isMergedIn"; + + public static final List> RESULT_CLASSES = + Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class); }