diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java index d5e577972..045104720 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/GenerateEventsApplication.java @@ -57,7 +57,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -87,25 +86,32 @@ public class GenerateEventsApplication { private static final UpdateMatcher>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); private static final UpdateMatcher>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsReferencedBy = + new EnrichMissingPublicationIsReferencedBy(); private static final UpdateMatcher>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedTo = + new EnrichMissingPublicationIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingPublicationIsSupplementedBy = + new EnrichMissingPublicationIsSupplementedBy(); - private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); - private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); - private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); + private static final UpdateMatcher>, ?> enrichMisissingDatasetIsRelatedTo = + new EnrichMissingDatasetIsRelatedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsReferencedBy = + new EnrichMissingDatasetIsReferencedBy(); + private static final UpdateMatcher>, ?> enrichMissingDatasetReferences = + new EnrichMissingDatasetReferences(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedTo = + new EnrichMissingDatasetIsSupplementedTo(); + private static final UpdateMatcher>, ?> enrichMissingDatasetIsSupplementedBy = + new EnrichMissingDatasetIsSupplementedBy(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils - .toString( - GenerateEventsApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + .toString(GenerateEventsApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); parser.parseArgument(args); final Boolean isSparkSessionManaged = Optional @@ -128,10 +134,13 @@ public class GenerateEventsApplication { final JavaRDD eventsRdd = sc.emptyRDD(); - eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class)); - eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class)); + for (final Class r1 : BrokerConstants.RESULT_CLASSES) { + eventsRdd.union(generateSimpleEvents(spark, graphPath, r1)); + + for (final Class r2 : BrokerConstants.RESULT_CLASSES) { + eventsRdd.union(generateRelationEvents(spark, graphPath, r1, r2)); + } + } eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class); }); @@ -146,9 +155,8 @@ public class GenerateEventsApplication { final String graphPath, final Class resultClazz) { - final Dataset results = readPath( - spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) - .filter(r -> r.getDataInfo().getDeletedbyinference()); + final Dataset results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) + .filter(r -> r.getDataInfo().getDeletedbyinference()); final Dataset rels = readPath(spark, graphPath + "/relation", Relation.class) .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); @@ -169,6 +177,14 @@ public class GenerateEventsApplication { } + private static JavaRDD generateRelationEvents(final SparkSession spark, + final String graphPath, + final Class sourceClass, + final Class targetClass) { + // TODO Auto-generated method stub + return null; + } + private List generateSimpleEvents(final Collection children) { final List> list = new ArrayList<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 8e97192bf..06a46b7da 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -1,9 +1,21 @@ package eu.dnetlib.dhp.broker.oa.util; +import java.util.Arrays; +import java.util.List; + +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; + public class BrokerConstants { - public final static String OPEN_ACCESS = "OPEN"; - public final static String IS_MERGED_IN_CLASS = "isMergedIn"; + public static final String OPEN_ACCESS = "OPEN"; + public static final String IS_MERGED_IN_CLASS = "isMergedIn"; + + public static final List> RESULT_CLASSES = + Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 3becdec44..8e5ba9cd1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -39,9 +39,6 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Software; -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import scala.Tuple2; public class GenerateEntitiesApplication { @@ -66,15 +63,15 @@ public class GenerateEntitiesApplication { log.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String sourcePaths = parser.get("sourcePaths"); - final String targetPath = parser.get("targetPath"); + log.info("sourcePaths: {}", sourcePaths); - // final String dbUrl = parser.get("postgresUrl"); - // final String dbUser = parser.get("postgresUser"); - // final String dbPassword = parser.get("postgresPassword"); + final String targetPath = parser.get("targetPath"); + log.info("targetPath: {}", targetPath); final String isLookupUrl = parser.get("isLookupUrl"); + log.info("isLookupUrl: {}", isLookupUrl); - final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc + final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); final SparkConf conf = new SparkConf(); runWithSparkSession(conf, isSparkSessionManaged, spark -> { @@ -165,35 +162,6 @@ public class GenerateEntitiesApplication { } } - private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { - final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); - - final String xquery = IOUtils - .toString( - GenerateEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); - - final VocabularyGroup vocs = new VocabularyGroup(); - - for (final String s : isLookUpService.quickSearchProfile(xquery)) { - final String[] arr = s.split("@=@"); - if (arr.length == 4) { - final String vocId = arr[0].trim(); - final String vocName = arr[1].trim(); - final String termId = arr[2].trim(); - final String termName = arr[3].trim(); - - if (!vocs.vocabularyExists(vocId)) { - vocs.addVocabulary(vocId, vocName); - } - - vocs.addTerm(vocId, termId, termName); - } - } - - return vocs; - } - private static Oaf convertFromJson(final String s, final Class clazz) { try { return OBJECT_MAPPER.readValue(s, clazz); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index 5b8296c19..653027039 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -10,7 +10,28 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; import java.io.Closeable; import java.io.IOException; @@ -26,12 +47,13 @@ import java.util.function.Function; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -52,7 +74,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { - private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); + private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); public static final String SOURCE_TYPE = "source_type"; public static final String TARGET_TYPE = "target_type"; @@ -61,6 +83,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private final long lastUpdateTimestamp; + private final VocabularyGroup vocs; + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -71,15 +95,25 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i parser.parseArgument(args); final String dbUrl = parser.get("postgresUrl"); + log.info("postgresUrl: {}", dbUrl); + final String dbUser = parser.get("postgresUser"); + log.info("postgresUser: {}", dbUser); + final String dbPassword = parser.get("postgresPassword"); + log.info("postgresPassword: xxx"); + + final String isLookupUrl = parser.get("isLookupUrl"); + log.info("isLookupUrl: {}", isLookupUrl); final String hdfsPath = parser.get("hdfsPath"); + log.info("hdfsPath: {}", hdfsPath); final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); + log.info("processClaims: {}", processClaims); try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, - dbPassword)) { + dbPassword, isLookupUrl)) { if (processClaims) { log.info("Processing claims..."); smdbe.execute("queryClaims.sql", smdbe::processClaims); @@ -103,18 +137,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST + protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST super(); this.dbClient = null; this.lastUpdateTimestamp = new Date().getTime(); + this.vocs = vocs; } public MigrateDbEntitiesApplication( - final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) + final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword, + final String isLookupUrl) throws Exception { super(hdfsPath); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.lastUpdateTimestamp = new Date().getTime(); + this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); } public void execute(final String sqlFile, final Function> producer) @@ -453,12 +490,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final Boolean inferred = rs.getBoolean("inferred"); final String trust = rs.getString("trust"); return dataInfo( - deletedbyinference, - inferenceprovenance, - inferred, - false, - ENTITYREGISTRY_PROVENANCE_ACTION, - trust); + deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust); } private Qualifier prepareQualifierSplitting(final String s) { @@ -466,7 +498,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return null; } final String[] arr = s.split("@@@"); - return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; + return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null; } private List> prepareListFields(final Array array, final DataInfo info) { @@ -485,8 +517,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i if (parts.length == 2) { final String value = parts[0]; final String[] arr = parts[1].split("@@@"); - if (arr.length == 4) { - return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); + if (arr.length == 2) { + return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo); } } return null; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java index 7714f6d90..c955ee640 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.raw.common; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; -public class Vocabulary { +public class Vocabulary implements Serializable { private final String id; private final String name; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java index 127f73e22..43ed7f2d9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java @@ -1,12 +1,50 @@ package eu.dnetlib.dhp.oa.graph.raw.common; +import java.io.IOException; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; -import eu.dnetlib.dhp.schema.oaf.Qualifier; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; -public class VocabularyGroup { +import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +public class VocabularyGroup implements Serializable { + + public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { + final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); + + final String xquery = IOUtils + .toString( + GenerateEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); + + final VocabularyGroup vocs = new VocabularyGroup(); + + for (final String s : isLookUpService.quickSearchProfile(xquery)) { + final String[] arr = s.split("@=@"); + if (arr.length == 4) { + final String vocId = arr[0].trim(); + final String vocName = arr[1].trim(); + final String termId = arr[2].trim(); + final String termName = arr[3].trim(); + + if (!vocs.vocabularyExists(vocId)) { + vocs.addVocabulary(vocId, vocName); + } + + vocs.addTerm(vocId, termId, termName); + } + } + + return vocs; + } private final Map vocs = new HashMap<>(); @@ -29,7 +67,9 @@ public class VocabularyGroup { } public Qualifier getTermAsQualifier(final String vocId, final String id) { - if (termExists(vocId, id)) { + if (StringUtils.isBlank(id)) { + return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId); + } else if (termExists(vocId, id)) { final Vocabulary v = vocs.get(vocId.toLowerCase()); final VocabularyTerm t = v.getTerm(id); return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java index b3c785923..1aa1b8253 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.oa.graph.raw.common; -public class VocabularyTerm { +import java.io.Serializable; + +public class VocabularyTerm implements Serializable { private final String id; private final String name; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json index 9e3992bcf..8342dde95 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json @@ -18,8 +18,8 @@ "paramRequired": true }, { - "paramName": "islookup", - "paramLongName": "islookup", + "paramName": "isu", + "paramLongName": "isLookupUrl", "paramDescription": "the url of the ISLookupService", "paramRequired": true } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json index cb13ff024..4e838561d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json @@ -28,5 +28,11 @@ "paramLongName": "action", "paramDescription": "process claims", "paramRequired": false + }, + { + "paramName": "isu", + "paramLongName": "isLookupUrl", + "paramDescription": "the url of the ISLookupService", + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index f8426c35f..c2bea9f8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -123,6 +123,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --isLookupUrl${isLookupUrl} --actionclaims @@ -173,6 +174,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --isLookupUrl${isLookupUrl} @@ -237,7 +239,7 @@ --sourcePaths${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims --targetPath${workingDir}/entities_claim - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} @@ -284,7 +286,7 @@ --sourcePaths${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records --targetPath${workingDir}/entities - --islookup${isLookupUrl} + --isLookupUrl${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml index 05b85a561..575f9229e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_db/oozie_app/workflow.xml @@ -16,6 +16,10 @@ postgresPassword the password postgres + + isLookupUrl + the address of the lookUp service + sparkDriverMemory @@ -88,6 +92,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --isLookupUrl${isLookupUrl} @@ -103,6 +108,7 @@ --postgresUrl${postgresURL} --postgresUser${postgresUser} --postgresPassword${postgresPassword} + --isLookupUrl${isLookupUrl} --actionclaims diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml index f16e22f95..868418152 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step1/oozie_app/workflow.xml @@ -24,6 +24,10 @@ mongoDb mongo database + + isLookupUrl + the address of the lookUp service + sparkDriverMemory memory for driver process @@ -62,6 +66,7 @@ -pgurl${postgresURL} -pguser${postgresUser} -pgpasswd${postgresPassword} + -islookup${isLookupUrl} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql index 745f83971..687377aa4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasourceOrganization.sql @@ -1,17 +1,16 @@ SELECT - dor.datasource AS datasource, - dor.organization AS organization, - NULL AS startdate, - NULL AS enddate, - false AS inferred, - false AS deletedbyinference, - 0.9 AS trust, - NULL AS inferenceprovenance, - dc.id AS collectedfromid, - dc.officialname AS collectedfromname, - 'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics, - d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction - + dor.datasource AS datasource, + dor.organization AS organization, + NULL AS startdate, + NULL AS enddate, + false AS inferred, + false AS deletedbyinference, + 0.9 AS trust, + NULL AS inferenceprovenance, + dc.id AS collectedfromid, + dc.officialname AS collectedfromname, + 'providedBy@@@dnet:datasources_organizations_typologies' AS semantics, + d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction FROM dsm_datasource_organization dor - LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) + LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index ff1178c71..43b0f8f4b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -7,36 +7,36 @@ SELECT CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1']) THEN - 'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - 'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN - 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN - 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) THEN - 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE - 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' + 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END AS openairecompatibility, d.websiteurl AS websiteurl, d.logourl AS logourl, @@ -47,7 +47,7 @@ SELECT NULL AS odnumberofitems, NULL AS odnumberofitemsdate, - (SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies') + (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies') FROM UNNEST( ARRAY( SELECT trim(s) @@ -83,32 +83,9 @@ SELECT ARRAY[]::text[] AS policies, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - d.typology || '@@@' || CASE - WHEN (d.typology = 'crissystem') THEN 'CRIS System' - WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository' - WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator' - WHEN (d.typology = 'infospace') THEN 'Information Space' - WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository' - WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator' - WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal' - WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher' - WHEN (d.typology = 'pubsrepository::mock') THEN 'Other' - WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue' - WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository' - WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator' - WHEN (d.typology = 'entityregistry') THEN 'Registry' - WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure' - WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository' - WHEN (d.typology = 'websource') THEN 'Web Source' - WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database' - WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories' - WHEN (d.typology = 'softwarerepository') THEN 'Software Repository' - WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator' - WHEN (d.typology = 'orprepository') THEN 'Repository' - ELSE 'Other' - END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, - CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal + d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, + d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal FROM dsm_datasources d diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql index d13bd4342..3e5de8071 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizations.sql @@ -22,13 +22,12 @@ SELECT '' AS inferenceprovenance, d.id AS collectedfromid, d.officialname AS collectedfromname, - o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, ARRAY[]::text[] AS pid FROM dsm_organizations o LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) - LEFT OUTER JOIN class cntr ON (cntr.code = o.country) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql index 99c8e04b4..3396f365c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql @@ -11,8 +11,8 @@ SELECT '' AS inferenceprovenance, 'openaire____::openorgs' AS collectedfromid, 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM organizations o LEFT OUTER JOIN acronyms a ON (a.id = o.id) @@ -40,8 +40,8 @@ SELECT '' AS inferenceprovenance, 'openaire____::openorgs' AS collectedfromid, 'OpenOrgs Database' AS collectedfromname, - o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, + o.country || '@@@dnet:countries' AS country, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid FROM other_names n LEFT OUTER JOIN organizations o ON (n.id = o.id) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql index 4c06ca5b9..13cfca871 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjectOrganization.sql @@ -11,8 +11,8 @@ SELECT NULL AS inferenceprovenance, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics, - 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction + po.semanticclass || '@@@dnet:project_organization_relations' AS semantics, + 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction FROM project_organization po LEFT OUTER JOIN projects p ON (p.id = po.project) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql index 685b57ab6..db0da83f7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects.sql @@ -31,17 +31,14 @@ SELECT p.fundedamount AS fundedamount, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype, - pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, + p.contracttype || '@@@' || p.contracttypescheme AS contracttype, + p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, - array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, + array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects, array_agg(DISTINCT fp.path) AS fundingtree FROM projects p - LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass) - LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme) - LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) @@ -53,9 +50,6 @@ SELECT LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN subjects s ON (s.id = ps.subject) - LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) - LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) - GROUP BY p.id, p.code, @@ -85,5 +79,6 @@ SELECT p.fundedamount, dc.id, dc.officialname, - pac.code, pac.name, pas.code, pas.name, - p.contracttype , p.contracttypename, p.contracttypescheme; \ No newline at end of file + p.contracttype, + p.contracttypescheme; + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql index 6cff18875..234bb7c3e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryProjects_production.sql @@ -28,18 +28,15 @@ SELECT p.summary AS summary, p.currency AS currency, p.totalcost AS totalcost, - p.fundedamount AS fundedamount, + p.fundedamount AS fundedamount, dc.id AS collectedfromid, dc.officialname AS collectedfromname, - ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype, - pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, + p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype, + p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, - array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, + array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects, array_agg(DISTINCT fp.path) AS fundingtree FROM projects p - LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass) - LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme) - LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) @@ -51,12 +48,6 @@ SELECT LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN subjects s ON (s.id = ps.subject) - LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) - LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) - - LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass) - LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme) - GROUP BY p.id, p.code, @@ -85,6 +76,6 @@ SELECT p.totalcost, p.fundedamount, dc.id, - dc.officialname, - pac.code, pac.name, pas.code, pas.name, - ctc.code, ctc.name, cts.code, cts.name; \ No newline at end of file + dc.officialname + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 1bbe57ee8..22fcb36c9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.lenient; import java.io.IOException; import java.sql.Array; @@ -25,6 +27,8 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils; +import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; @@ -40,9 +44,22 @@ public class MigrateDbEntitiesApplicationTest { @Mock private ResultSet rs; + @Mock + private VocabularyGroup vocs; + @BeforeEach public void setUp() { - this.app = new MigrateDbEntitiesApplication(); + lenient() + .when(vocs.getTermAsQualifier(anyString(), anyString())) + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); + + lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); + + this.app = new MigrateDbEntitiesApplication(vocs); } @Test @@ -61,8 +78,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); - assertEquals( - ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test @@ -78,8 +94,7 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); - assertEquals( - p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test @@ -99,13 +114,10 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]); - assertEquals( - o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[1]); - assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[2]); - assertEquals( - o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]); - assertEquals( - o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); + assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]); + assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]); + assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]); + assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields)); } @Test diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json index 3a0318ed7..2baf7c8f1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasourceorganization_resultset_entry.json @@ -52,7 +52,7 @@ { "field": "semantics", "type": "not_used", - "value": "providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies" + "value": "providedBy@@@dnet:datasources_organizations_typologies" }, { "field": "provenanceaction", diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index 71e84954f..f4c5f97ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -30,7 +30,7 @@ { "field": "openairecompatibility", "type": "string", - "value": "hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel" + "value": "hostedBy@@@dnet:datasourceCompatibilityLevel" }, { "field": "websiteurl", @@ -219,16 +219,16 @@ { "field": "datasourcetype", "type": "string", - "value": "pubsrepository::journal@@@Journal@@@dnet:datasource_typologies@@@dnet:datasource_typologies" + "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" }, { "field": "journal", "type": "string", - "value": "2579-5449@@@2597-6540@@@" + "value": "2579-5449 @@@ 2597-6540 @@@ " } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json index f766246bc..38657a1e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/organizations_resultset_entry.json @@ -117,11 +117,11 @@ { "field": "country", "type": "string", - "value": "US@@@US@@@dnet:countries@@@dnet:countries" + "value": "US@@@dnet:countries" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json index 855e1a483..4311086e7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projectorganization_resultset_entry.json @@ -62,11 +62,11 @@ { "field": "semantics", "type": "not_used", - "value": "coordinator@@@coordinator@@@dnet:project_organization_relations@@@dnet:project_organization_relations" + "value": "coordinator@@@dnet:project_organization_relations" }, { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions" } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json index 7d6ebffbe..d6109cac1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/projects_resultset_entry.json @@ -167,7 +167,7 @@ { "field": "provenanceaction", "type": "not_used", - "value": "sysimport:crosswalk:entityregistry@@@Harvested@@@dnet:provenanceActions@@@dnet:provenanceActions" + "value": "sysimport:crosswalk:entityregistry@@@dnet:provenanceActions" }, { "field": "pid",