enrichment steps #38

Merged
claudio.atzori merged 334 commits from miriam.baglioni/dnet-hadoop:master into enrichment_wfs 2020-08-11 16:40:26 +02:00
25 changed files with 256 additions and 193 deletions
Showing only changes of commit b57e8ba374 - Show all commits

View File

@ -57,7 +57,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
@ -87,25 +86,32 @@ public class GenerateEventsApplication {
private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware(); private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo(); private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy(); private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy =
new EnrichMissingPublicationIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences(); private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo(); private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo =
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy(); new EnrichMissingPublicationIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy =
new EnrichMissingPublicationIsSupplementedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo(); private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo =
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy(); new EnrichMissingDatasetIsRelatedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences(); private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy =
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo(); new EnrichMissingDatasetIsReferencedBy();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy(); private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences =
new EnrichMissingDatasetReferences();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo =
new EnrichMissingDatasetIsSupplementedTo();
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy =
new EnrichMissingDatasetIsSupplementedBy();
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(GenerateEventsApplication.class
GenerateEventsApplication.class .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
final Boolean isSparkSessionManaged = Optional final Boolean isSparkSessionManaged = Optional
@ -128,10 +134,13 @@ public class GenerateEventsApplication {
final JavaRDD<Event> eventsRdd = sc.emptyRDD(); final JavaRDD<Event> eventsRdd = sc.emptyRDD();
eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class)); for (final Class<? extends Result> r1 : BrokerConstants.RESULT_CLASSES) {
eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class)); eventsRdd.union(generateSimpleEvents(spark, graphPath, r1));
eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class));
eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class)); for (final Class<? extends Result> r2 : BrokerConstants.RESULT_CLASSES) {
eventsRdd.union(generateRelationEvents(spark, graphPath, r1, r2));
}
}
eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class); eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class);
}); });
@ -146,9 +155,8 @@ public class GenerateEventsApplication {
final String graphPath, final String graphPath,
final Class<R> resultClazz) { final Class<R> resultClazz) {
final Dataset<R> results = readPath( final Dataset<R> results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz) .filter(r -> r.getDataInfo().getDeletedbyinference());
.filter(r -> r.getDataInfo().getDeletedbyinference());
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class) final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS)); .filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
@ -169,6 +177,14 @@ public class GenerateEventsApplication {
} }
private static <SRC extends Result, TRG extends Result> JavaRDD<Event> generateRelationEvents(final SparkSession spark,
final String graphPath,
final Class<SRC> sourceClass,
final Class<TRG> targetClass) {
// TODO Auto-generated method stub
return null;
}
private List<Event> generateSimpleEvents(final Collection<Result> children) { private List<Event> generateSimpleEvents(final Collection<Result> children) {
final List<UpdateInfo<?>> list = new ArrayList<>(); final List<UpdateInfo<?>> list = new ArrayList<>();

View File

@ -1,9 +1,21 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import java.util.Arrays;
import java.util.List;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
public class BrokerConstants { public class BrokerConstants {
public final static String OPEN_ACCESS = "OPEN"; public static final String OPEN_ACCESS = "OPEN";
public final static String IS_MERGED_IN_CLASS = "isMergedIn"; public static final String IS_MERGED_IN_CLASS = "isMergedIn";
public static final List<Class<? extends Result>> RESULT_CLASSES =
Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
} }

View File

@ -39,9 +39,6 @@ import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
public class GenerateEntitiesApplication { public class GenerateEntitiesApplication {
@ -66,15 +63,15 @@ public class GenerateEntitiesApplication {
log.info("isSparkSessionManaged: {}", isSparkSessionManaged); log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String sourcePaths = parser.get("sourcePaths"); final String sourcePaths = parser.get("sourcePaths");
final String targetPath = parser.get("targetPath"); log.info("sourcePaths: {}", sourcePaths);
// final String dbUrl = parser.get("postgresUrl"); final String targetPath = parser.get("targetPath");
// final String dbUser = parser.get("postgresUser"); log.info("targetPath: {}", targetPath);
// final String dbPassword = parser.get("postgresPassword");
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
final SparkConf conf = new SparkConf(); final SparkConf conf = new SparkConf();
runWithSparkSession(conf, isSparkSessionManaged, spark -> { runWithSparkSession(conf, isSparkSessionManaged, spark -> {
@ -165,35 +162,6 @@ public class GenerateEntitiesApplication {
} }
} }
private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final String xquery = IOUtils
.toString(
GenerateEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
}
}
return vocs;
}
private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) { private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
try { try {
return OBJECT_MAPPER.readValue(s, clazz); return OBJECT_MAPPER.readValue(s, clazz);

View File

@ -10,7 +10,28 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
@ -26,12 +47,13 @@ import java.util.function.Function;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.slf4j.Logger;
import org.apache.commons.logging.LogFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -52,7 +74,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class); private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
public static final String SOURCE_TYPE = "source_type"; public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type"; public static final String TARGET_TYPE = "target_type";
@ -61,6 +83,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
private final long lastUpdateTimestamp; private final long lastUpdateTimestamp;
private final VocabularyGroup vocs;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -71,15 +95,25 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
parser.parseArgument(args); parser.parseArgument(args);
final String dbUrl = parser.get("postgresUrl"); final String dbUrl = parser.get("postgresUrl");
log.info("postgresUrl: {}", dbUrl);
final String dbUser = parser.get("postgresUser"); final String dbUser = parser.get("postgresUser");
log.info("postgresUser: {}", dbUser);
final String dbPassword = parser.get("postgresPassword"); final String dbPassword = parser.get("postgresPassword");
log.info("postgresPassword: xxx");
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
final String hdfsPath = parser.get("hdfsPath"); final String hdfsPath = parser.get("hdfsPath");
log.info("hdfsPath: {}", hdfsPath);
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
log.info("processClaims: {}", processClaims);
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
dbPassword)) { dbPassword, isLookupUrl)) {
if (processClaims) { if (processClaims) {
log.info("Processing claims..."); log.info("Processing claims...");
smdbe.execute("queryClaims.sql", smdbe::processClaims); smdbe.execute("queryClaims.sql", smdbe::processClaims);
@ -103,18 +137,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
} }
} }
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST
super(); super();
this.dbClient = null; this.dbClient = null;
this.lastUpdateTimestamp = new Date().getTime(); this.lastUpdateTimestamp = new Date().getTime();
this.vocs = vocs;
} }
public MigrateDbEntitiesApplication( public MigrateDbEntitiesApplication(
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword,
final String isLookupUrl)
throws Exception { throws Exception {
super(hdfsPath); super(hdfsPath);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime(); this.lastUpdateTimestamp = new Date().getTime();
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
} }
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer) public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
@ -453,12 +490,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Boolean inferred = rs.getBoolean("inferred"); final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust"); final String trust = rs.getString("trust");
return dataInfo( return dataInfo(
deletedbyinference, deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
inferenceprovenance,
inferred,
false,
ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
} }
private Qualifier prepareQualifierSplitting(final String s) { private Qualifier prepareQualifierSplitting(final String s) {
@ -466,7 +498,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return null; return null;
} }
final String[] arr = s.split("@@@"); final String[] arr = s.split("@@@");
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
} }
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) { private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
@ -485,8 +517,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
if (parts.length == 2) { if (parts.length == 2) {
final String value = parts[0]; final String value = parts[0];
final String[] arr = parts[1].split("@@@"); final String[] arr = parts[1].split("@@@");
if (arr.length == 4) { if (arr.length == 2) {
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
} }
} }
return null; return null;

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
public class Vocabulary { public class Vocabulary implements Serializable {
private final String id; private final String id;
private final String name; private final String name;

View File

@ -1,12 +1,50 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
public class VocabularyGroup { import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class VocabularyGroup implements Serializable {
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final String xquery = IOUtils
.toString(
GenerateEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
}
}
return vocs;
}
private final Map<String, Vocabulary> vocs = new HashMap<>(); private final Map<String, Vocabulary> vocs = new HashMap<>();
@ -29,7 +67,9 @@ public class VocabularyGroup {
} }
public Qualifier getTermAsQualifier(final String vocId, final String id) { public Qualifier getTermAsQualifier(final String vocId, final String id) {
if (termExists(vocId, id)) { if (StringUtils.isBlank(id)) {
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
} else if (termExists(vocId, id)) {
final Vocabulary v = vocs.get(vocId.toLowerCase()); final Vocabulary v = vocs.get(vocId.toLowerCase());
final VocabularyTerm t = v.getTerm(id); final VocabularyTerm t = v.getTerm(id);
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());

View File

@ -1,7 +1,9 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
public class VocabularyTerm { import java.io.Serializable;
public class VocabularyTerm implements Serializable {
private final String id; private final String id;
private final String name; private final String name;

View File

@ -18,8 +18,8 @@
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "islookup", "paramName": "isu",
"paramLongName": "islookup", "paramLongName": "isLookupUrl",
"paramDescription": "the url of the ISLookupService", "paramDescription": "the url of the ISLookupService",
"paramRequired": true "paramRequired": true
} }

View File

@ -28,5 +28,11 @@
"paramLongName": "action", "paramLongName": "action",
"paramDescription": "process claims", "paramDescription": "process claims",
"paramRequired": false "paramRequired": false
},
{
"paramName": "isu",
"paramLongName": "isLookupUrl",
"paramDescription": "the url of the ISLookupService",
"paramRequired": true
} }
] ]

View File

@ -123,6 +123,7 @@
<arg>--postgresUrl</arg><arg>${postgresURL}</arg> <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--action</arg><arg>claims</arg> <arg>--action</arg><arg>claims</arg>
</java> </java>
<ok to="ImportODF_claims"/> <ok to="ImportODF_claims"/>
@ -173,6 +174,7 @@
<arg>--postgresUrl</arg><arg>${postgresURL}</arg> <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</java> </java>
<ok to="ImportODF"/> <ok to="ImportODF"/>
<error to="Kill"/> <error to="Kill"/>
@ -237,7 +239,7 @@
</spark-opts> </spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims</arg> <arg>--sourcePaths</arg><arg>${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities_claim</arg> <arg>--targetPath</arg><arg>${workingDir}/entities_claim</arg>
<arg>--islookup</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="GenerateGraph_claims"/> <ok to="GenerateGraph_claims"/>
<error to="Kill"/> <error to="Kill"/>
@ -284,7 +286,7 @@
</spark-opts> </spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records</arg> <arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg> <arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--islookup</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="GenerateGraph"/> <ok to="GenerateGraph"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -16,6 +16,10 @@
<name>postgresPassword</name> <name>postgresPassword</name>
<description>the password postgres</description> <description>the password postgres</description>
</property> </property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
@ -88,6 +92,7 @@
<arg>--postgresUrl</arg><arg>${postgresURL}</arg> <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</java> </java>
<ok to="ImportDB_claims"/> <ok to="ImportDB_claims"/>
<error to="Kill"/> <error to="Kill"/>
@ -103,6 +108,7 @@
<arg>--postgresUrl</arg><arg>${postgresURL}</arg> <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--action</arg><arg>claims</arg> <arg>--action</arg><arg>claims</arg>
</java> </java>
<ok to="End"/> <ok to="End"/>

View File

@ -24,6 +24,10 @@
<name>mongoDb</name> <name>mongoDb</name>
<description>mongo database</description> <description>mongo database</description>
</property> </property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
<description>memory for driver process</description> <description>memory for driver process</description>
@ -62,6 +66,7 @@
<arg>-pgurl</arg><arg>${postgresURL}</arg> <arg>-pgurl</arg><arg>${postgresURL}</arg>
<arg>-pguser</arg><arg>${postgresUser}</arg> <arg>-pguser</arg><arg>${postgresUser}</arg>
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg> <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
<arg>-islookup</arg><arg>${isLookupUrl}</arg>
</java> </java>
<ok to="ImportODF"/> <ok to="ImportODF"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -1,17 +1,16 @@
SELECT SELECT
dor.datasource AS datasource, dor.datasource AS datasource,
dor.organization AS organization, dor.organization AS organization,
NULL AS startdate, NULL AS startdate,
NULL AS enddate, NULL AS enddate,
false AS inferred, false AS inferred,
false AS deletedbyinference, false AS deletedbyinference,
0.9 AS trust, 0.9 AS trust,
NULL AS inferenceprovenance, NULL AS inferenceprovenance,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics, 'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
FROM dsm_datasource_organization dor FROM dsm_datasource_organization dor
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom) LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom)

View File

@ -7,36 +7,36 @@ SELECT
CASE CASE
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1'])
THEN THEN
'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
THEN THEN
'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire4.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
THEN THEN
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
THEN THEN
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'driver@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
THEN THEN
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire2.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
THEN THEN
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire3.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
THEN THEN
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
THEN THEN
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'native@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
THEN THEN
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'hostedBy@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
THEN THEN
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'notCompatible@@@dnet:datasourceCompatibilityLevel'
ELSE ELSE
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'UNKNOWN@@@dnet:datasourceCompatibilityLevel'
END AS openairecompatibility, END AS openairecompatibility,
d.websiteurl AS websiteurl, d.websiteurl AS websiteurl,
d.logourl AS logourl, d.logourl AS logourl,
@ -47,7 +47,7 @@ SELECT
NULL AS odnumberofitems, NULL AS odnumberofitems,
NULL AS odnumberofitemsdate, NULL AS odnumberofitemsdate,
(SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies') (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
FROM UNNEST( FROM UNNEST(
ARRAY( ARRAY(
SELECT trim(s) SELECT trim(s)
@ -83,32 +83,9 @@ SELECT
ARRAY[]::text[] AS policies, ARRAY[]::text[] AS policies,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
d.typology || '@@@' || CASE d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
WHEN (d.typology = 'crissystem') THEN 'CRIS System' 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository' d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal
WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator'
WHEN (d.typology = 'infospace') THEN 'Information Space'
WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository'
WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator'
WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal'
WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher'
WHEN (d.typology = 'pubsrepository::mock') THEN 'Other'
WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue'
WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository'
WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator'
WHEN (d.typology = 'entityregistry') THEN 'Registry'
WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure'
WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository'
WHEN (d.typology = 'websource') THEN 'Web Source'
WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database'
WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories'
WHEN (d.typology = 'softwarerepository') THEN 'Software Repository'
WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator'
WHEN (d.typology = 'orprepository') THEN 'Repository'
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d FROM dsm_datasources d

View File

@ -22,13 +22,12 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
d.id AS collectedfromid, d.id AS collectedfromid,
d.officialname AS collectedfromname, d.officialname AS collectedfromname,
o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
ARRAY[]::text[] AS pid ARRAY[]::text[] AS pid
FROM dsm_organizations o FROM dsm_organizations o
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
LEFT OUTER JOIN class cntr ON (cntr.code = o.country)

View File

@ -11,8 +11,8 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid, 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM organizations o FROM organizations o
LEFT OUTER JOIN acronyms a ON (a.id = o.id) LEFT OUTER JOIN acronyms a ON (a.id = o.id)
@ -40,8 +40,8 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid, 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM other_names n FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id) LEFT OUTER JOIN organizations o ON (n.id = o.id)

View File

@ -11,8 +11,8 @@ SELECT
NULL AS inferenceprovenance, NULL AS inferenceprovenance,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics, po.semanticclass || '@@@dnet:project_organization_relations' AS semantics,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction
FROM project_organization po FROM project_organization po
LEFT OUTER JOIN projects p ON (p.id = po.project) LEFT OUTER JOIN projects p ON (p.id = po.project)

View File

@ -31,17 +31,14 @@ SELECT
p.fundedamount AS fundedamount, p.fundedamount AS fundedamount,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype, p.contracttype || '@@@' || p.contracttypescheme AS contracttype,
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
array_agg(DISTINCT fp.path) AS fundingtree array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p FROM projects p
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
@ -53,9 +50,6 @@ SELECT
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
LEFT OUTER JOIN subjects s ON (s.id = ps.subject) LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
GROUP BY GROUP BY
p.id, p.id,
p.code, p.code,
@ -85,5 +79,6 @@ SELECT
p.fundedamount, p.fundedamount,
dc.id, dc.id,
dc.officialname, dc.officialname,
pac.code, pac.name, pas.code, pas.name, p.contracttype,
p.contracttype , p.contracttypename, p.contracttypescheme; p.contracttypescheme;

View File

@ -28,18 +28,15 @@ SELECT
p.summary AS summary, p.summary AS summary,
p.currency AS currency, p.currency AS currency,
p.totalcost AS totalcost, p.totalcost AS totalcost,
p.fundedamount AS fundedamount, p.fundedamount AS fundedamount,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype, p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype,
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
array_agg(DISTINCT fp.path) AS fundingtree array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p FROM projects p
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
@ -51,12 +48,6 @@ SELECT
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
LEFT OUTER JOIN subjects s ON (s.id = ps.subject) LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass)
LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme)
GROUP BY GROUP BY
p.id, p.id,
p.code, p.code,
@ -85,6 +76,6 @@ SELECT
p.totalcost, p.totalcost,
p.fundedamount, p.fundedamount,
dc.id, dc.id,
dc.officialname, dc.officialname
pac.code, pac.name, pas.code, pas.name,
ctc.code, ctc.name, cts.code, cts.name;

View File

@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import java.io.IOException; import java.io.IOException;
import java.sql.Array; import java.sql.Array;
@ -25,6 +27,8 @@ import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
@ -40,9 +44,22 @@ public class MigrateDbEntitiesApplicationTest {
@Mock @Mock
private ResultSet rs; private ResultSet rs;
@Mock
private VocabularyGroup vocs;
@BeforeEach @BeforeEach
public void setUp() { public void setUp() {
this.app = new MigrateDbEntitiesApplication(); lenient()
.when(vocs.getTermAsQualifier(anyString(), anyString()))
.thenAnswer(
invocation -> OafMapperUtils
.qualifier(
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
invocation.getArgument(0)));
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
this.app = new MigrateDbEntitiesApplication(vocs);
} }
@Test @Test
@ -61,8 +78,7 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields));
assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields));
assertEquals( assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test
@ -78,8 +94,7 @@ public class MigrateDbEntitiesApplicationTest {
assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields));
assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields));
assertEquals( assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test
@ -99,13 +114,10 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields));
assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]); assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]);
assertEquals( assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]);
o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[1]); assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]);
assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[2]); assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]);
assertEquals( assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]);
assertEquals(
o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test

View File

@ -52,7 +52,7 @@
{ {
"field": "semantics", "field": "semantics",
"type": "not_used", "type": "not_used",
"value": "providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies" "value": "providedBy@@@dnet:datasources_organizations_typologies"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",

View File

@ -30,7 +30,7 @@
{ {
"field": "openairecompatibility", "field": "openairecompatibility",
"type": "string", "type": "string",
"value": "hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel" "value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
}, },
{ {
"field": "websiteurl", "field": "websiteurl",
@ -219,16 +219,16 @@
{ {
"field": "datasourcetype", "field": "datasourcetype",
"type": "string", "type": "string",
"value": "pubsrepository::journal@@@Journal@@@dnet:datasource_typologies@@@dnet:datasource_typologies" "value": "pubsrepository::journal@@@dnet:datasource_typologies"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
}, },
{ {
"field": "journal", "field": "journal",
"type": "string", "type": "string",
"value": "2579-5449@@@2597-6540@@@" "value": "2579-5449 @@@ 2597-6540 @@@ "
} }
] ]

View File

@ -117,11 +117,11 @@
{ {
"field": "country", "field": "country",
"type": "string", "type": "string",
"value": "US@@@US@@@dnet:countries@@@dnet:countries" "value": "US@@@dnet:countries"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
} }
] ]

View File

@ -62,11 +62,11 @@
{ {
"field": "semantics", "field": "semantics",
"type": "not_used", "type": "not_used",
"value": "coordinator@@@coordinator@@@dnet:project_organization_relations@@@dnet:project_organization_relations" "value": "coordinator@@@dnet:project_organization_relations"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
} }
] ]

View File

@ -167,7 +167,7 @@
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@Harvested@@@dnet:provenanceActions@@@dnet:provenanceActions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenanceActions"
}, },
{ {
"field": "pid", "field": "pid",