import from db using is vocabularies

This commit is contained in:
Michele Artini 2020-05-29 12:03:51 +02:00
parent f5ce7d76e1
commit adb798faa5
19 changed files with 173 additions and 162 deletions

View File

@ -39,9 +39,6 @@ import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
public class GenerateEntitiesApplication { public class GenerateEntitiesApplication {
@ -68,13 +65,9 @@ public class GenerateEntitiesApplication {
final String sourcePaths = parser.get("sourcePaths"); final String sourcePaths = parser.get("sourcePaths");
final String targetPath = parser.get("targetPath"); final String targetPath = parser.get("targetPath");
// final String dbUrl = parser.get("postgresUrl"); final String isLookupUrl = parser.get("islookup");
// final String dbUser = parser.get("postgresUser");
// final String dbPassword = parser.get("postgresPassword");
final String isLookupUrl = parser.get("isLookupUrl"); final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc
final SparkConf conf = new SparkConf(); final SparkConf conf = new SparkConf();
runWithSparkSession(conf, isSparkSessionManaged, spark -> { runWithSparkSession(conf, isSparkSessionManaged, spark -> {
@ -165,35 +158,6 @@ public class GenerateEntitiesApplication {
} }
} }
private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final String xquery = IOUtils
.toString(
GenerateEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
}
}
return vocs;
}
private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) { private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
try { try {
return OBJECT_MAPPER.readValue(s, clazz); return OBJECT_MAPPER.readValue(s, clazz);

View File

@ -10,7 +10,28 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
@ -32,6 +53,7 @@ import org.apache.commons.logging.LogFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -61,6 +83,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
private final long lastUpdateTimestamp; private final long lastUpdateTimestamp;
private final VocabularyGroup vocs;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -73,13 +97,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final String dbUrl = parser.get("postgresUrl"); final String dbUrl = parser.get("postgresUrl");
final String dbUser = parser.get("postgresUser"); final String dbUser = parser.get("postgresUser");
final String dbPassword = parser.get("postgresPassword"); final String dbPassword = parser.get("postgresPassword");
final String isLookupUrl = parser.get("islookup");
final String hdfsPath = parser.get("hdfsPath"); final String hdfsPath = parser.get("hdfsPath");
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims"); final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser, try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
dbPassword)) { dbPassword, isLookupUrl)) {
if (processClaims) { if (processClaims) {
log.info("Processing claims..."); log.info("Processing claims...");
smdbe.execute("queryClaims.sql", smdbe::processClaims); smdbe.execute("queryClaims.sql", smdbe::processClaims);
@ -103,18 +128,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
} }
} }
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST
super(); super();
this.dbClient = null; this.dbClient = null;
this.lastUpdateTimestamp = new Date().getTime(); this.lastUpdateTimestamp = new Date().getTime();
this.vocs = vocs;
} }
public MigrateDbEntitiesApplication( public MigrateDbEntitiesApplication(
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword,
final String isLookupUrl)
throws Exception { throws Exception {
super(hdfsPath); super(hdfsPath);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime(); this.lastUpdateTimestamp = new Date().getTime();
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
} }
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer) public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
@ -453,12 +481,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
final Boolean inferred = rs.getBoolean("inferred"); final Boolean inferred = rs.getBoolean("inferred");
final String trust = rs.getString("trust"); final String trust = rs.getString("trust");
return dataInfo( return dataInfo(
deletedbyinference, deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
inferenceprovenance,
inferred,
false,
ENTITYREGISTRY_PROVENANCE_ACTION,
trust);
} }
private Qualifier prepareQualifierSplitting(final String s) { private Qualifier prepareQualifierSplitting(final String s) {
@ -466,7 +489,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return null; return null;
} }
final String[] arr = s.split("@@@"); final String[] arr = s.split("@@@");
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null; return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
} }
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) { private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
@ -485,8 +508,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
if (parts.length == 2) { if (parts.length == 2) {
final String value = parts[0]; final String value = parts[0];
final String[] arr = parts[1].split("@@@"); final String[] arr = parts[1].split("@@@");
if (arr.length == 4) { if (arr.length == 2) {
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
} }
} }
return null; return null;

View File

@ -1,13 +1,50 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class VocabularyGroup { public class VocabularyGroup {
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final String xquery = IOUtils
.toString(
GenerateEntitiesApplication.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
final String[] arr = s.split("@=@");
if (arr.length == 4) {
final String vocId = arr[0].trim();
final String vocName = arr[1].trim();
final String termId = arr[2].trim();
final String termName = arr[3].trim();
if (!vocs.vocabularyExists(vocId)) {
vocs.addVocabulary(vocId, vocName);
}
vocs.addTerm(vocId, termId, termName);
}
}
return vocs;
}
private final Map<String, Vocabulary> vocs = new HashMap<>(); private final Map<String, Vocabulary> vocs = new HashMap<>();
public void addVocabulary(final String id, final String name) { public void addVocabulary(final String id, final String name) {
@ -29,7 +66,9 @@ public class VocabularyGroup {
} }
public Qualifier getTermAsQualifier(final String vocId, final String id) { public Qualifier getTermAsQualifier(final String vocId, final String id) {
if (termExists(vocId, id)) { if (StringUtils.isBlank(id)) {
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
} else if (termExists(vocId, id)) {
final Vocabulary v = vocs.get(vocId.toLowerCase()); final Vocabulary v = vocs.get(vocId.toLowerCase());
final VocabularyTerm t = v.getTerm(id); final VocabularyTerm t = v.getTerm(id);
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName()); return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());

View File

@ -28,5 +28,11 @@
"paramLongName": "action", "paramLongName": "action",
"paramDescription": "process claims", "paramDescription": "process claims",
"paramRequired": false "paramRequired": false
},
{
"paramName": "islookup",
"paramLongName": "islookup",
"paramDescription": "the url of the ISLookupService",
"paramRequired": true
} }
] ]

View File

@ -173,6 +173,7 @@
<arg>--postgresUrl</arg><arg>${postgresURL}</arg> <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg> <arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg> <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
<arg>--islookup</arg><arg>${isLookupUrl}</arg>
</java> </java>
<ok to="ImportODF"/> <ok to="ImportODF"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -24,6 +24,10 @@
<name>mongoDb</name> <name>mongoDb</name>
<description>mongo database</description> <description>mongo database</description>
</property> </property>
<property>
<name>isLookupUrl</name>
<description>the address of the lookUp service</description>
</property>
<property> <property>
<name>sparkDriverMemory</name> <name>sparkDriverMemory</name>
<description>memory for driver process</description> <description>memory for driver process</description>
@ -62,6 +66,7 @@
<arg>-pgurl</arg><arg>${postgresURL}</arg> <arg>-pgurl</arg><arg>${postgresURL}</arg>
<arg>-pguser</arg><arg>${postgresUser}</arg> <arg>-pguser</arg><arg>${postgresUser}</arg>
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg> <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
<arg>-islookup</arg><arg>${isLookupUrl}</arg>
</java> </java>
<ok to="ImportODF"/> <ok to="ImportODF"/>
<error to="Kill"/> <error to="Kill"/>

View File

@ -9,9 +9,8 @@ SELECT
NULL AS inferenceprovenance, NULL AS inferenceprovenance,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics, 'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
FROM dsm_datasource_organization dor FROM dsm_datasource_organization dor
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id) LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom) LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom)

View File

@ -7,36 +7,36 @@ SELECT
CASE CASE
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1'])
THEN THEN
'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
THEN THEN
'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire4.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
THEN THEN
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
THEN THEN
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'driver@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
THEN THEN
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire2.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
THEN THEN
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire3.0@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
THEN THEN
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
THEN THEN
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'native@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
THEN THEN
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'hostedBy@@@dnet:datasourceCompatibilityLevel'
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
THEN THEN
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'notCompatible@@@dnet:datasourceCompatibilityLevel'
ELSE ELSE
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' 'UNKNOWN@@@dnet:datasourceCompatibilityLevel'
END AS openairecompatibility, END AS openairecompatibility,
d.websiteurl AS websiteurl, d.websiteurl AS websiteurl,
d.logourl AS logourl, d.logourl AS logourl,
@ -47,7 +47,7 @@ SELECT
NULL AS odnumberofitems, NULL AS odnumberofitems,
NULL AS odnumberofitemsdate, NULL AS odnumberofitemsdate,
(SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies') (SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
FROM UNNEST( FROM UNNEST(
ARRAY( ARRAY(
SELECT trim(s) SELECT trim(s)
@ -83,32 +83,9 @@ SELECT
ARRAY[]::text[] AS policies, ARRAY[]::text[] AS policies,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
d.typology || '@@@' || CASE d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
WHEN (d.typology = 'crissystem') THEN 'CRIS System' 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository' d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal
WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator'
WHEN (d.typology = 'infospace') THEN 'Information Space'
WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository'
WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator'
WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal'
WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher'
WHEN (d.typology = 'pubsrepository::mock') THEN 'Other'
WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue'
WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository'
WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator'
WHEN (d.typology = 'entityregistry') THEN 'Registry'
WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure'
WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository'
WHEN (d.typology = 'websource') THEN 'Web Source'
WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database'
WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories'
WHEN (d.typology = 'softwarerepository') THEN 'Software Repository'
WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator'
WHEN (d.typology = 'orprepository') THEN 'Repository'
ELSE 'Other'
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
FROM dsm_datasources d FROM dsm_datasources d

View File

@ -22,13 +22,12 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
d.id AS collectedfromid, d.id AS collectedfromid,
d.officialname AS collectedfromname, d.officialname AS collectedfromname,
o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
ARRAY[]::text[] AS pid ARRAY[]::text[] AS pid
FROM dsm_organizations o FROM dsm_organizations o
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom) LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
LEFT OUTER JOIN class cntr ON (cntr.code = o.country)

View File

@ -11,8 +11,8 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid, 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM organizations o FROM organizations o
LEFT OUTER JOIN acronyms a ON (a.id = o.id) LEFT OUTER JOIN acronyms a ON (a.id = o.id)
@ -40,8 +40,8 @@ SELECT
'' AS inferenceprovenance, '' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid, 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country, o.country || '@@@dnet:countries' AS country,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM other_names n FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id) LEFT OUTER JOIN organizations o ON (n.id = o.id)

View File

@ -11,8 +11,8 @@ SELECT
NULL AS inferenceprovenance, NULL AS inferenceprovenance,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics, po.semanticclass || '@@@dnet:project_organization_relations' AS semantics,
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction
FROM project_organization po FROM project_organization po
LEFT OUTER JOIN projects p ON (p.id = po.project) LEFT OUTER JOIN projects p ON (p.id = po.project)

View File

@ -31,17 +31,14 @@ SELECT
p.fundedamount AS fundedamount, p.fundedamount AS fundedamount,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype, p.contracttype || '@@@' || p.contracttypescheme AS contracttype,
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
array_agg(DISTINCT fp.path) AS fundingtree array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p FROM projects p
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
@ -53,9 +50,6 @@ SELECT
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
LEFT OUTER JOIN subjects s ON (s.id = ps.subject) LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
GROUP BY GROUP BY
p.id, p.id,
p.code, p.code,
@ -85,5 +79,6 @@ SELECT
p.fundedamount, p.fundedamount,
dc.id, dc.id,
dc.officialname, dc.officialname,
pac.code, pac.name, pas.code, pas.name, p.contracttype,
p.contracttype , p.contracttypename, p.contracttypescheme; p.contracttypescheme;

View File

@ -31,15 +31,12 @@ SELECT
p.fundedamount AS fundedamount, p.fundedamount AS fundedamount,
dc.id AS collectedfromid, dc.id AS collectedfromid,
dc.officialname AS collectedfromname, dc.officialname AS collectedfromname,
ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype, p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype,
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction, p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid, array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
array_agg(DISTINCT fp.path) AS fundingtree array_agg(DISTINCT fp.path) AS fundingtree
FROM projects p FROM projects p
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id) LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid) LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
@ -51,12 +48,6 @@ SELECT
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id) LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
LEFT OUTER JOIN subjects s ON (s.id = ps.subject) LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass)
LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme)
GROUP BY GROUP BY
p.id, p.id,
p.code, p.code,
@ -85,6 +76,6 @@ SELECT
p.totalcost, p.totalcost,
p.fundedamount, p.fundedamount,
dc.id, dc.id,
dc.officialname, dc.officialname
pac.code, pac.name, pas.code, pas.name,
ctc.code, ctc.name, cts.code, cts.name;

View File

@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.lenient;
import java.io.IOException; import java.io.IOException;
import java.sql.Array; import java.sql.Array;
@ -25,6 +27,8 @@ import org.mockito.junit.jupiter.MockitoExtension;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
@ -40,9 +44,22 @@ public class MigrateDbEntitiesApplicationTest {
@Mock @Mock
private ResultSet rs; private ResultSet rs;
@Mock
private VocabularyGroup vocs;
@BeforeEach @BeforeEach
public void setUp() { public void setUp() {
this.app = new MigrateDbEntitiesApplication(); lenient()
.when(vocs.getTermAsQualifier(anyString(), anyString()))
.thenAnswer(
invocation -> OafMapperUtils
.qualifier(
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
invocation.getArgument(0)));
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
this.app = new MigrateDbEntitiesApplication(vocs);
} }
@Test @Test
@ -61,8 +78,7 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields));
assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields));
assertEquals( assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test
@ -78,8 +94,7 @@ public class MigrateDbEntitiesApplicationTest {
assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields));
assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields));
assertEquals( assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test
@ -99,13 +114,10 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields));
assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]); assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]);
assertEquals( assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]);
o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[1]); assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]);
assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[2]); assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]);
assertEquals( assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]);
assertEquals(
o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
} }
@Test @Test

View File

@ -52,7 +52,7 @@
{ {
"field": "semantics", "field": "semantics",
"type": "not_used", "type": "not_used",
"value": "providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies" "value": "providedBy@@@dnet:datasources_organizations_typologies"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",

View File

@ -30,7 +30,7 @@
{ {
"field": "openairecompatibility", "field": "openairecompatibility",
"type": "string", "type": "string",
"value": "hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel" "value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
}, },
{ {
"field": "websiteurl", "field": "websiteurl",
@ -219,12 +219,12 @@
{ {
"field": "datasourcetype", "field": "datasourcetype",
"type": "string", "type": "string",
"value": "pubsrepository::journal@@@Journal@@@dnet:datasource_typologies@@@dnet:datasource_typologies" "value": "pubsrepository::journal@@@dnet:datasource_typologies"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
}, },
{ {
"field": "journal", "field": "journal",

View File

@ -117,11 +117,11 @@
{ {
"field": "country", "field": "country",
"type": "string", "type": "string",
"value": "US@@@US@@@dnet:countries@@@dnet:countries" "value": "US@@@dnet:countries"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
} }
] ]

View File

@ -62,11 +62,11 @@
{ {
"field": "semantics", "field": "semantics",
"type": "not_used", "type": "not_used",
"value": "coordinator@@@coordinator@@@dnet:project_organization_relations@@@dnet:project_organization_relations" "value": "coordinator@@@dnet:project_organization_relations"
}, },
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
} }
] ]

View File

@ -167,7 +167,7 @@
{ {
"field": "provenanceaction", "field": "provenanceaction",
"type": "not_used", "type": "not_used",
"value": "sysimport:crosswalk:entityregistry@@@Harvested@@@dnet:provenanceActions@@@dnet:provenanceActions" "value": "sysimport:crosswalk:entityregistry@@@dnet:provenanceActions"
}, },
{ {
"field": "pid", "field": "pid",