forked from D-Net/dnet-hadoop
import from db using is vocabularies
This commit is contained in:
parent
f5ce7d76e1
commit
adb798faa5
|
@ -39,9 +39,6 @@ import eu.dnetlib.dhp.schema.oaf.Project;
|
|||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class GenerateEntitiesApplication {
|
||||
|
@ -68,13 +65,9 @@ public class GenerateEntitiesApplication {
|
|||
final String sourcePaths = parser.get("sourcePaths");
|
||||
final String targetPath = parser.get("targetPath");
|
||||
|
||||
// final String dbUrl = parser.get("postgresUrl");
|
||||
// final String dbUser = parser.get("postgresUser");
|
||||
// final String dbPassword = parser.get("postgresPassword");
|
||||
final String isLookupUrl = parser.get("islookup");
|
||||
|
||||
final String isLookupUrl = parser.get("isLookupUrl");
|
||||
|
||||
final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc
|
||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
|
@ -165,35 +158,6 @@ public class GenerateEntitiesApplication {
|
|||
}
|
||||
}
|
||||
|
||||
private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
|
||||
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
|
||||
final String xquery = IOUtils
|
||||
.toString(
|
||||
GenerateEntitiesApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
|
||||
|
||||
final VocabularyGroup vocs = new VocabularyGroup();
|
||||
|
||||
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
|
||||
final String[] arr = s.split("@=@");
|
||||
if (arr.length == 4) {
|
||||
final String vocId = arr[0].trim();
|
||||
final String vocName = arr[1].trim();
|
||||
final String termId = arr[2].trim();
|
||||
final String termName = arr[3].trim();
|
||||
|
||||
if (!vocs.vocabularyExists(vocId)) {
|
||||
vocs.addVocabulary(vocId, vocName);
|
||||
}
|
||||
|
||||
vocs.addTerm(vocId, termId, termName);
|
||||
}
|
||||
}
|
||||
|
||||
return vocs;
|
||||
}
|
||||
|
||||
private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
|
||||
try {
|
||||
return OBJECT_MAPPER.readValue(s, clazz);
|
||||
|
|
|
@ -10,7 +10,28 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
|
|||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
@ -32,6 +53,7 @@ import org.apache.commons.logging.LogFactory;
|
|||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.DbClient;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
|
@ -61,6 +83,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
|
||||
private final long lastUpdateTimestamp;
|
||||
|
||||
private final VocabularyGroup vocs;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
|
@ -73,13 +97,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final String dbUrl = parser.get("postgresUrl");
|
||||
final String dbUser = parser.get("postgresUser");
|
||||
final String dbPassword = parser.get("postgresPassword");
|
||||
final String isLookupUrl = parser.get("islookup");
|
||||
|
||||
final String hdfsPath = parser.get("hdfsPath");
|
||||
|
||||
final boolean processClaims = parser.get("action") != null && parser.get("action").equalsIgnoreCase("claims");
|
||||
|
||||
try (final MigrateDbEntitiesApplication smdbe = new MigrateDbEntitiesApplication(hdfsPath, dbUrl, dbUser,
|
||||
dbPassword)) {
|
||||
dbPassword, isLookupUrl)) {
|
||||
if (processClaims) {
|
||||
log.info("Processing claims...");
|
||||
smdbe.execute("queryClaims.sql", smdbe::processClaims);
|
||||
|
@ -103,18 +128,21 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
protected MigrateDbEntitiesApplication() { // ONLY FOR UNIT TEST
|
||||
protected MigrateDbEntitiesApplication(final VocabularyGroup vocs) { // ONLY FOR UNIT TEST
|
||||
super();
|
||||
this.dbClient = null;
|
||||
this.lastUpdateTimestamp = new Date().getTime();
|
||||
this.vocs = vocs;
|
||||
}
|
||||
|
||||
public MigrateDbEntitiesApplication(
|
||||
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword)
|
||||
final String hdfsPath, final String dbUrl, final String dbUser, final String dbPassword,
|
||||
final String isLookupUrl)
|
||||
throws Exception {
|
||||
super(hdfsPath);
|
||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||
this.lastUpdateTimestamp = new Date().getTime();
|
||||
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
||||
}
|
||||
|
||||
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
||||
|
@ -453,12 +481,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final Boolean inferred = rs.getBoolean("inferred");
|
||||
final String trust = rs.getString("trust");
|
||||
return dataInfo(
|
||||
deletedbyinference,
|
||||
inferenceprovenance,
|
||||
inferred,
|
||||
false,
|
||||
ENTITYREGISTRY_PROVENANCE_ACTION,
|
||||
trust);
|
||||
deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
|
||||
}
|
||||
|
||||
private Qualifier prepareQualifierSplitting(final String s) {
|
||||
|
@ -466,7 +489,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
return null;
|
||||
}
|
||||
final String[] arr = s.split("@@@");
|
||||
return arr.length == 4 ? qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
|
||||
return arr.length == 2 ? vocs.getTermAsQualifier(arr[1], arr[0]) : null;
|
||||
}
|
||||
|
||||
private List<Field<String>> prepareListFields(final Array array, final DataInfo info) {
|
||||
|
@ -485,8 +508,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
if (parts.length == 2) {
|
||||
final String value = parts[0];
|
||||
final String[] arr = parts[1].split("@@@");
|
||||
if (arr.length == 4) {
|
||||
return structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo);
|
||||
if (arr.length == 2) {
|
||||
return structuredProperty(value, vocs.getTermAsQualifier(arr[1], arr[0]), dataInfo);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
|
|
@ -1,13 +1,50 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class VocabularyGroup {
|
||||
|
||||
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
|
||||
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
|
||||
final String xquery = IOUtils
|
||||
.toString(
|
||||
GenerateEntitiesApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
|
||||
|
||||
final VocabularyGroup vocs = new VocabularyGroup();
|
||||
|
||||
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
|
||||
final String[] arr = s.split("@=@");
|
||||
if (arr.length == 4) {
|
||||
final String vocId = arr[0].trim();
|
||||
final String vocName = arr[1].trim();
|
||||
final String termId = arr[2].trim();
|
||||
final String termName = arr[3].trim();
|
||||
|
||||
if (!vocs.vocabularyExists(vocId)) {
|
||||
vocs.addVocabulary(vocId, vocName);
|
||||
}
|
||||
|
||||
vocs.addTerm(vocId, termId, termName);
|
||||
}
|
||||
}
|
||||
|
||||
return vocs;
|
||||
}
|
||||
|
||||
private final Map<String, Vocabulary> vocs = new HashMap<>();
|
||||
|
||||
public void addVocabulary(final String id, final String name) {
|
||||
|
@ -29,7 +66,9 @@ public class VocabularyGroup {
|
|||
}
|
||||
|
||||
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
||||
if (termExists(vocId, id)) {
|
||||
if (StringUtils.isBlank(id)) {
|
||||
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
|
||||
} else if (termExists(vocId, id)) {
|
||||
final Vocabulary v = vocs.get(vocId.toLowerCase());
|
||||
final VocabularyTerm t = v.getTerm(id);
|
||||
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());
|
||||
|
|
|
@ -28,5 +28,11 @@
|
|||
"paramLongName": "action",
|
||||
"paramDescription": "process claims",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "islookup",
|
||||
"paramLongName": "islookup",
|
||||
"paramDescription": "the url of the ISLookupService",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -173,6 +173,7 @@
|
|||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
||||
<arg>--islookup</arg><arg>${isLookupUrl}</arg>
|
||||
</java>
|
||||
<ok to="ImportODF"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
<name>mongoDb</name>
|
||||
<description>mongo database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
|
@ -62,6 +66,7 @@
|
|||
<arg>-pgurl</arg><arg>${postgresURL}</arg>
|
||||
<arg>-pguser</arg><arg>${postgresUser}</arg>
|
||||
<arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
|
||||
<arg>-islookup</arg><arg>${isLookupUrl}</arg>
|
||||
</java>
|
||||
<ok to="ImportODF"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -9,9 +9,8 @@ SELECT
|
|||
NULL AS inferenceprovenance,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics,
|
||||
d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
|
||||
|
||||
'providedBy@@@dnet:datasources_organizations_typologies' AS semantics,
|
||||
d.provenanceaction || '@@@dnet:provenanceActions' AS provenanceaction
|
||||
FROM dsm_datasource_organization dor
|
||||
LEFT OUTER JOIN dsm_datasources d ON (dor.datasource = d.id)
|
||||
LEFT OUTER JOIN dsm_datasources dc ON (dc.id = d.collectedfrom)
|
||||
|
|
|
@ -7,36 +7,36 @@ SELECT
|
|||
CASE
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire-cris_1.1'])
|
||||
THEN
|
||||
'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0'])
|
||||
THEN
|
||||
'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'openaire4.0@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
||||
THEN
|
||||
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
|
||||
THEN
|
||||
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'driver@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
|
||||
THEN
|
||||
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'openaire2.0@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
|
||||
THEN
|
||||
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'openaire3.0@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
|
||||
THEN
|
||||
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'openaire2.0_data@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
|
||||
THEN
|
||||
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'native@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
|
||||
THEN
|
||||
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'hostedBy@@@dnet:datasourceCompatibilityLevel'
|
||||
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
|
||||
THEN
|
||||
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'notCompatible@@@dnet:datasourceCompatibilityLevel'
|
||||
ELSE
|
||||
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
||||
'UNKNOWN@@@dnet:datasourceCompatibilityLevel'
|
||||
END AS openairecompatibility,
|
||||
d.websiteurl AS websiteurl,
|
||||
d.logourl AS logourl,
|
||||
|
@ -47,7 +47,7 @@ SELECT
|
|||
NULL AS odnumberofitems,
|
||||
NULL AS odnumberofitemsdate,
|
||||
|
||||
(SELECT array_agg(s|| '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies')
|
||||
(SELECT array_agg(s|| '###keywords@@@dnet:subject_classification_typologies')
|
||||
FROM UNNEST(
|
||||
ARRAY(
|
||||
SELECT trim(s)
|
||||
|
@ -83,32 +83,9 @@ SELECT
|
|||
ARRAY[]::text[] AS policies,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
d.typology || '@@@' || CASE
|
||||
WHEN (d.typology = 'crissystem') THEN 'CRIS System'
|
||||
WHEN (d.typology = 'datarepository::unknown') THEN 'Data Repository'
|
||||
WHEN (d.typology = 'aggregator::datarepository') THEN 'Data Repository Aggregator'
|
||||
WHEN (d.typology = 'infospace') THEN 'Information Space'
|
||||
WHEN (d.typology = 'pubsrepository::institutional') THEN 'Institutional Repository'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::institutional') THEN 'Institutional Repository Aggregator'
|
||||
WHEN (d.typology = 'pubsrepository::journal') THEN 'Journal'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::journals') THEN 'Journal Aggregator/Publisher'
|
||||
WHEN (d.typology = 'pubsrepository::mock') THEN 'Other'
|
||||
WHEN (d.typology = 'pubscatalogue::unknown') THEN 'Publication Catalogue'
|
||||
WHEN (d.typology = 'pubsrepository::unknown') THEN 'Publication Repository'
|
||||
WHEN (d.typology = 'aggregator::pubsrepository::unknown') THEN 'Publication Repository Aggregator'
|
||||
WHEN (d.typology = 'entityregistry') THEN 'Registry'
|
||||
WHEN (d.typology = 'scholarcomminfra') THEN 'Scholarly Comm. Infrastructure'
|
||||
WHEN (d.typology = 'pubsrepository::thematic') THEN 'Thematic Repository'
|
||||
WHEN (d.typology = 'websource') THEN 'Web Source'
|
||||
WHEN (d.typology = 'entityregistry::projects') THEN 'Funder database'
|
||||
WHEN (d.typology = 'entityregistry::repositories') THEN 'Registry of repositories'
|
||||
WHEN (d.typology = 'softwarerepository') THEN 'Software Repository'
|
||||
WHEN (d.typology = 'aggregator::softwarerepository') THEN 'Software Repository Aggregator'
|
||||
WHEN (d.typology = 'orprepository') THEN 'Repository'
|
||||
ELSE 'Other'
|
||||
END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
CONCAT(d.issn, ' @@@ ', d.eissn, ' @@@ ', d.lissn) AS journal
|
||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
d.issn || ' @@@ ' || d.eissn || ' @@@ ' || d.lissn AS journal
|
||||
|
||||
FROM dsm_datasources d
|
||||
|
||||
|
|
|
@ -22,13 +22,12 @@ SELECT
|
|||
'' AS inferenceprovenance,
|
||||
d.id AS collectedfromid,
|
||||
d.officialname AS collectedfromname,
|
||||
o.country || '@@@' || COALESCE(cntr.name,o.country) || '@@@dnet:countries@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
ARRAY[]::text[] AS pid
|
||||
|
||||
FROM dsm_organizations o
|
||||
LEFT OUTER JOIN dsm_datasources d ON (d.id = o.collectedfrom)
|
||||
LEFT OUTER JOIN class cntr ON (cntr.code = o.country)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -11,8 +11,8 @@ SELECT
|
|||
'' AS inferenceprovenance,
|
||||
'openaire____::openorgs' AS collectedfromid,
|
||||
'OpenOrgs Database' AS collectedfromname,
|
||||
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
|
||||
FROM organizations o
|
||||
LEFT OUTER JOIN acronyms a ON (a.id = o.id)
|
||||
|
@ -40,8 +40,8 @@ SELECT
|
|||
'' AS inferenceprovenance,
|
||||
'openaire____::openorgs' AS collectedfromid,
|
||||
'OpenOrgs Database' AS collectedfromname,
|
||||
o.country || '@@@' || o.country || '@@@dnet:countries@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
|
||||
o.country || '@@@dnet:countries' AS country,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
|
||||
FROM other_names n
|
||||
LEFT OUTER JOIN organizations o ON (n.id = o.id)
|
||||
|
|
|
@ -11,8 +11,8 @@ SELECT
|
|||
NULL AS inferenceprovenance,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
|
||||
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction
|
||||
po.semanticclass || '@@@dnet:project_organization_relations' AS semantics,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction
|
||||
|
||||
FROM project_organization po
|
||||
LEFT OUTER JOIN projects p ON (p.id = po.project)
|
||||
|
|
|
@ -31,17 +31,14 @@ SELECT
|
|||
p.fundedamount AS fundedamount,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
p.contracttype || '@@@' || p.contracttypename || '@@@' || p.contracttypescheme || '@@@' || p.contracttypescheme AS contracttype,
|
||||
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
|
||||
p.contracttype || '@@@' || p.contracttypescheme AS contracttype,
|
||||
p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
|
||||
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
|
||||
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
|
||||
array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
|
||||
array_agg(DISTINCT fp.path) AS fundingtree
|
||||
|
||||
FROM projects p
|
||||
|
||||
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
|
||||
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
|
||||
|
||||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||
|
||||
|
@ -53,9 +50,6 @@ SELECT
|
|||
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
|
||||
LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
|
||||
|
||||
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
|
||||
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
|
||||
|
||||
GROUP BY
|
||||
p.id,
|
||||
p.code,
|
||||
|
@ -85,5 +79,6 @@ SELECT
|
|||
p.fundedamount,
|
||||
dc.id,
|
||||
dc.officialname,
|
||||
pac.code, pac.name, pas.code, pas.name,
|
||||
p.contracttype , p.contracttypename, p.contracttypescheme;
|
||||
p.contracttype,
|
||||
p.contracttypescheme;
|
||||
|
|
@ -31,15 +31,12 @@ SELECT
|
|||
p.fundedamount AS fundedamount,
|
||||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
ctc.code || '@@@' || ctc.name || '@@@' || cts.code || '@@@' || cts.name AS contracttype,
|
||||
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
|
||||
p.contracttypeclass || '@@@' || p.contracttypescheme AS contracttype,
|
||||
p.provenanceactionclass || '@@@' || p.provenanceactionscheme AS provenanceaction,
|
||||
array_agg(DISTINCT i.pid || '###' || i.issuertype) AS pid,
|
||||
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
|
||||
array_agg(DISTINCT s.name || '###' || s.semanticclass || '@@@' || s.semanticscheme) AS subjects,
|
||||
array_agg(DISTINCT fp.path) AS fundingtree
|
||||
FROM projects p
|
||||
LEFT OUTER JOIN class pac ON (pac.code = p.provenanceactionclass)
|
||||
LEFT OUTER JOIN scheme pas ON (pas.code = p.provenanceactionscheme)
|
||||
|
||||
LEFT OUTER JOIN projectpids pp ON (pp.project = p.id)
|
||||
LEFT OUTER JOIN dsm_identities i ON (i.pid = pp.pid)
|
||||
|
||||
|
@ -51,12 +48,6 @@ SELECT
|
|||
LEFT OUTER JOIN project_subject ps ON (ps.project = p.id)
|
||||
LEFT OUTER JOIN subjects s ON (s.id = ps.subject)
|
||||
|
||||
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
|
||||
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
|
||||
|
||||
LEFT OUTER JOIN class ctc ON (ctc.code = p.contracttypeclass)
|
||||
LEFT OUTER JOIN scheme cts ON (cts.code = p.contracttypescheme)
|
||||
|
||||
GROUP BY
|
||||
p.id,
|
||||
p.code,
|
||||
|
@ -85,6 +76,6 @@ SELECT
|
|||
p.totalcost,
|
||||
p.fundedamount,
|
||||
dc.id,
|
||||
dc.officialname,
|
||||
pac.code, pac.name, pas.code, pas.name,
|
||||
ctc.code, ctc.name, cts.code, cts.name;
|
||||
dc.officialname
|
||||
|
||||
|
|
@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
|||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.Array;
|
||||
|
@ -25,6 +27,8 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
|||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
|
@ -40,9 +44,22 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
@Mock
|
||||
private ResultSet rs;
|
||||
|
||||
@Mock
|
||||
private VocabularyGroup vocs;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
this.app = new MigrateDbEntitiesApplication();
|
||||
lenient()
|
||||
.when(vocs.getTermAsQualifier(anyString(), anyString()))
|
||||
.thenAnswer(
|
||||
invocation -> OafMapperUtils
|
||||
.qualifier(
|
||||
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
|
||||
invocation.getArgument(0)));
|
||||
|
||||
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
||||
|
||||
this.app = new MigrateDbEntitiesApplication(vocs);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -61,8 +78,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields));
|
||||
assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
|
||||
assertEquals(ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields));
|
||||
assertEquals(
|
||||
ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
assertEquals(ds.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -78,8 +94,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||
assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields));
|
||||
assertEquals(p.getTitle().getValue(), getValueAsString("title", fields));
|
||||
assertEquals(
|
||||
p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
assertEquals(p.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -99,13 +114,10 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields));
|
||||
assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
|
||||
assertEquals(o.getCountry().getClassid(), getValueAsString("country", fields).split("@@@")[0]);
|
||||
assertEquals(
|
||||
o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[1]);
|
||||
assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[2]);
|
||||
assertEquals(
|
||||
o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[3]);
|
||||
assertEquals(
|
||||
o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
assertEquals(o.getCountry().getClassname(), getValueAsString("country", fields).split("@@@")[0]);
|
||||
assertEquals(o.getCountry().getSchemeid(), getValueAsString("country", fields).split("@@@")[1]);
|
||||
assertEquals(o.getCountry().getSchemename(), getValueAsString("country", fields).split("@@@")[1]);
|
||||
assertEquals(o.getCollectedfrom().get(0).getValue(), getValueAsString("collectedfromname", fields));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
{
|
||||
"field": "semantics",
|
||||
"type": "not_used",
|
||||
"value": "providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies"
|
||||
"value": "providedBy@@@dnet:datasources_organizations_typologies"
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
{
|
||||
"field": "openairecompatibility",
|
||||
"type": "string",
|
||||
"value": "hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel"
|
||||
"value": "hostedBy@@@dnet:datasourceCompatibilityLevel"
|
||||
},
|
||||
{
|
||||
"field": "websiteurl",
|
||||
|
@ -219,12 +219,12 @@
|
|||
{
|
||||
"field": "datasourcetype",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@Journal@@@dnet:datasource_typologies@@@dnet:datasource_typologies"
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions"
|
||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
|
||||
},
|
||||
{
|
||||
"field": "journal",
|
||||
|
|
|
@ -117,11 +117,11 @@
|
|||
{
|
||||
"field": "country",
|
||||
"type": "string",
|
||||
"value": "US@@@US@@@dnet:countries@@@dnet:countries"
|
||||
"value": "US@@@dnet:countries"
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions"
|
||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
|
||||
}
|
||||
]
|
|
@ -62,11 +62,11 @@
|
|||
{
|
||||
"field": "semantics",
|
||||
"type": "not_used",
|
||||
"value": "coordinator@@@coordinator@@@dnet:project_organization_relations@@@dnet:project_organization_relations"
|
||||
"value": "coordinator@@@dnet:project_organization_relations"
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
"value": "sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions"
|
||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenance_actions"
|
||||
}
|
||||
]
|
|
@ -167,7 +167,7 @@
|
|||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
"value": "sysimport:crosswalk:entityregistry@@@Harvested@@@dnet:provenanceActions@@@dnet:provenanceActions"
|
||||
"value": "sysimport:crosswalk:entityregistry@@@dnet:provenanceActions"
|
||||
},
|
||||
{
|
||||
"field": "pid",
|
||||
|
|
Loading…
Reference in New Issue