forked from D-Net/dnet-hadoop
Merge pull request 'towards EOSC datasource profiles' (#130) from datasource_model_eosc_beta into beta
Reviewed-on: D-Net/dnet-hadoop#130
This commit is contained in:
commit
3bcac7e88c
|
@ -1,17 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project;
|
package eu.dnetlib.dhp.actionmanager.project;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
|
||||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
|
||||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.LocalFileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.junit.jupiter.api.*;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -19,7 +9,20 @@ import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.junit.jupiter.api.*;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
public class DownloadCsvTest {
|
public class DownloadCsvTest {
|
||||||
|
|
||||||
|
@ -103,8 +106,8 @@ public class DownloadCsvTest {
|
||||||
GetCSV
|
GetCSV
|
||||||
.getCsv(
|
.getCsv(
|
||||||
fs,
|
fs,
|
||||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))
|
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||||
, workingDir + "/projects",
|
workingDir + "/projects",
|
||||||
CSVProject.class.getName(), ';');
|
CSVProject.class.getName(), ';');
|
||||||
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
||||||
|
|
|
@ -33,7 +33,7 @@ public class Process implements Serializable {
|
||||||
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
||||||
}
|
}
|
||||||
ri.setId(Utils.getContextId(ci.getId()));
|
ri.setId(Utils.getContextId(ci.getId()));
|
||||||
ri.setOriginalId(ci.getId());
|
ri.setAcronym(ci.getId());
|
||||||
|
|
||||||
ri.setDescription(ci.getDescription());
|
ri.setDescription(ci.getDescription());
|
||||||
ri.setName(ci.getName());
|
ri.setName(ci.getName());
|
||||||
|
|
|
@ -1,15 +1,51 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.sql.Array;
|
import java.sql.Array;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
@ -50,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
|
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
|
||||||
|
|
||||||
private static final DataInfo DATA_INFO_CLAIM = dataInfo(
|
private static final DataInfo DATA_INFO_CLAIM = dataInfo(
|
||||||
false, null, false, false,
|
false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||||
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
"0.9");
|
||||||
|
|
||||||
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
|
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
|
||||||
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
||||||
|
@ -69,10 +105,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
Objects
|
|
||||||
.requireNonNull(
|
|
||||||
MigrateDbEntitiesApplication.class
|
MigrateDbEntitiesApplication.class
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))));
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
|
||||||
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
@ -86,7 +120,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
log.info("postgresPassword: xxx");
|
log.info("postgresPassword: xxx");
|
||||||
|
|
||||||
final String dbSchema = parser.get("dbschema");
|
final String dbSchema = parser.get("dbschema");
|
||||||
log.info("dbSchema {}: ", dbSchema);
|
log.info("dbSchema {}: " + dbSchema);
|
||||||
|
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
@ -139,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
case openorgs_dedup: // generates organization entities and relations for openorgs dedup
|
case openorgs_dedup: // generates organization entities and relations for openorgs dedup
|
||||||
log.info("Processing Openorgs...");
|
log.info("Processing Openorgs...");
|
||||||
smdbe
|
smdbe
|
||||||
.execute(
|
.execute("queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||||
"queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
|
||||||
|
|
||||||
log.info("Processing Openorgs Sim Rels...");
|
log.info("Processing Openorgs Sim Rels...");
|
||||||
smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels);
|
smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels);
|
||||||
|
@ -149,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
case openorgs: // generates organization entities and relations for provision
|
case openorgs: // generates organization entities and relations for provision
|
||||||
log.info("Processing Openorgs For Provision...");
|
log.info("Processing Openorgs For Provision...");
|
||||||
smdbe
|
smdbe
|
||||||
.execute(
|
.execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||||
"queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
|
||||||
|
|
||||||
log.info("Processing Openorgs Merge Rels...");
|
log.info("Processing Openorgs Merge Rels...");
|
||||||
smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels);
|
smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels);
|
||||||
|
@ -228,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
||||||
ds.setOaiprovenance(null); // Values not present in the DB
|
ds.setOaiprovenance(null); // Values not present in the DB
|
||||||
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
||||||
|
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
|
||||||
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
||||||
ds.setOfficialname(field(rs.getString("officialname"), info));
|
ds.setOfficialname(field(rs.getString("officialname"), info));
|
||||||
ds.setEnglishname(field(rs.getString("englishname"), info));
|
ds.setEnglishname(field(rs.getString("englishname"), info));
|
||||||
|
@ -269,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
ds.setDataInfo(info);
|
ds.setDataInfo(info);
|
||||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
|
|
||||||
|
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||||
|
ds.setThematic(rs.getBoolean("thematic"));
|
||||||
|
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
||||||
|
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||||
|
|
||||||
return Arrays.asList(ds);
|
return Arrays.asList(ds);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
@ -494,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
throw new IllegalStateException(
|
throw new IllegalStateException(
|
||||||
String
|
String
|
||||||
.format(
|
.format(
|
||||||
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
|
||||||
sourceId, targetId, semantics));
|
semantics));
|
||||||
}
|
}
|
||||||
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
||||||
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
||||||
|
@ -515,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
|
private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) {
|
||||||
Relation r = new Relation();
|
final Relation r = new Relation();
|
||||||
if (StringUtils.isNotBlank(validationDate)) {
|
if (StringUtils.isNotBlank(validationDate)) {
|
||||||
r.setValidated(true);
|
r.setValidated(true);
|
||||||
r.setValidationDate(validationDate);
|
r.setValidationDate(validationDate);
|
||||||
|
@ -529,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
|
private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType,
|
||||||
|
final String relClass) {
|
||||||
r.setRelType(relType);
|
r.setRelType(relType);
|
||||||
r.setSubRelType(subRelType);
|
r.setSubRelType(subRelType);
|
||||||
r.setRelClass(relClass);
|
r.setRelClass(relClass);
|
||||||
|
@ -602,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<Qualifier> prepareListOfQualifiers(final Array array) throws SQLException {
|
||||||
|
final List<Qualifier> res = new ArrayList<>();
|
||||||
|
if (array != null) {
|
||||||
|
for (final String s : (String[]) array.getArray()) {
|
||||||
|
final Qualifier q = prepareQualifierSplitting(s);
|
||||||
|
if (q != null) {
|
||||||
|
res.add(q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
public List<Oaf> processOrgOrgMergeRels(final ResultSet rs) {
|
public List<Oaf> processOrgOrgMergeRels(final ResultSet rs) {
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs); // TODO
|
final DataInfo info = prepareDataInfo(rs); // TODO
|
||||||
|
@ -658,6 +710,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
r1.setDataInfo(info);
|
r1.setDataInfo(info);
|
||||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
|
|
||||||
|
// removed because there's no difference between two sides //TODO
|
||||||
|
// final Relation r2 = new Relation();
|
||||||
|
// r2.setRelType(ORG_ORG_RELTYPE);
|
||||||
|
// r2.setSubRelType(ORG_ORG_SUBRELTYPE);
|
||||||
|
// r2.setRelClass(relClass);
|
||||||
|
// r2.setSource(orgId2);
|
||||||
|
// r2.setTarget(orgId1);
|
||||||
|
// r2.setCollectedfrom(collectedFrom);
|
||||||
|
// r2.setDataInfo(info);
|
||||||
|
// r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||||
|
// return Arrays.asList(r1, r2);
|
||||||
|
|
||||||
return Arrays.asList(r1);
|
return Arrays.asList(r1);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -165,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
return Arrays.asList(instance);
|
return Arrays.asList(instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String trimAndDecodeUrl(String url){
|
protected String trimAndDecodeUrl(String url) {
|
||||||
try {
|
try {
|
||||||
return URLDecoder.decode(url.trim(), "UTF-8");
|
return URLDecoder.decode(url.trim(), "UTF-8");
|
||||||
} catch (UnsupportedEncodingException e) {
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
|
|
@ -84,13 +84,18 @@ SELECT
|
||||||
dc.id AS collectedfromid,
|
dc.id AS collectedfromid,
|
||||||
dc.officialname AS collectedfromname,
|
dc.officialname AS collectedfromname,
|
||||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||||
|
d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
||||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||||
d.issn AS issnPrinted,
|
d.issn AS issnPrinted,
|
||||||
d.eissn AS issnOnline,
|
d.eissn AS issnOnline,
|
||||||
d.lissn AS issnLinking
|
d.lissn AS issnLinking,
|
||||||
|
de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||||
|
de.thematic AS thematic,
|
||||||
|
de.knowledge_graph AS knowledgegraph,
|
||||||
|
array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
|
||||||
|
|
||||||
FROM dsm_datasources d
|
FROM dsm_datasources d
|
||||||
|
LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
|
||||||
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
||||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
||||||
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
||||||
|
@ -126,4 +131,8 @@ GROUP BY
|
||||||
dc.officialname,
|
dc.officialname,
|
||||||
d.issn,
|
d.issn,
|
||||||
d.eissn,
|
d.eissn,
|
||||||
d.lissn
|
d.lissn,
|
||||||
|
de.jurisdiction,
|
||||||
|
de.thematic,
|
||||||
|
de.knowledge_graph,
|
||||||
|
de.content_policies
|
||||||
|
|
|
@ -11,7 +11,6 @@ import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
@ -21,6 +20,7 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
|
@ -760,7 +760,6 @@ class MappersTest {
|
||||||
assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid());
|
assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testXMLEncodedURL() throws IOException, DocumentException {
|
void testXMLEncodedURL() throws IOException, DocumentException {
|
||||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml")));
|
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml")));
|
||||||
|
@ -779,7 +778,8 @@ class MappersTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testXMLEncodedURL_ODF() throws IOException, DocumentException {
|
void testXMLEncodedURL_ODF() throws IOException, DocumentException {
|
||||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml")));
|
final String xml = IOUtils
|
||||||
|
.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml")));
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||||
|
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
|
@ -788,7 +788,7 @@ class MappersTest {
|
||||||
|
|
||||||
final Dataset p = (Dataset) list.get(0);
|
final Dataset p = (Dataset) list.get(0);
|
||||||
assertTrue(p.getInstance().size() > 0);
|
assertTrue(p.getInstance().size() > 0);
|
||||||
for(String url : p.getInstance().get(0).getUrl()){
|
for (String url : p.getInstance().get(0).getUrl()) {
|
||||||
System.out.println(url);
|
System.out.println(url);
|
||||||
assertTrue(!url.contains("&"));
|
assertTrue(!url.contains("&"));
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,11 +28,16 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
class MigrateDbEntitiesApplicationTest {
|
public class MigrateDbEntitiesApplicationTest {
|
||||||
|
|
||||||
private MigrateDbEntitiesApplication app;
|
private MigrateDbEntitiesApplication app;
|
||||||
|
|
||||||
|
@ -46,11 +51,8 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
public void setUp() {
|
public void setUp() {
|
||||||
lenient()
|
lenient()
|
||||||
.when(vocs.getTermAsQualifier(anyString(), anyString()))
|
.when(vocs.getTermAsQualifier(anyString(), anyString()))
|
||||||
.thenAnswer(
|
.thenAnswer(invocation -> OafMapperUtils
|
||||||
invocation -> OafMapperUtils
|
.qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0)));
|
||||||
.qualifier(
|
|
||||||
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
|
|
||||||
invocation.getArgument(0)));
|
|
||||||
|
|
||||||
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
||||||
|
|
||||||
|
@ -58,7 +60,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessDatasource() throws Exception {
|
public void testProcessDatasource() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processDatasource(rs);
|
final List<Oaf> list = app.processDatasource(rs);
|
||||||
|
@ -78,10 +80,27 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
||||||
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
||||||
assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking());
|
assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking());
|
||||||
|
|
||||||
|
assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid());
|
||||||
|
assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid());
|
||||||
|
|
||||||
|
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
|
||||||
|
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
|
||||||
|
|
||||||
|
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||||
|
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||||
|
|
||||||
|
assertTrue(ds.getThematic());
|
||||||
|
assertTrue(ds.getKnowledgegraph());
|
||||||
|
|
||||||
|
assertEquals(1, ds.getContentpolicies().size());
|
||||||
|
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid());
|
||||||
|
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessProject() throws Exception {
|
public void testProcessProject() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("projects_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("projects_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processProject(rs);
|
final List<Oaf> list = app.processProject(rs);
|
||||||
|
@ -99,7 +118,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessOrganization() throws Exception {
|
public void testProcessOrganization() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("organizations_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("organizations_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processOrganization(rs);
|
final List<Oaf> list = app.processOrganization(rs);
|
||||||
|
@ -119,14 +138,14 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
|
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
|
||||||
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
|
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
|
||||||
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
|
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
|
||||||
List<String> alternativenames = getValueAsList("alternativenames", fields);
|
final List<String> alternativenames = getValueAsList("alternativenames", fields);
|
||||||
assertEquals(2, alternativenames.size());
|
assertEquals(2, alternativenames.size());
|
||||||
assertTrue(alternativenames.contains("Pippo"));
|
assertTrue(alternativenames.contains("Pippo"));
|
||||||
assertTrue(alternativenames.contains("Foo"));
|
assertTrue(alternativenames.contains("Foo"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessDatasourceOrganization() throws Exception {
|
public void testProcessDatasourceOrganization() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
||||||
|
@ -143,7 +162,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessProjectOrganization() throws Exception {
|
public void testProcessProjectOrganization() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processProjectOrganization(rs);
|
final List<Oaf> list = app.processProjectOrganization(rs);
|
||||||
|
@ -162,7 +181,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessClaims_context() throws Exception {
|
public void testProcessClaims_context() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("claimscontext_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("claimscontext_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processClaims(rs);
|
final List<Oaf> list = app.processClaims(rs);
|
||||||
|
@ -177,7 +196,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testProcessClaims_rels() throws Exception {
|
public void testProcessClaims_rels() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("claimsrel_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("claimsrel_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processClaims(rs);
|
final List<Oaf> list = app.processClaims(rs);
|
||||||
|
@ -208,13 +227,15 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
|
|
||||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||||
|
|
||||||
|
// System.out.println(new ObjectMapper().writeValueAsString(r1));
|
||||||
|
// System.out.println(new ObjectMapper().writeValueAsString(r2));
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {
|
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {
|
||||||
final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile));
|
final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile));
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
final List<TypedField> list = mapper.readValue(json, new TypeReference<List<TypedField>>() {
|
final List<TypedField> list = mapper.readValue(json, new TypeReference<List<TypedField>>() {});
|
||||||
});
|
|
||||||
|
|
||||||
for (final TypedField tf : list) {
|
for (final TypedField tf : list) {
|
||||||
if (tf.getValue() == null) {
|
if (tf.getValue() == null) {
|
||||||
|
@ -270,7 +291,7 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
final String[] values = ((List<?>) tf.getValue())
|
final String[] values = ((List<?>) tf.getValue())
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(Object::toString)
|
.map(o -> o.toString())
|
||||||
.toArray(String[]::new);
|
.toArray(String[]::new);
|
||||||
|
|
||||||
Mockito.when(arr.getArray()).thenReturn(values);
|
Mockito.when(arr.getArray()).thenReturn(values);
|
||||||
|
@ -331,7 +352,6 @@ class MigrateDbEntitiesApplicationTest {
|
||||||
return new Float(getValueAs(name, fields).toString());
|
return new Float(getValueAs(name, fields).toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
||||||
return fields
|
return fields
|
||||||
.stream()
|
.stream()
|
||||||
|
|
|
@ -222,6 +222,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"field": "datasourcetypeui",
|
||||||
|
"type": "string",
|
||||||
|
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"field": "provenanceaction",
|
"field": "provenanceaction",
|
||||||
"type": "not_used",
|
"type": "not_used",
|
||||||
|
@ -241,5 +246,27 @@
|
||||||
"field": "issnLinking",
|
"field": "issnLinking",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": "2579-5447"
|
"value": "2579-5447"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "jurisdiction",
|
||||||
|
"type": "string",
|
||||||
|
"value": "National@@@eosc:jurisdictions"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "thematic",
|
||||||
|
"type": "boolean",
|
||||||
|
"value": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "knowledgegraph",
|
||||||
|
"type": "boolean",
|
||||||
|
"value": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "contentpolicies",
|
||||||
|
"type": "array",
|
||||||
|
"value": [
|
||||||
|
"Journal article@@@eosc:contentpolicies"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
|
* CreateRelatedEntitiesJob (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i, map E_i as RelatedEntity
|
||||||
* E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join
|
* T_i to simplify the model, extracting only the necessary information; join (R.target = T_i.id); save the tuples (R_i, T_i)
|
||||||
* (R.target = T_i.id) save the tuples (R_i, T_i)
|
|
||||||
*/
|
*/
|
||||||
public class CreateRelatedEntitiesJob_phase1 {
|
public class CreateRelatedEntitiesJob_phase1 {
|
||||||
|
|
||||||
|
@ -42,71 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
Objects
|
PrepareRelationsJob.class
|
||||||
.requireNonNull(
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"));
|
||||||
CreateRelatedEntitiesJob_phase1.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")));
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
String inputRelationsPath = parser.get("inputRelationsPath");
|
final String inputRelationsPath = parser.get("inputRelationsPath");
|
||||||
log.info("inputRelationsPath: {}", inputRelationsPath);
|
log.info("inputRelationsPath: {}", inputRelationsPath);
|
||||||
|
|
||||||
String inputEntityPath = parser.get("inputEntityPath");
|
final String inputEntityPath = parser.get("inputEntityPath");
|
||||||
log.info("inputEntityPath: {}", inputEntityPath);
|
log.info("inputEntityPath: {}", inputEntityPath);
|
||||||
|
|
||||||
String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
String graphTableClassName = parser.get("graphTableClassName");
|
final String graphTableClassName = parser.get("graphTableClassName");
|
||||||
log.info("graphTableClassName: {}", graphTableClassName);
|
log.info("graphTableClassName: {}", graphTableClassName);
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||||
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath);
|
joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <E extends OafEntity> void joinRelationEntity(
|
private static <E extends OafEntity> void joinRelationEntity(
|
||||||
SparkSession spark,
|
final SparkSession spark,
|
||||||
String inputRelationsPath,
|
final String inputRelationsPath,
|
||||||
String inputEntityPath,
|
final String inputEntityPath,
|
||||||
Class<E> clazz,
|
final Class<E> clazz,
|
||||||
String outputPath) {
|
final String outputPath) {
|
||||||
|
|
||||||
Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
|
final Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getTarget(),
|
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getTarget(),
|
||||||
r),
|
r),
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)))
|
Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)))
|
||||||
.cache();
|
.cache();
|
||||||
|
|
||||||
Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
|
final Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
|
||||||
.filter("dataInfo.invisible == false")
|
.filter("dataInfo.invisible == false")
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<E, Tuple2<String, RelatedEntity>>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)),
|
(MapFunction<E, Tuple2<String, RelatedEntity>>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)),
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
|
Encoders
|
||||||
|
.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
|
||||||
.cache();
|
.cache();
|
||||||
|
|
||||||
relsByTarget
|
relsByTarget
|
||||||
|
@ -121,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <E extends OafEntity> Dataset<E> readPathEntity(
|
private static <E extends OafEntity> Dataset<E> readPathEntity(
|
||||||
SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
|
final SparkSession spark,
|
||||||
|
final String inputEntityPath,
|
||||||
|
final Class<E> entityClazz) {
|
||||||
|
|
||||||
log.info("Reading Graph table from: {}", inputEntityPath);
|
log.info("Reading Graph table from: {}", inputEntityPath);
|
||||||
return spark
|
return spark
|
||||||
|
@ -132,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
Encoders.bean(entityClazz));
|
Encoders.bean(entityClazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <E extends OafEntity> RelatedEntity asRelatedEntity(E entity, Class<E> clazz) {
|
public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
|
||||||
|
|
||||||
final RelatedEntity re = new RelatedEntity();
|
final RelatedEntity re = new RelatedEntity();
|
||||||
re.setId(entity.getId());
|
re.setId(entity.getId());
|
||||||
|
@ -146,14 +148,10 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
case dataset:
|
case dataset:
|
||||||
case otherresearchproduct:
|
case otherresearchproduct:
|
||||||
case software:
|
case software:
|
||||||
Result result = (Result) entity;
|
final Result result = (Result) entity;
|
||||||
|
|
||||||
if (result.getTitle() != null && !result.getTitle().isEmpty()) {
|
if (result.getTitle() != null && !result.getTitle().isEmpty()) {
|
||||||
final StructuredProperty title = result
|
final StructuredProperty title = result.getTitle().stream().findFirst().get();
|
||||||
.getTitle()
|
|
||||||
.stream()
|
|
||||||
.findFirst()
|
|
||||||
.orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId()));
|
|
||||||
title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH));
|
title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH));
|
||||||
re.setTitle(title);
|
re.setTitle(title);
|
||||||
}
|
}
|
||||||
|
@ -177,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case datasource:
|
case datasource:
|
||||||
Datasource d = (Datasource) entity;
|
final Datasource d = (Datasource) entity;
|
||||||
|
|
||||||
re.setOfficialname(getValue(d.getOfficialname()));
|
re.setOfficialname(getValue(d.getOfficialname()));
|
||||||
re.setWebsiteurl(getValue(d.getWebsiteurl()));
|
re.setWebsiteurl(getValue(d.getWebsiteurl()));
|
||||||
re.setDatasourcetype(d.getDatasourcetype());
|
re.setDatasourcetype(d.getDatasourcetype());
|
||||||
|
re.setDatasourcetypeui(d.getDatasourcetypeui());
|
||||||
re.setOpenairecompatibility(d.getOpenairecompatibility());
|
re.setOpenairecompatibility(d.getOpenairecompatibility());
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case organization:
|
case organization:
|
||||||
Organization o = (Organization) entity;
|
final Organization o = (Organization) entity;
|
||||||
|
|
||||||
re.setLegalname(getValue(o.getLegalname()));
|
re.setLegalname(getValue(o.getLegalname()));
|
||||||
re.setLegalshortname(getValue(o.getLegalshortname()));
|
re.setLegalshortname(getValue(o.getLegalshortname()));
|
||||||
|
@ -194,50 +193,50 @@ public class CreateRelatedEntitiesJob_phase1 {
|
||||||
re.setWebsiteurl(getValue(o.getWebsiteurl()));
|
re.setWebsiteurl(getValue(o.getWebsiteurl()));
|
||||||
break;
|
break;
|
||||||
case project:
|
case project:
|
||||||
Project p = (Project) entity;
|
final Project p = (Project) entity;
|
||||||
|
|
||||||
re.setProjectTitle(getValue(p.getTitle()));
|
re.setProjectTitle(getValue(p.getTitle()));
|
||||||
re.setCode(getValue(p.getCode()));
|
re.setCode(getValue(p.getCode()));
|
||||||
re.setAcronym(getValue(p.getAcronym()));
|
re.setAcronym(getValue(p.getAcronym()));
|
||||||
re.setContracttype(p.getContracttype());
|
re.setContracttype(p.getContracttype());
|
||||||
|
|
||||||
List<Field<String>> f = p.getFundingtree();
|
final List<Field<String>> f = p.getFundingtree();
|
||||||
if (!f.isEmpty()) {
|
if (!f.isEmpty()) {
|
||||||
re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList()));
|
re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getValue(Field<String> field) {
|
private static String getValue(final Field<String> field) {
|
||||||
return getFieldValueWithDefault(field, "");
|
return getFieldValueWithDefault(field, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T> T getFieldValueWithDefault(Field<T> f, T defaultValue) {
|
private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(f)
|
.ofNullable(f)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(Field::getValue)
|
.map(x -> x.getValue())
|
||||||
.orElse(defaultValue);
|
.orElse(defaultValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text
|
* Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file.
|
||||||
* file
|
|
||||||
*
|
*
|
||||||
* @param spark the SparkSession
|
* @param spark the SparkSession
|
||||||
* @param relationPath the path storing the relation objects
|
* @param relationPath the path storing the relation objects
|
||||||
* @return the Dataset<SortableRelation> containing all the relationships
|
* @return the Dataset<SortableRelation> containing all the relationships
|
||||||
*/
|
*/
|
||||||
private static Dataset<Relation> readPathRelation(
|
private static Dataset<Relation> readPathRelation(
|
||||||
SparkSession spark, final String relationPath) {
|
final SparkSession spark,
|
||||||
|
final String relationPath) {
|
||||||
|
|
||||||
log.info("Reading relations from: {}", relationPath);
|
log.info("Reading relations from: {}", relationPath);
|
||||||
return spark.read().load(relationPath).as(Encoders.bean(Relation.class));
|
return spark.read().load(relationPath).as(Encoders.bean(Relation.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,63 +39,53 @@ public class XmlConverterJob {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
|
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
|
||||||
|
|
||||||
public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
XmlConverterJob.class
|
XmlConverterJob.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
|
||||||
"/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
final Boolean isSparkSessionManaged = Optional
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
String inputPath = parser.get("inputPath");
|
final String inputPath = parser.get("inputPath");
|
||||||
log.info("inputPath: {}", inputPath);
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
String otherDsTypeId = parser.get("otherDsTypeId");
|
final SparkConf conf = new SparkConf();
|
||||||
log.info("otherDsTypeId: {}", otherDsTypeId);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
convertToXml(
|
convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl));
|
||||||
spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void convertToXml(
|
private static void convertToXml(
|
||||||
SparkSession spark,
|
final SparkSession spark,
|
||||||
String inputPath,
|
final String inputPath,
|
||||||
String outputPath,
|
final String outputPath,
|
||||||
ContextMapper contextMapper,
|
final ContextMapper contextMapper) {
|
||||||
String otherDsTypeId) {
|
|
||||||
|
|
||||||
final XmlRecordFactory recordFactory = new XmlRecordFactory(
|
final XmlRecordFactory recordFactory = new XmlRecordFactory(
|
||||||
prepareAccumulators(spark.sparkContext()),
|
prepareAccumulators(spark.sparkContext()),
|
||||||
contextMapper,
|
contextMapper,
|
||||||
false,
|
false,
|
||||||
SCHEMA_LOCATION,
|
schemaLocation);
|
||||||
otherDsTypeId);
|
|
||||||
|
|
||||||
final List<String> paths = HdfsSupport
|
final List<String> paths = HdfsSupport
|
||||||
.listFiles(inputPath, spark.sparkContext().hadoopConfiguration());
|
.listFiles(inputPath, spark.sparkContext().hadoopConfiguration());
|
||||||
|
@ -115,16 +105,15 @@ public class XmlConverterJob {
|
||||||
.mapToPair(
|
.mapToPair(
|
||||||
(PairFunction<Tuple2<String, String>, Text, Text>) t -> new Tuple2<>(new Text(t._1()),
|
(PairFunction<Tuple2<String, String>, Text, Text>) t -> new Tuple2<>(new Text(t._1()),
|
||||||
new Text(t._2())))
|
new Text(t._2())))
|
||||||
.saveAsHadoopFile(
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
|
private static Map<String, LongAccumulator> prepareAccumulators(final SparkContext sc) {
|
||||||
Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
final Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put(
|
||||||
"resultResult_similarity_isAmongTopNSimilarDocuments",
|
"resultResult_similarity_isAmongTopNSimilarDocuments",
|
||||||
|
@ -135,15 +124,13 @@ public class XmlConverterJob {
|
||||||
sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments"));
|
sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments"));
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put(
|
||||||
"resultResult_supplement_isSupplementTo",
|
"resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo"));
|
||||||
sc.longAccumulator("resultResult_supplement_isSupplementTo"));
|
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put(
|
||||||
"resultResult_supplement_isSupplementedBy",
|
"resultResult_supplement_isSupplementedBy",
|
||||||
sc.longAccumulator("resultResult_supplement_isSupplementedBy"));
|
sc.longAccumulator("resultResult_supplement_isSupplementedBy"));
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
|
||||||
"resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
|
|
||||||
accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges"));
|
accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges"));
|
||||||
|
|
||||||
accumulators
|
accumulators
|
||||||
|
@ -151,16 +138,11 @@ public class XmlConverterJob {
|
||||||
"resultResult_publicationDataset_isRelatedTo",
|
"resultResult_publicationDataset_isRelatedTo",
|
||||||
sc.longAccumulator("resultResult_publicationDataset_isRelatedTo"));
|
sc.longAccumulator("resultResult_publicationDataset_isRelatedTo"));
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo"));
|
||||||
"resultResult_relationship_isRelatedTo",
|
|
||||||
sc.longAccumulator("resultResult_relationship_isRelatedTo"));
|
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy"));
|
||||||
"resultProject_outcome_isProducedBy",
|
|
||||||
sc.longAccumulator("resultProject_outcome_isProducedBy"));
|
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
|
||||||
"resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
|
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put(
|
||||||
"resultOrganization_affiliation_isAuthorInstitutionOf",
|
"resultOrganization_affiliation_isAuthorInstitutionOf",
|
||||||
|
@ -183,9 +165,7 @@ public class XmlConverterJob {
|
||||||
"organizationOrganization_dedup_isMergedIn",
|
"organizationOrganization_dedup_isMergedIn",
|
||||||
sc.longAccumulator("organizationOrganization_dedup_isMergedIn"));
|
sc.longAccumulator("organizationOrganization_dedup_isMergedIn"));
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
// The accumulator name must match its map key, as for every other entry registered here;
// it was previously created as "resultProject_outcome_produces", duplicating that accumulator's name.
.put("organizationOrganization_dedup_merges", sc.longAccumulator("organizationOrganization_dedup_merges"));
|
||||||
"organizationOrganization_dedup_merges",
|
|
||||||
sc.longAccumulator("organizationOrganization_dedup_merges"));
|
|
||||||
accumulators
|
accumulators
|
||||||
.put(
|
.put(
|
||||||
"datasourceOrganization_provision_isProvidedBy",
|
"datasourceOrganization_provision_isProvidedBy",
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -16,11 +16,5 @@
|
||||||
"paramLongName": "isLookupUrl",
|
"paramLongName": "isLookupUrl",
|
||||||
"paramDescription": "URL of the isLookUp Service",
|
"paramDescription": "URL of the isLookUp Service",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "odt",
|
|
||||||
"paramLongName": "otherDsTypeId",
|
|
||||||
"paramDescription": "list of datasource types to populate field datasourcetypeui",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -25,10 +25,6 @@
|
||||||
<name>targetMaxRelations</name>
|
<name>targetMaxRelations</name>
|
||||||
<description>maximum number of relations allowed for a each entity grouping by target</description>
|
<description>maximum number of relations allowed for a each entity grouping by target</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
|
||||||
<name>otherDsTypeId</name>
|
|
||||||
<description>mapping used to populate datasourceTypeUi field</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
<property>
|
||||||
<name>format</name>
|
<name>format</name>
|
||||||
<description>metadata format name (DMF|TMF)</description>
|
<description>metadata format name (DMF|TMF)</description>
|
||||||
|
@ -582,7 +578,6 @@
|
||||||
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--otherDsTypeId</arg><arg>${otherDsTypeId}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="should_index"/>
|
<ok to="should_index"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
import javax.xml.transform.Transformer;
|
import javax.xml.transform.Transformer;
|
||||||
import javax.xml.transform.TransformerException;
|
import javax.xml.transform.TransformerException;
|
||||||
|
@ -33,7 +32,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
||||||
*
|
*
|
||||||
* The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities.
|
* The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities.
|
||||||
*/
|
*/
|
||||||
class IndexRecordTransformerTest {
|
public class IndexRecordTransformerTest {
|
||||||
|
|
||||||
public static final String VERSION = "2021-04-15T10:05:53Z";
|
public static final String VERSION = "2021-04-15T10:05:53Z";
|
||||||
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
|
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
|
||||||
|
@ -46,23 +45,23 @@ class IndexRecordTransformerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testPreBuiltRecordTransformation() throws IOException, TransformerException {
|
public void testPreBuiltRecordTransformation() throws IOException, TransformerException {
|
||||||
String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml")));
|
final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
|
||||||
|
|
||||||
testRecordTransformation(record);
|
testRecordTransformation(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testPublicationRecordTransformation() throws IOException, TransformerException {
|
public void testPublicationRecordTransformation() throws IOException, TransformerException {
|
||||||
|
|
||||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
XmlRecordFactoryTest.otherDsTypeId);
|
XmlConverterJob.schemaLocation);
|
||||||
|
|
||||||
Publication p = load("publication.json", Publication.class);
|
final Publication p = load("publication.json", Publication.class);
|
||||||
Project pj = load("project.json", Project.class);
|
final Project pj = load("project.json", Project.class);
|
||||||
Relation rel = load("relToValidatedProject.json", Relation.class);
|
final Relation rel = load("relToValidatedProject.json", Relation.class);
|
||||||
|
|
||||||
JoinedEntity<Publication> je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity<>(p);
|
||||||
je
|
je
|
||||||
.setLinks(
|
.setLinks(
|
||||||
Lists
|
Lists
|
||||||
|
@ -70,25 +69,25 @@ class IndexRecordTransformerTest {
|
||||||
new RelatedEntityWrapper(rel,
|
new RelatedEntityWrapper(rel,
|
||||||
CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));
|
CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));
|
||||||
|
|
||||||
String record = xmlRecordFactory.build(je);
|
final String record = xmlRecordFactory.build(je);
|
||||||
|
|
||||||
assertNotNull(record);
|
assertNotNull(record);
|
||||||
|
|
||||||
testRecordTransformation(record);
|
testRecordTransformation(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testRecordTransformation(String record) throws IOException, TransformerException {
|
private void testRecordTransformation(final String record) throws IOException, TransformerException {
|
||||||
String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml")));
|
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
|
||||||
String xslt = IOUtils
|
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
|
||||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")));
|
|
||||||
|
|
||||||
String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
||||||
|
|
||||||
Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
||||||
|
|
||||||
String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||||
|
|
||||||
SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML);
|
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
|
||||||
|
.parseDocument(indexRecordXML);
|
||||||
|
|
||||||
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
||||||
|
|
||||||
|
@ -96,9 +95,9 @@ class IndexRecordTransformerTest {
|
||||||
System.out.println(xmlDoc);
|
System.out.println(xmlDoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> T load(String fileName, Class<T> clazz) throws IOException {
|
private <T> T load(final String fileName, final Class<T> clazz) throws IOException {
|
||||||
return XmlRecordFactoryTest.OBJECT_MAPPER
|
return XmlRecordFactoryTest.OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
@ -13,7 +14,6 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
@ -24,34 +24,31 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
class XmlRecordFactoryTest {
|
public class XmlRecordFactoryTest {
|
||||||
|
|
||||||
public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
|
|
||||||
|
|
||||||
public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testXMLRecordFactory() throws IOException, DocumentException {
|
public void testXMLRecordFactory() throws IOException, DocumentException {
|
||||||
|
|
||||||
ContextMapper contextMapper = new ContextMapper();
|
final ContextMapper contextMapper = new ContextMapper();
|
||||||
|
|
||||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
otherDsTypeId);
|
XmlConverterJob.schemaLocation);
|
||||||
|
|
||||||
Publication p = OBJECT_MAPPER
|
final Publication p = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||||
|
|
||||||
String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
Document doc = new SAXReader().read(new StringReader(xml));
|
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||||
|
|
||||||
assertNotNull(doc);
|
assertNotNull(doc);
|
||||||
|
|
||||||
|
@ -72,93 +69,64 @@ class XmlRecordFactoryTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
|
public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
|
||||||
|
|
||||||
ContextMapper contextMapper = new ContextMapper();
|
final ContextMapper contextMapper = new ContextMapper();
|
||||||
|
|
||||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
otherDsTypeId);
|
XmlConverterJob.schemaLocation);
|
||||||
|
|
||||||
Publication p = OBJECT_MAPPER
|
final Publication p = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||||
Project pj = OBJECT_MAPPER
|
final Project pj = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
||||||
Relation rel = OBJECT_MAPPER
|
final Relation rel = OBJECT_MAPPER
|
||||||
.readValue(
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class);
|
||||||
(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class);
|
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||||
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||||
List<RelatedEntityWrapper> links = Lists.newArrayList();
|
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||||
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
|
||||||
links.add(rew);
|
links.add(rew);
|
||||||
JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity<>(p);
|
||||||
je.setLinks(links);
|
je.setLinks(links);
|
||||||
|
|
||||||
String xml = xmlRecordFactory.build(je);
|
final String xml = xmlRecordFactory.build(je);
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
Document doc = new SAXReader().read(new StringReader(xml));
|
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||||
assertNotNull(doc);
|
assertNotNull(doc);
|
||||||
System.out.println(doc.asXML());
|
System.out.println(doc.asXML());
|
||||||
Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
|
Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
|
public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
|
||||||
|
|
||||||
ContextMapper contextMapper = new ContextMapper();
|
final ContextMapper contextMapper = new ContextMapper();
|
||||||
|
|
||||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||||
otherDsTypeId);
|
XmlConverterJob.schemaLocation);
|
||||||
|
|
||||||
Publication p = OBJECT_MAPPER
|
final Publication p = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||||
Project pj = OBJECT_MAPPER
|
final Project pj = OBJECT_MAPPER
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
||||||
Relation rel = OBJECT_MAPPER
|
final Relation rel = OBJECT_MAPPER
|
||||||
.readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class);
|
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class);
|
||||||
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||||
List<RelatedEntityWrapper> links = Lists.newArrayList();
|
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||||
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||||
links.add(rew);
|
links.add(rew);
|
||||||
JoinedEntity je = new JoinedEntity<>(p);
|
final JoinedEntity je = new JoinedEntity<>(p);
|
||||||
je.setLinks(links);
|
je.setLinks(links);
|
||||||
|
|
||||||
String xml = xmlRecordFactory.build(je);
|
final String xml = xmlRecordFactory.build(je);
|
||||||
|
|
||||||
assertNotNull(xml);
|
assertNotNull(xml);
|
||||||
|
|
||||||
Document doc = new SAXReader().read(new StringReader(xml));
|
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||||
assertNotNull(doc);
|
assertNotNull(doc);
|
||||||
System.out.println(doc.asXML());
|
System.out.println(doc.asXML());
|
||||||
assertEquals("", doc.valueOf("//rel/validated"));
|
assertEquals("", doc.valueOf("//rel/validated"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
void testEnermapsRecord() throws IOException, DocumentException, SAXException {
|
|
||||||
|
|
||||||
String contextmap = "<entries><entry id=\"enermaps\" label=\"Energy Research\" name=\"context\" type=\"community\"/>"
|
|
||||||
+
|
|
||||||
"<entry id=\"enermaps::selection\" label=\"Featured dataset\" name=\"category\"/>" +
|
|
||||||
"<entry id=\"enermaps::selection::tgs00004\" label=\"Dataset title\" name=\"concept\"/>" +
|
|
||||||
"</entries>";
|
|
||||||
|
|
||||||
ContextMapper contextMapper = ContextMapper.fromXml(contextmap);
|
|
||||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
|
||||||
otherDsTypeId);
|
|
||||||
|
|
||||||
Dataset d = OBJECT_MAPPER
|
|
||||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class);
|
|
||||||
|
|
||||||
JoinedEntity je = new JoinedEntity<>(d);
|
|
||||||
|
|
||||||
String xml = xmlRecordFactory.build(je);
|
|
||||||
|
|
||||||
assertNotNull(xml);
|
|
||||||
|
|
||||||
Document doc = new SAXReader().read(new StringReader(xml));
|
|
||||||
assertNotNull(doc);
|
|
||||||
System.out.println(doc.asXML());
|
|
||||||
assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
17
pom.xml
17
pom.xml
|
@ -205,11 +205,6 @@
|
||||||
<artifactId>dateparser</artifactId>
|
<artifactId>dateparser</artifactId>
|
||||||
<version>1.0.7</version>
|
<version>1.0.7</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>me.xuender</groupId>
|
|
||||||
<artifactId>unidecode</artifactId>
|
|
||||||
<version>0.0.7</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
|
@ -519,16 +514,6 @@
|
||||||
<version>${common.text.version}</version>
|
<version>${common.text.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.opencsv</groupId>
|
|
||||||
<artifactId>opencsv</artifactId>
|
|
||||||
<version>5.5</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.github.classgraph</groupId>
|
|
||||||
<artifactId>classgraph</artifactId>
|
|
||||||
<version>4.8.71</version>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
||||||
|
@ -751,7 +736,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.7.15]</dhp-schemas.version>
|
<dhp-schemas.version>[2.7.15-SNAPSHOT]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue