forked from D-Net/dnet-hadoop
Merge pull request 'towards EOSC datasource profiles' (#130) from datasource_model_eosc_beta into beta
Reviewed-on: D-Net/dnet-hadoop#130
This commit is contained in:
commit
3bcac7e88c
|
@ -1,17 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.actionmanager.project;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
|
@ -19,7 +9,20 @@ import java.io.IOException;
|
|||
import java.io.InputStreamReader;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||
|
||||
public class DownloadCsvTest {
|
||||
|
||||
|
@ -103,8 +106,8 @@ public class DownloadCsvTest {
|
|||
GetCSV
|
||||
.getCsv(
|
||||
fs,
|
||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))
|
||||
, workingDir + "/projects",
|
||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/projects",
|
||||
CSVProject.class.getName(), ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
||||
|
|
|
@ -33,7 +33,7 @@ public class Process implements Serializable {
|
|||
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
||||
}
|
||||
ri.setId(Utils.getContextId(ci.getId()));
|
||||
ri.setOriginalId(ci.getId());
|
||||
ri.setAcronym(ci.getId());
|
||||
|
||||
ri.setDescription(ci.getDescription());
|
||||
ri.setName(ci.getName());
|
||||
|
|
|
@ -1,15 +1,51 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.sql.Array;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
@ -50,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class);
|
||||
|
||||
private static final DataInfo DATA_INFO_CLAIM = dataInfo(
|
||||
false, null, false, false,
|
||||
qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9");
|
||||
false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS),
|
||||
"0.9");
|
||||
|
||||
private static final List<KeyValue> COLLECTED_FROM_CLAIM = listKeyValues(
|
||||
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
||||
|
@ -69,10 +105,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
MigrateDbEntitiesApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))));
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")));
|
||||
|
||||
parser.parseArgument(args);
|
||||
|
||||
|
@ -86,7 +120,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
log.info("postgresPassword: xxx");
|
||||
|
||||
final String dbSchema = parser.get("dbschema");
|
||||
log.info("dbSchema {}: ", dbSchema);
|
||||
log.info("dbSchema {}: " + dbSchema);
|
||||
|
||||
final String isLookupUrl = parser.get("isLookupUrl");
|
||||
log.info("isLookupUrl: {}", isLookupUrl);
|
||||
|
@ -139,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
case openorgs_dedup: // generates organization entities and relations for openorgs dedup
|
||||
log.info("Processing Openorgs...");
|
||||
smdbe
|
||||
.execute(
|
||||
"queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
.execute("queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing Openorgs Sim Rels...");
|
||||
smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels);
|
||||
|
@ -149,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
case openorgs: // generates organization entities and relations for provision
|
||||
log.info("Processing Openorgs For Provision...");
|
||||
smdbe
|
||||
.execute(
|
||||
"queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
.execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing Openorgs Merge Rels...");
|
||||
smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels);
|
||||
|
@ -228,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB
|
||||
ds.setOaiprovenance(null); // Values not present in the DB
|
||||
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
||||
ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui")));
|
||||
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
||||
ds.setOfficialname(field(rs.getString("officialname"), info));
|
||||
ds.setEnglishname(field(rs.getString("englishname"), info));
|
||||
|
@ -269,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
ds.setDataInfo(info);
|
||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||
ds.setThematic(rs.getBoolean("thematic"));
|
||||
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
||||
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||
|
||||
return Arrays.asList(ds);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
|
@ -494,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
throw new IllegalStateException(
|
||||
String
|
||||
.format(
|
||||
"invalid claim, sourceId: %s, targetId: %s, semantics: %s",
|
||||
sourceId, targetId, semantics));
|
||||
"invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId,
|
||||
semantics));
|
||||
}
|
||||
r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES);
|
||||
r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY);
|
||||
|
@ -515,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
private Relation prepareRelation(String sourceId, String targetId, String validationDate) {
|
||||
Relation r = new Relation();
|
||||
private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) {
|
||||
final Relation r = new Relation();
|
||||
if (StringUtils.isNotBlank(validationDate)) {
|
||||
r.setValidated(true);
|
||||
r.setValidationDate(validationDate);
|
||||
|
@ -529,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
return r;
|
||||
}
|
||||
|
||||
private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) {
|
||||
private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType,
|
||||
final String relClass) {
|
||||
r.setRelType(relType);
|
||||
r.setSubRelType(subRelType);
|
||||
r.setRelClass(relClass);
|
||||
|
@ -602,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
return res;
|
||||
}
|
||||
|
||||
private List<Qualifier> prepareListOfQualifiers(final Array array) throws SQLException {
|
||||
final List<Qualifier> res = new ArrayList<>();
|
||||
if (array != null) {
|
||||
for (final String s : (String[]) array.getArray()) {
|
||||
final Qualifier q = prepareQualifierSplitting(s);
|
||||
if (q != null) {
|
||||
res.add(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public List<Oaf> processOrgOrgMergeRels(final ResultSet rs) {
|
||||
try {
|
||||
final DataInfo info = prepareDataInfo(rs); // TODO
|
||||
|
@ -658,6 +710,18 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
// removed because there's no difference between two sides //TODO
|
||||
// final Relation r2 = new Relation();
|
||||
// r2.setRelType(ORG_ORG_RELTYPE);
|
||||
// r2.setSubRelType(ORG_ORG_SUBRELTYPE);
|
||||
// r2.setRelClass(relClass);
|
||||
// r2.setSource(orgId2);
|
||||
// r2.setTarget(orgId1);
|
||||
// r2.setCollectedfrom(collectedFrom);
|
||||
// r2.setDataInfo(info);
|
||||
// r2.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
// return Arrays.asList(r1, r2);
|
||||
|
||||
return Arrays.asList(r1);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
|
|
|
@ -165,7 +165,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
return Arrays.asList(instance);
|
||||
}
|
||||
|
||||
protected String trimAndDecodeUrl(String url){
|
||||
protected String trimAndDecodeUrl(String url) {
|
||||
try {
|
||||
return URLDecoder.decode(url.trim(), "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
|
|
|
@ -84,13 +84,18 @@ SELECT
|
|||
dc.id AS collectedfromid,
|
||||
dc.officialname AS collectedfromname,
|
||||
d.typology||'@@@dnet:datasource_typologies' AS datasourcetype,
|
||||
d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui,
|
||||
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction,
|
||||
d.issn AS issnPrinted,
|
||||
d.eissn AS issnOnline,
|
||||
d.lissn AS issnLinking
|
||||
d.lissn AS issnLinking,
|
||||
de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||
de.thematic AS thematic,
|
||||
de.knowledge_graph AS knowledgegraph,
|
||||
array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies
|
||||
|
||||
FROM dsm_datasources d
|
||||
|
||||
LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id)
|
||||
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id)
|
||||
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource)
|
||||
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource)
|
||||
|
@ -126,4 +131,8 @@ GROUP BY
|
|||
dc.officialname,
|
||||
d.issn,
|
||||
d.eissn,
|
||||
d.lissn
|
||||
d.lissn,
|
||||
de.jurisdiction,
|
||||
de.thematic,
|
||||
de.knowledge_graph,
|
||||
de.content_policies
|
||||
|
|
|
@ -11,7 +11,6 @@ import java.util.List;
|
|||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.DocumentException;
|
||||
|
@ -21,6 +20,7 @@ import org.junit.jupiter.api.extension.ExtendWith;
|
|||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
|
@ -760,7 +760,6 @@ class MappersTest {
|
|||
assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testXMLEncodedURL() throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml")));
|
||||
|
@ -779,7 +778,8 @@ class MappersTest {
|
|||
|
||||
@Test
|
||||
void testXMLEncodedURL_ODF() throws IOException, DocumentException {
|
||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml")));
|
||||
final String xml = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml")));
|
||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||
|
||||
System.out.println("***************");
|
||||
|
@ -788,7 +788,7 @@ class MappersTest {
|
|||
|
||||
final Dataset p = (Dataset) list.get(0);
|
||||
assertTrue(p.getInstance().size() > 0);
|
||||
for(String url : p.getInstance().get(0).getUrl()){
|
||||
for (String url : p.getInstance().get(0).getUrl()) {
|
||||
System.out.println(url);
|
||||
assertTrue(!url.contains("&"));
|
||||
}
|
||||
|
|
|
@ -28,11 +28,16 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class MigrateDbEntitiesApplicationTest {
|
||||
public class MigrateDbEntitiesApplicationTest {
|
||||
|
||||
private MigrateDbEntitiesApplication app;
|
||||
|
||||
|
@ -46,11 +51,8 @@ class MigrateDbEntitiesApplicationTest {
|
|||
public void setUp() {
|
||||
lenient()
|
||||
.when(vocs.getTermAsQualifier(anyString(), anyString()))
|
||||
.thenAnswer(
|
||||
invocation -> OafMapperUtils
|
||||
.qualifier(
|
||||
invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
|
||||
invocation.getArgument(0)));
|
||||
.thenAnswer(invocation -> OafMapperUtils
|
||||
.qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0)));
|
||||
|
||||
lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true);
|
||||
|
||||
|
@ -58,7 +60,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testProcessDatasource() throws Exception {
|
||||
public void testProcessDatasource() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasources_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processDatasource(rs);
|
||||
|
@ -78,10 +80,27 @@ class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted());
|
||||
assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline());
|
||||
assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking());
|
||||
|
||||
assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid());
|
||||
assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid());
|
||||
|
||||
assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid());
|
||||
assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid());
|
||||
|
||||
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||
|
||||
assertTrue(ds.getThematic());
|
||||
assertTrue(ds.getKnowledgegraph());
|
||||
|
||||
assertEquals(1, ds.getContentpolicies().size());
|
||||
assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid());
|
||||
assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testProcessProject() throws Exception {
|
||||
public void testProcessProject() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("projects_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processProject(rs);
|
||||
|
@ -99,7 +118,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testProcessOrganization() throws Exception {
|
||||
public void testProcessOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("organizations_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processOrganization(rs);
|
||||
|
@ -119,14 +138,14 @@ class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid());
|
||||
assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename());
|
||||
assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue());
|
||||
List<String> alternativenames = getValueAsList("alternativenames", fields);
|
||||
final List<String> alternativenames = getValueAsList("alternativenames", fields);
|
||||
assertEquals(2, alternativenames.size());
|
||||
assertTrue(alternativenames.contains("Pippo"));
|
||||
assertTrue(alternativenames.contains("Foo"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testProcessDatasourceOrganization() throws Exception {
|
||||
public void testProcessDatasourceOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
||||
|
@ -143,7 +162,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testProcessProjectOrganization() throws Exception {
|
||||
public void testProcessProjectOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("projectorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processProjectOrganization(rs);
|
||||
|
@ -162,7 +181,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testProcessClaims_context() throws Exception {
|
||||
public void testProcessClaims_context() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("claimscontext_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processClaims(rs);
|
||||
|
@ -177,7 +196,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testProcessClaims_rels() throws Exception {
|
||||
public void testProcessClaims_rels() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("claimsrel_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processClaims(rs);
|
||||
|
@ -208,13 +227,15 @@ class MigrateDbEntitiesApplicationTest {
|
|||
|
||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(r1));
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(r2));
|
||||
}
|
||||
|
||||
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {
|
||||
final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile));
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
final List<TypedField> list = mapper.readValue(json, new TypeReference<List<TypedField>>() {
|
||||
});
|
||||
final List<TypedField> list = mapper.readValue(json, new TypeReference<List<TypedField>>() {});
|
||||
|
||||
for (final TypedField tf : list) {
|
||||
if (tf.getValue() == null) {
|
||||
|
@ -270,7 +291,7 @@ class MigrateDbEntitiesApplicationTest {
|
|||
final String[] values = ((List<?>) tf.getValue())
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(Object::toString)
|
||||
.map(o -> o.toString())
|
||||
.toArray(String[]::new);
|
||||
|
||||
Mockito.when(arr.getArray()).thenReturn(values);
|
||||
|
@ -331,7 +352,6 @@ class MigrateDbEntitiesApplicationTest {
|
|||
return new Float(getValueAs(name, fields).toString());
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private <T> T getValueAs(final String name, final List<TypedField> fields) {
|
||||
return fields
|
||||
.stream()
|
||||
|
|
|
@ -222,6 +222,11 @@
|
|||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies"
|
||||
},
|
||||
{
|
||||
"field": "datasourcetypeui",
|
||||
"type": "string",
|
||||
"value": "pubsrepository::journal@@@dnet:datasource_typologies_ui"
|
||||
},
|
||||
{
|
||||
"field": "provenanceaction",
|
||||
"type": "not_used",
|
||||
|
@ -241,5 +246,27 @@
|
|||
"field": "issnLinking",
|
||||
"type": "string",
|
||||
"value": "2579-5447"
|
||||
},
|
||||
{
|
||||
"field": "jurisdiction",
|
||||
"type": "string",
|
||||
"value": "National@@@eosc:jurisdictions"
|
||||
},
|
||||
{
|
||||
"field": "thematic",
|
||||
"type": "boolean",
|
||||
"value": true
|
||||
},
|
||||
{
|
||||
"field": "knowledgegraph",
|
||||
"type": "boolean",
|
||||
"value": true
|
||||
},
|
||||
{
|
||||
"field": "contentpolicies",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"Journal article@@@eosc:contentpolicies"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
|
|||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
||||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
|
||||
import scala.Tuple2;
|
||||
|
||||
/**
|
||||
* CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
|
||||
* E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join
|
||||
* (R.target = T_i.id) save the tuples (R_i, T_i)
|
||||
* CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity
|
||||
* T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i)
|
||||
*/
|
||||
public class CreateRelatedEntitiesJob_phase1 {
|
||||
|
||||
|
@ -42,71 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
final String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
Objects
|
||||
.requireNonNull(
|
||||
CreateRelatedEntitiesJob_phase1.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")));
|
||||
PrepareRelationsJob.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"));
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputRelationsPath = parser.get("inputRelationsPath");
|
||||
final String inputRelationsPath = parser.get("inputRelationsPath");
|
||||
log.info("inputRelationsPath: {}", inputRelationsPath);
|
||||
|
||||
String inputEntityPath = parser.get("inputEntityPath");
|
||||
final String inputEntityPath = parser.get("inputEntityPath");
|
||||
log.info("inputEntityPath: {}", inputEntityPath);
|
||||
|
||||
String outputPath = parser.get("outputPath");
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
String graphTableClassName = parser.get("graphTableClassName");
|
||||
final String graphTableClassName = parser.get("graphTableClassName");
|
||||
log.info("graphTableClassName: {}", graphTableClassName);
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||
final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
final SparkConf conf = new SparkConf();
|
||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
private static <E extends OafEntity> void joinRelationEntity(
|
||||
SparkSession spark,
|
||||
String inputRelationsPath,
|
||||
String inputEntityPath,
|
||||
Class<E> clazz,
|
||||
String outputPath) {
|
||||
final SparkSession spark,
|
||||
final String inputRelationsPath,
|
||||
final String inputEntityPath,
|
||||
final Class<E> clazz,
|
||||
final String outputPath) {
|
||||
|
||||
Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
|
||||
final Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
|
||||
.map(
|
||||
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getTarget(),
|
||||
r),
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)))
|
||||
.cache();
|
||||
|
||||
Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
|
||||
final Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
|
||||
.filter("dataInfo.invisible == false")
|
||||
.map(
|
||||
(MapFunction<E, Tuple2<String, RelatedEntity>>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)),
|
||||
Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
|
||||
Encoders
|
||||
.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
|
||||
.cache();
|
||||
|
||||
relsByTarget
|
||||
|
@ -121,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
}
|
||||
|
||||
private static <E extends OafEntity> Dataset<E> readPathEntity(
|
||||
SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
|
||||
final SparkSession spark,
|
||||
final String inputEntityPath,
|
||||
final Class<E> entityClazz) {
|
||||
|
||||
log.info("Reading Graph table from: {}", inputEntityPath);
|
||||
return spark
|
||||
|
@ -132,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
Encoders.bean(entityClazz));
|
||||
}
|
||||
|
||||
public static <E extends OafEntity> RelatedEntity asRelatedEntity(E entity, Class<E> clazz) {
|
||||
public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {
|
||||
|
||||
final RelatedEntity re = new RelatedEntity();
|
||||
re.setId(entity.getId());
|
||||
|
@ -146,14 +148,10 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
case dataset:
|
||||
case otherresearchproduct:
|
||||
case software:
|
||||
Result result = (Result) entity;
|
||||
final Result result = (Result) entity;
|
||||
|
||||
if (result.getTitle() != null && !result.getTitle().isEmpty()) {
|
||||
final StructuredProperty title = result
|
||||
.getTitle()
|
||||
.stream()
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId()));
|
||||
final StructuredProperty title = result.getTitle().stream().findFirst().get();
|
||||
title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH));
|
||||
re.setTitle(title);
|
||||
}
|
||||
|
@ -177,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
|
||||
break;
|
||||
case datasource:
|
||||
Datasource d = (Datasource) entity;
|
||||
final Datasource d = (Datasource) entity;
|
||||
|
||||
re.setOfficialname(getValue(d.getOfficialname()));
|
||||
re.setWebsiteurl(getValue(d.getWebsiteurl()));
|
||||
re.setDatasourcetype(d.getDatasourcetype());
|
||||
re.setDatasourcetypeui(d.getDatasourcetypeui());
|
||||
re.setOpenairecompatibility(d.getOpenairecompatibility());
|
||||
|
||||
break;
|
||||
case organization:
|
||||
Organization o = (Organization) entity;
|
||||
final Organization o = (Organization) entity;
|
||||
|
||||
re.setLegalname(getValue(o.getLegalname()));
|
||||
re.setLegalshortname(getValue(o.getLegalshortname()));
|
||||
|
@ -194,50 +193,50 @@ public class CreateRelatedEntitiesJob_phase1 {
|
|||
re.setWebsiteurl(getValue(o.getWebsiteurl()));
|
||||
break;
|
||||
case project:
|
||||
Project p = (Project) entity;
|
||||
final Project p = (Project) entity;
|
||||
|
||||
re.setProjectTitle(getValue(p.getTitle()));
|
||||
re.setCode(getValue(p.getCode()));
|
||||
re.setAcronym(getValue(p.getAcronym()));
|
||||
re.setContracttype(p.getContracttype());
|
||||
|
||||
List<Field<String>> f = p.getFundingtree();
|
||||
final List<Field<String>> f = p.getFundingtree();
|
||||
if (!f.isEmpty()) {
|
||||
re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
return re;
|
||||
}
|
||||
|
||||
private static String getValue(Field<String> field) {
|
||||
private static String getValue(final Field<String> field) {
|
||||
return getFieldValueWithDefault(field, "");
|
||||
}
|
||||
|
||||
private static <T> T getFieldValueWithDefault(Field<T> f, T defaultValue) {
|
||||
private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) {
|
||||
return Optional
|
||||
.ofNullable(f)
|
||||
.filter(Objects::nonNull)
|
||||
.map(Field::getValue)
|
||||
.map(x -> x.getValue())
|
||||
.orElse(defaultValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text
|
||||
* file
|
||||
* Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file,
|
||||
*
|
||||
* @param spark the SparkSession
|
||||
* @param relationPath the path storing the relation objects
|
||||
* @param spark
|
||||
* @param relationPath
|
||||
* @return the Dataset<SortableRelation> containing all the relationships
|
||||
*/
|
||||
private static Dataset<Relation> readPathRelation(
|
||||
SparkSession spark, final String relationPath) {
|
||||
final SparkSession spark,
|
||||
final String relationPath) {
|
||||
|
||||
log.info("Reading relations from: {}", relationPath);
|
||||
return spark.read().load(relationPath).as(Encoders.bean(Relation.class));
|
||||
}
|
||||
|
||||
private static void removeOutputDir(SparkSession spark, String path) {
|
||||
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,63 +39,53 @@ public class XmlConverterJob {
|
|||
|
||||
private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
|
||||
|
||||
public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
||||
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
XmlConverterJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("inputPath");
|
||||
final String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
String outputPath = parser.get("outputPath");
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
String isLookupUrl = parser.get("isLookupUrl");
|
||||
final String isLookupUrl = parser.get("isLookupUrl");
|
||||
log.info("isLookupUrl: {}", isLookupUrl);
|
||||
|
||||
String otherDsTypeId = parser.get("otherDsTypeId");
|
||||
log.info("otherDsTypeId: {}", otherDsTypeId);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
final SparkConf conf = new SparkConf();
|
||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
convertToXml(
|
||||
spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId);
|
||||
convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl));
|
||||
});
|
||||
}
|
||||
|
||||
private static void convertToXml(
|
||||
SparkSession spark,
|
||||
String inputPath,
|
||||
String outputPath,
|
||||
ContextMapper contextMapper,
|
||||
String otherDsTypeId) {
|
||||
final SparkSession spark,
|
||||
final String inputPath,
|
||||
final String outputPath,
|
||||
final ContextMapper contextMapper) {
|
||||
|
||||
final XmlRecordFactory recordFactory = new XmlRecordFactory(
|
||||
prepareAccumulators(spark.sparkContext()),
|
||||
contextMapper,
|
||||
false,
|
||||
SCHEMA_LOCATION,
|
||||
otherDsTypeId);
|
||||
schemaLocation);
|
||||
|
||||
final List<String> paths = HdfsSupport
|
||||
.listFiles(inputPath, spark.sparkContext().hadoopConfiguration());
|
||||
|
@ -115,16 +105,15 @@ public class XmlConverterJob {
|
|||
.mapToPair(
|
||||
(PairFunction<Tuple2<String, String>, Text, Text>) t -> new Tuple2<>(new Text(t._1()),
|
||||
new Text(t._2())))
|
||||
.saveAsHadoopFile(
|
||||
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||
}
|
||||
|
||||
private static void removeOutputDir(SparkSession spark, String path) {
|
||||
private static void removeOutputDir(final SparkSession spark, final String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
|
||||
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
|
||||
Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
||||
private static Map<String, LongAccumulator> prepareAccumulators(final SparkContext sc) {
|
||||
final Map<String, LongAccumulator> accumulators = Maps.newHashMap();
|
||||
accumulators
|
||||
.put(
|
||||
"resultResult_similarity_isAmongTopNSimilarDocuments",
|
||||
|
@ -135,15 +124,13 @@ public class XmlConverterJob {
|
|||
sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultResult_supplement_isSupplementTo",
|
||||
sc.longAccumulator("resultResult_supplement_isSupplementTo"));
|
||||
"resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultResult_supplement_isSupplementedBy",
|
||||
sc.longAccumulator("resultResult_supplement_isSupplementedBy"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
|
||||
.put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
|
||||
accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges"));
|
||||
|
||||
accumulators
|
||||
|
@ -151,16 +138,11 @@ public class XmlConverterJob {
|
|||
"resultResult_publicationDataset_isRelatedTo",
|
||||
sc.longAccumulator("resultResult_publicationDataset_isRelatedTo"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultResult_relationship_isRelatedTo",
|
||||
sc.longAccumulator("resultResult_relationship_isRelatedTo"));
|
||||
.put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultProject_outcome_isProducedBy",
|
||||
sc.longAccumulator("resultProject_outcome_isProducedBy"));
|
||||
.put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
|
||||
.put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
|
||||
accumulators
|
||||
.put(
|
||||
"resultOrganization_affiliation_isAuthorInstitutionOf",
|
||||
|
@ -183,9 +165,7 @@ public class XmlConverterJob {
|
|||
"organizationOrganization_dedup_isMergedIn",
|
||||
sc.longAccumulator("organizationOrganization_dedup_isMergedIn"));
|
||||
accumulators
|
||||
.put(
|
||||
"organizationOrganization_dedup_merges",
|
||||
sc.longAccumulator("organizationOrganization_dedup_merges"));
|
||||
.put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces"));
|
||||
accumulators
|
||||
.put(
|
||||
"datasourceOrganization_provision_isProvidedBy",
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -16,11 +16,5 @@
|
|||
"paramLongName": "isLookupUrl",
|
||||
"paramDescription": "URL of the isLookUp Service",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "odt",
|
||||
"paramLongName": "otherDsTypeId",
|
||||
"paramDescription": "list of datasource types to populate field datasourcetypeui",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
|
|
@ -25,10 +25,6 @@
|
|||
<name>targetMaxRelations</name>
|
||||
<description>maximum number of relations allowed for a each entity grouping by target</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>otherDsTypeId</name>
|
||||
<description>mapping used to populate datasourceTypeUi field</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>format</name>
|
||||
<description>metadata format name (DMF|TMF)</description>
|
||||
|
@ -582,7 +578,6 @@
|
|||
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--otherDsTypeId</arg><arg>${otherDsTypeId}</arg>
|
||||
</spark>
|
||||
<ok to="should_index"/>
|
||||
<error to="Kill"/>
|
||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision;
|
|||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
|
@ -33,7 +32,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
|||
*
|
||||
* The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities.
|
||||
*/
|
||||
class IndexRecordTransformerTest {
|
||||
public class IndexRecordTransformerTest {
|
||||
|
||||
public static final String VERSION = "2021-04-15T10:05:53Z";
|
||||
public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl";
|
||||
|
@ -46,23 +45,23 @@ class IndexRecordTransformerTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testPreBuiltRecordTransformation() throws IOException, TransformerException {
|
||||
String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml")));
|
||||
public void testPreBuiltRecordTransformation() throws IOException, TransformerException {
|
||||
final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
|
||||
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPublicationRecordTransformation() throws IOException, TransformerException {
|
||||
public void testPublicationRecordTransformation() throws IOException, TransformerException {
|
||||
|
||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
||||
XmlRecordFactoryTest.otherDsTypeId);
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
Publication p = load("publication.json", Publication.class);
|
||||
Project pj = load("project.json", Project.class);
|
||||
Relation rel = load("relToValidatedProject.json", Relation.class);
|
||||
final Publication p = load("publication.json", Publication.class);
|
||||
final Project pj = load("project.json", Project.class);
|
||||
final Relation rel = load("relToValidatedProject.json", Relation.class);
|
||||
|
||||
JoinedEntity<Publication> je = new JoinedEntity<>(p);
|
||||
final JoinedEntity je = new JoinedEntity<>(p);
|
||||
je
|
||||
.setLinks(
|
||||
Lists
|
||||
|
@ -70,25 +69,25 @@ class IndexRecordTransformerTest {
|
|||
new RelatedEntityWrapper(rel,
|
||||
CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));
|
||||
|
||||
String record = xmlRecordFactory.build(je);
|
||||
final String record = xmlRecordFactory.build(je);
|
||||
|
||||
assertNotNull(record);
|
||||
|
||||
testRecordTransformation(record);
|
||||
}
|
||||
|
||||
private void testRecordTransformation(String record) throws IOException, TransformerException {
|
||||
String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml")));
|
||||
String xslt = IOUtils
|
||||
.toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")));
|
||||
private void testRecordTransformation(final String record) throws IOException, TransformerException {
|
||||
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
|
||||
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
|
||||
|
||||
String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
||||
final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
||||
|
||||
Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
||||
final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
||||
|
||||
String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
||||
|
||||
SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML);
|
||||
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
|
||||
.parseDocument(indexRecordXML);
|
||||
|
||||
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
||||
|
||||
|
@ -96,9 +95,9 @@ class IndexRecordTransformerTest {
|
|||
System.out.println(xmlDoc);
|
||||
}
|
||||
|
||||
private <T> T load(String fileName, Class<T> clazz) throws IOException {
|
||||
private <T> T load(final String fileName, final Class<T> clazz) throws IOException {
|
||||
return XmlRecordFactoryTest.OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz);
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
@ -13,7 +14,6 @@ import org.dom4j.DocumentException;
|
|||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
@ -24,34 +24,31 @@ import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
|
|||
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
|
||||
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
||||
class XmlRecordFactoryTest {
|
||||
|
||||
public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
|
||||
public class XmlRecordFactoryTest {
|
||||
|
||||
public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Test
|
||||
void testXMLRecordFactory() throws IOException, DocumentException {
|
||||
public void testXMLRecordFactory() throws IOException, DocumentException {
|
||||
|
||||
ContextMapper contextMapper = new ContextMapper();
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
||||
otherDsTypeId);
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
Publication p = OBJECT_MAPPER
|
||||
final Publication p = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||
|
||||
String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
Document doc = new SAXReader().read(new StringReader(xml));
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
|
||||
assertNotNull(doc);
|
||||
|
||||
|
@ -72,93 +69,64 @@ class XmlRecordFactoryTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
|
||||
public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
|
||||
|
||||
ContextMapper contextMapper = new ContextMapper();
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
||||
otherDsTypeId);
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
Publication p = OBJECT_MAPPER
|
||||
final Publication p = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||
Project pj = OBJECT_MAPPER
|
||||
final Project pj = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
||||
Relation rel = OBJECT_MAPPER
|
||||
.readValue(
|
||||
(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class);
|
||||
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||
List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||
final Relation rel = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class);
|
||||
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||
links.add(rew);
|
||||
JoinedEntity je = new JoinedEntity<>(p);
|
||||
final JoinedEntity je = new JoinedEntity<>(p);
|
||||
je.setLinks(links);
|
||||
|
||||
String xml = xmlRecordFactory.build(je);
|
||||
final String xml = xmlRecordFactory.build(je);
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
Document doc = new SAXReader().read(new StringReader(xml));
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
|
||||
public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
|
||||
|
||||
ContextMapper contextMapper = new ContextMapper();
|
||||
final ContextMapper contextMapper = new ContextMapper();
|
||||
|
||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
||||
otherDsTypeId);
|
||||
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
||||
XmlConverterJob.schemaLocation);
|
||||
|
||||
Publication p = OBJECT_MAPPER
|
||||
final Publication p = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
|
||||
Project pj = OBJECT_MAPPER
|
||||
final Project pj = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
|
||||
Relation rel = OBJECT_MAPPER
|
||||
.readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class);
|
||||
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||
List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||
final Relation rel = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class);
|
||||
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
|
||||
final List<RelatedEntityWrapper> links = Lists.newArrayList();
|
||||
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
|
||||
links.add(rew);
|
||||
JoinedEntity je = new JoinedEntity<>(p);
|
||||
final JoinedEntity je = new JoinedEntity<>(p);
|
||||
je.setLinks(links);
|
||||
|
||||
String xml = xmlRecordFactory.build(je);
|
||||
final String xml = xmlRecordFactory.build(je);
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
Document doc = new SAXReader().read(new StringReader(xml));
|
||||
final Document doc = new SAXReader().read(new StringReader(xml));
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
assertEquals("", doc.valueOf("//rel/validated"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testEnermapsRecord() throws IOException, DocumentException, SAXException {
|
||||
|
||||
String contextmap = "<entries><entry id=\"enermaps\" label=\"Energy Research\" name=\"context\" type=\"community\"/>"
|
||||
+
|
||||
"<entry id=\"enermaps::selection\" label=\"Featured dataset\" name=\"category\"/>" +
|
||||
"<entry id=\"enermaps::selection::tgs00004\" label=\"Dataset title\" name=\"concept\"/>" +
|
||||
"</entries>";
|
||||
|
||||
ContextMapper contextMapper = ContextMapper.fromXml(contextmap);
|
||||
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION,
|
||||
otherDsTypeId);
|
||||
|
||||
Dataset d = OBJECT_MAPPER
|
||||
.readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class);
|
||||
|
||||
JoinedEntity je = new JoinedEntity<>(d);
|
||||
|
||||
String xml = xmlRecordFactory.build(je);
|
||||
|
||||
assertNotNull(xml);
|
||||
|
||||
Document doc = new SAXReader().read(new StringReader(xml));
|
||||
assertNotNull(doc);
|
||||
System.out.println(doc.asXML());
|
||||
assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id"));
|
||||
}
|
||||
}
|
||||
|
|
17
pom.xml
17
pom.xml
|
@ -205,11 +205,6 @@
|
|||
<artifactId>dateparser</artifactId>
|
||||
<version>1.0.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>me.xuender</groupId>
|
||||
<artifactId>unidecode</artifactId>
|
||||
<version>0.0.7</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
|
@ -519,16 +514,6 @@
|
|||
<version>${common.text.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.opencsv</groupId>
|
||||
<artifactId>opencsv</artifactId>
|
||||
<version>5.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.github.classgraph</groupId>
|
||||
<artifactId>classgraph</artifactId>
|
||||
<version>4.8.71</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
|
@ -751,7 +736,7 @@
|
|||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
<vtd.version>[2.12,3.0)</vtd.version>
|
||||
<dhp-schemas.version>[2.7.15]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[2.7.15-SNAPSHOT]</dhp-schemas.version>
|
||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||
|
|
Loading…
Reference in New Issue