refactoring

This commit is contained in:
Miriam Baglioni 2023-10-31 10:42:43 +01:00
parent 10ea974d56
commit e3c1ae809d
8 changed files with 168 additions and 178 deletions

View File

@ -18,7 +18,6 @@ public class ProjectModel implements Serializable {
private String gratId; private String gratId;
public String getFunder() { public String getFunder() {
return funder; return funder;
} }

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump; package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -8,21 +10,17 @@ import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.fasterxml.jackson.core.JsonProcessingException;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
public class UtilCommunityAPI { public class UtilCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
@ -82,15 +80,12 @@ public class UtilCommunityAPI {
} }
public List<ContextInfo> getContextInformation() throws IOException { public List<ContextInfo> getContextInformation() throws IOException {
List<ContextInfo> ret = new ArrayList<>();
getValidCommunities() return getValidCommunities()
.forEach(c -> .stream()
ret.add(getContext(c))); .map(c -> getContext(c))
.collect(Collectors.toList());
return ret;
} }
@ -118,12 +113,7 @@ public class UtilCommunityAPI {
cinfo.setId(c.getId()); cinfo.setId(c.getId());
cinfo.setDatasourceList(getDatasourceList(c.getId())); cinfo.setDatasourceList(getDatasourceList(c.getId()));
cinfo.setProjectList(getProjectList(c.getId())); cinfo.setProjectList(getProjectList(c.getId()));
// try {
// if(cinfo.getId().equals("ni"))
// System.out.println(new ObjectMapper().writeValueAsString(cinfo));
// } catch (JsonProcessingException e) {
// throw new RuntimeException(e);
// }
return cinfo; return cinfo;
}).collect(Collectors.toList()); }).collect(Collectors.toList());
} }
@ -132,7 +122,9 @@ public class UtilCommunityAPI {
List<String> datasourceList = new ArrayList<>(); List<String> datasourceList = new ArrayList<>();
try { try {
new ObjectMapper().readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
DatasourceList.class) DatasourceList.class)
.stream() .stream()
.forEach(ds -> { .forEach(ds -> {
@ -152,14 +144,16 @@ public class UtilCommunityAPI {
private List<String> getProjectList(String id) { private List<String> getProjectList(String id) {
int page = -1; int page = -1;
int size = 100; int size = 100;
ContentModel cm = null;; ContentModel cm = null;
;
ArrayList<String> projectList = new ArrayList<>(); ArrayList<String> projectList = new ArrayList<>();
do { do {
page++; page++;
try { try {
cm = new ObjectMapper() cm = new ObjectMapper()
.readValue( .readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityProjects( eu.dnetlib.dhp.communityapi.QueryCommunityAPI
.communityProjects(
id, String.valueOf(page), String.valueOf(size)), id, String.valueOf(page), String.valueOf(size)),
ContentModel.class); ContentModel.class);
if (cm.getContent().size() > 0) { if (cm.getContent().size() > 0) {
@ -177,7 +171,6 @@ public class UtilCommunityAPI {
return projectList; return projectList;
} }
/** /**
* it returns for each organization the list of associated communities * it returns for each organization the list of associated communities
*/ */
@ -189,7 +182,8 @@ public class UtilCommunityAPI {
try { try {
List<String> associatedOrgs = MAPPER List<String> associatedOrgs = MAPPER
.readValue( .readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class); eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
OrganizationList.class);
associatedOrgs.forEach(o -> { associatedOrgs.forEach(o -> {
if (!organizationMap if (!organizationMap
.keySet() .keySet()

View File

@ -9,7 +9,6 @@ import java.nio.charset.StandardCharsets;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function; import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
@ -21,6 +20,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
@ -36,7 +36,6 @@ public class CreateContextEntities implements Serializable {
private final transient Configuration conf; private final transient Configuration conf;
private final transient BufferedWriter writer; private final transient BufferedWriter writer;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
@ -53,7 +52,6 @@ public class CreateContextEntities implements Serializable {
final String hdfsNameNode = parser.get("nameNode"); final String hdfsNameNode = parser.get("nameNode");
log.info("nameNode: {}", hdfsNameNode); log.info("nameNode: {}", hdfsNameNode);
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode); final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
log.info("Processing contexts..."); log.info("Processing contexts...");

View File

@ -10,7 +10,6 @@ import java.util.Optional;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function; import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
@ -22,6 +21,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
@ -35,6 +35,7 @@ public class CreateContextRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
private final transient Configuration conf; private final transient Configuration conf;
private final transient BufferedWriter writer; private final transient BufferedWriter writer;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
@ -59,13 +60,11 @@ public class CreateContextRelation implements Serializable {
final String hdfsNameNode = parser.get("nameNode"); final String hdfsNameNode = parser.get("nameNode");
log.info("hdfsNameNode: {}", hdfsNameNode); log.info("hdfsNameNode: {}", hdfsNameNode);
final String masterDuplicatePath = parser.get("masterDuplicate"); final String masterDuplicatePath = parser.get("masterDuplicate");
log.info("masterDuplicatePath: {}", masterDuplicatePath); log.info("masterDuplicatePath: {}", masterDuplicatePath);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode); final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
log.info("Creating relation for datasources and projects..."); log.info("Creating relation for datasources and projects...");
cce cce
.execute( .execute(
@ -111,7 +110,6 @@ public class CreateContextRelation implements Serializable {
} }
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException { public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity); final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);

View File

@ -54,13 +54,11 @@ public class Process implements Serializable {
List<Relation> relationList = new ArrayList<>(); List<Relation> relationList = new ArrayList<>();
ci ci
.getDatasourceList() .getDatasourceList()
.forEach(ds -> .forEach(ds -> relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
ci ci
.getProjectList() .getProjectList()
.forEach(p -> .forEach(p -> relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
return relationList; return relationList;
@ -98,5 +96,4 @@ public class Process implements Serializable {
return relationList; return relationList;
} }
} }

View File

@ -10,8 +10,6 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.function.Consumer; import java.util.function.Consumer;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -23,6 +21,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance; import eu.dnetlib.dhp.oa.model.Provenance;

File diff suppressed because one or more lines are too long

View File

@ -6,8 +6,6 @@ import java.util.*;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -16,6 +14,8 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.model.graph.Relation; import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
@ -35,7 +35,6 @@ class CreateRelationTest {
List<ContextInfo> cInfoList = new ArrayList<>(); List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci); final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
consumer.accept(mapper.readValue(contextInfo1, ContextInfo.class)); consumer.accept(mapper.readValue(contextInfo1, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo2, ContextInfo.class)); consumer.accept(mapper.readValue(contextInfo2, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo3, ContextInfo.class)); consumer.accept(mapper.readValue(contextInfo3, ContextInfo.class));
@ -118,11 +117,11 @@ class CreateRelationTest {
tmp.contains("opendoar____::71f6278d140af599e06ad9bf1ba03cb0") && tmp.contains("opendoar____::71f6278d140af599e06ad9bf1ba03cb0") &&
tmp.contains("opendoar____::f5c59267dae7d123f54b741a76f28f84") && tmp.contains("opendoar____::f5c59267dae7d123f54b741a76f28f84") &&
tmp.contains("opendoar____::cda72177eba360ff16b7f836e2754370") && tmp.contains("opendoar____::cda72177eba360ff16b7f836e2754370") &&
tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8") ) tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8"));
;
Assertions
Assertions.assertTrue(rList .assertTrue(
rList
.stream() .stream()
.filter( .filter(
r -> r r -> r
@ -135,7 +134,9 @@ class CreateRelationTest {
Constants.CONTEXT_NS_PREFIX, Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("eut")))) DHPUtils.md5("eut"))))
.map(r -> r.getTargetType()) .map(r -> r.getTargetType())
.collect(Collectors.toSet()).stream().allMatch(t -> t.equals("datasource"))); .collect(Collectors.toSet())
.stream()
.allMatch(t -> t.equals("datasource")));
Assertions Assertions
.assertEquals( .assertEquals(
@ -155,7 +156,9 @@ class CreateRelationTest {
.collect(Collectors.toList()) .collect(Collectors.toList())
.size()); .size());
Assertions.assertEquals(7,rList Assertions
.assertEquals(
7, rList
.stream() .stream()
.filter( .filter(
r -> r r -> r
@ -165,10 +168,14 @@ class CreateRelationTest {
.format( .format(
"%s::%s", "%s::%s",
Constants.CONTEXT_NS_PREFIX, Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("ni"))) && DHPUtils.md5("ni")))
r.getTargetType().equals("datasource")).count()); &&
r.getTargetType().equals("datasource"))
.count());
Assertions.assertEquals(8,rList Assertions
.assertEquals(
8, rList
.stream() .stream()
.filter( .filter(
r -> r r -> r
@ -178,11 +185,10 @@ class CreateRelationTest {
.format( .format(
"%s::%s", "%s::%s",
Constants.CONTEXT_NS_PREFIX, Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("ni"))) && DHPUtils.md5("ni")))
r.getTargetType().equals("project")).count()); &&
r.getTargetType().equals("project"))
.count());
} }
} }