refactoring

This commit is contained in:
Miriam Baglioni 2023-10-31 10:42:43 +01:00
parent 10ea974d56
commit e3c1ae809d
8 changed files with 168 additions and 178 deletions

View File

@ -18,7 +18,6 @@ public class ProjectModel implements Serializable {
private String gratId;
public String getFunder() {
return funder;
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -8,21 +10,17 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import com.fasterxml.jackson.core.JsonProcessingException;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
public class UtilCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
@ -82,15 +80,12 @@ public class UtilCommunityAPI {
}
public List<ContextInfo> getContextInformation() throws IOException {
List<ContextInfo> ret = new ArrayList<>();
getValidCommunities()
.forEach(c ->
ret.add(getContext(c)));
return ret;
return getValidCommunities()
.stream()
.map(c -> getContext(c))
.collect(Collectors.toList());
}
@ -118,12 +113,7 @@ public class UtilCommunityAPI {
cinfo.setId(c.getId());
cinfo.setDatasourceList(getDatasourceList(c.getId()));
cinfo.setProjectList(getProjectList(c.getId()));
// try {
// if(cinfo.getId().equals("ni"))
// System.out.println(new ObjectMapper().writeValueAsString(cinfo));
// } catch (JsonProcessingException e) {
// throw new RuntimeException(e);
// }
return cinfo;
}).collect(Collectors.toList());
}
@ -132,7 +122,9 @@ public class UtilCommunityAPI {
List<String> datasourceList = new ArrayList<>();
try {
new ObjectMapper().readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
DatasourceList.class)
.stream()
.forEach(ds -> {
@ -152,14 +144,16 @@ public class UtilCommunityAPI {
private List<String> getProjectList(String id) {
int page = -1;
int size = 100;
ContentModel cm = null;;
ContentModel cm = null;
;
ArrayList<String> projectList = new ArrayList<>();
do {
page++;
try {
cm = new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityProjects(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI
.communityProjects(
id, String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) {
@ -177,7 +171,6 @@ public class UtilCommunityAPI {
return projectList;
}
/**
 * Returns, for each organization, the list of associated communities.
 */
@ -189,7 +182,8 @@ public class UtilCommunityAPI {
try {
List<String> associatedOrgs = MAPPER
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
OrganizationList.class);
associatedOrgs.forEach(o -> {
if (!organizationMap
.keySet()

View File

@ -9,7 +9,6 @@ import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -21,6 +20,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
@ -36,7 +36,6 @@ public class CreateContextEntities implements Serializable {
private final transient Configuration conf;
private final transient BufferedWriter writer;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@ -53,7 +52,6 @@ public class CreateContextEntities implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("nameNode: {}", hdfsNameNode);
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
log.info("Processing contexts...");

View File

@ -10,7 +10,6 @@ import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -22,6 +21,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
@ -35,6 +35,7 @@ public class CreateContextRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
private final transient Configuration conf;
private final transient BufferedWriter writer;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@ -59,13 +60,11 @@ public class CreateContextRelation implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("hdfsNameNode: {}", hdfsNameNode);
final String masterDuplicatePath = parser.get("masterDuplicate");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
log.info("Creating relation for datasources and projects...");
cce
.execute(
@ -111,7 +110,6 @@ public class CreateContextRelation implements Serializable {
}
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);

View File

@ -54,13 +54,11 @@ public class Process implements Serializable {
List<Relation> relationList = new ArrayList<>();
ci
.getDatasourceList()
.forEach(ds ->
relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
.forEach(ds -> relationList.addAll(addRelations(ci, ds, ModelSupport.idPrefixEntity.get("10"))));
ci
.getProjectList()
.forEach(p ->
relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
.forEach(p -> relationList.addAll(addRelations(ci, p, ModelSupport.idPrefixEntity.get("40"))));
return relationList;
@ -98,5 +96,4 @@ public class Process implements Serializable {
return relationList;
}
}

View File

@ -10,8 +10,6 @@ import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@ -23,6 +21,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.model.Provenance;

File diff suppressed because one or more lines are too long

View File

@ -6,8 +6,6 @@ import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -16,6 +14,8 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.schema.common.ModelSupport;
@ -35,7 +35,6 @@ class CreateRelationTest {
List<ContextInfo> cInfoList = new ArrayList<>();
final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);
consumer.accept(mapper.readValue(contextInfo1, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo2, ContextInfo.class));
consumer.accept(mapper.readValue(contextInfo3, ContextInfo.class));
@ -118,11 +117,11 @@ class CreateRelationTest {
tmp.contains("opendoar____::71f6278d140af599e06ad9bf1ba03cb0") &&
tmp.contains("opendoar____::f5c59267dae7d123f54b741a76f28f84") &&
tmp.contains("opendoar____::cda72177eba360ff16b7f836e2754370") &&
tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8") )
;
tmp.contains("opendoar____::39e4973ba3321b80f37d9b55f63ed8b8"));
Assertions.assertTrue(rList
Assertions
.assertTrue(
rList
.stream()
.filter(
r -> r
@ -135,7 +134,9 @@ class CreateRelationTest {
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("eut"))))
.map(r -> r.getTargetType())
.collect(Collectors.toSet()).stream().allMatch(t -> t.equals("datasource")));
.collect(Collectors.toSet())
.stream()
.allMatch(t -> t.equals("datasource")));
Assertions
.assertEquals(
@ -155,7 +156,9 @@ class CreateRelationTest {
.collect(Collectors.toList())
.size());
Assertions.assertEquals(7,rList
Assertions
.assertEquals(
7, rList
.stream()
.filter(
r -> r
@ -165,10 +168,14 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("ni"))) &&
r.getTargetType().equals("datasource")).count());
DHPUtils.md5("ni")))
&&
r.getTargetType().equals("datasource"))
.count());
Assertions.assertEquals(8,rList
Assertions
.assertEquals(
8, rList
.stream()
.filter(
r -> r
@ -178,11 +185,10 @@ class CreateRelationTest {
.format(
"%s::%s",
Constants.CONTEXT_NS_PREFIX,
DHPUtils.md5("ni"))) &&
r.getTargetType().equals("project")).count());
DHPUtils.md5("ni")))
&&
r.getTargetType().equals("project"))
.count());
}
}