diff --git a/api/pom.xml b/api/pom.xml
index 0109977..7a3e978 100644
--- a/api/pom.xml
+++ b/api/pom.xml
@@ -37,6 +37,11 @@
dhp-common
${project.version}
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ compile
+
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
index a5148ea..0b7cdaf 100644
--- a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
@@ -40,6 +40,22 @@ public class QueryCommunityAPI {
}
+    /**
+     * Requests the {@code contentproviders} resource of a community.
+     *
+     * @param id community identifier, concatenated verbatim into the request path
+     * @return the value returned by {@code get} for {@code community/<id>/contentproviders}
+     * @throws IOException propagated from {@code get}
+     */
+    public static String communityDatasource(String id) throws IOException {
+        final String endpoint = PRODUCTION_BASE_URL + "community/" + id + "/contentproviders";
+        return get(endpoint);
+    }
+
+    /**
+     * Requests the {@code propagationOrganizations} resource of a community.
+     *
+     * @param id community identifier, concatenated verbatim into the request path
+     * @return the value returned by {@code get} for {@code community/<id>/propagationOrganizations}
+     * @throws IOException propagated from {@code get}
+     */
+    public static String communityPropagationOrganization(String id) throws IOException {
+        final String endpoint = PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations";
+        return get(endpoint);
+    }
+
+    /**
+     * Requests one page of the {@code projects} resource of a community.
+     *
+     * @param id community identifier, concatenated verbatim into the request path
+     * @param page page index, appended as the first path segment after {@code projects/}
+     * @param size page size, appended as the last path segment
+     * @return the value returned by {@code get} for {@code community/<id>/projects/<page>/<size>}
+     * @throws IOException propagated from {@code get}
+     */
+    public static String communityProjects(String id, String page, String size) throws IOException {
+        final String endpoint = PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size;
+        return get(endpoint);
+    }
+
private static String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
index ea00a16..92a1a5e 100644
--- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
@@ -2,6 +2,7 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
+import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@@ -19,6 +20,26 @@ public class CommunityModel implements Serializable {
private String type;
+    /** Subjects of the community; {@code null} until set. */
+    private List<String> subject;
+
+    // NOTE(review): the capital "O" in "zenodoCOmmunity" looks like a typo for "zenodoCommunity".
+    // The name is kept because existing callers use getZenodoCOmmunity(); since the bean property name
+    // follows the accessor names, confirm it matches the key actually present in the API payload.
+    private String zenodoCOmmunity;
+
+    /**
+     * @return the subjects of the community, or {@code null} when none were set
+     */
+    public List<String> getSubject() {
+        return subject;
+    }
+
+    /**
+     * @param subject the subjects of the community
+     */
+    public void setSubject(List<String> subject) {
+        this.subject = subject;
+    }
+
+    /**
+     * @return the Zenodo community value, or {@code null} when none was set
+     */
+    public String getZenodoCOmmunity() {
+        return zenodoCOmmunity;
+    }
+
+    /**
+     * @param zenodoCOmmunity the Zenodo community value
+     */
+    public void setZenodoCOmmunity(String zenodoCOmmunity) {
+        this.zenodoCOmmunity = zenodoCOmmunity;
+    }
+
+
public String getType() {
return type;
}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
index f5aa967..2a816c2 100644
--- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
@@ -14,6 +14,27 @@ public class ProjectModel implements Serializable {
private String openaireId;
+    /** Funder of the project; {@code null} until set. */
+    private String funder;
+
+    // NOTE(review): "gratId" is presumably a typo for "grantId". The name is kept so that the bean
+    // interface (getGratId/setGratId) stays unchanged; confirm against the key used in the API payload.
+    private String gratId;
+
+    /**
+     * @return the funder, or {@code null} when none was set
+     */
+    public String getFunder() {
+        return this.funder;
+    }
+
+    /**
+     * @param funder the funder to store
+     */
+    public void setFunder(String funder) {
+        this.funder = funder;
+    }
+
+    /**
+     * @return the stored {@code gratId} value, or {@code null} when none was set
+     */
+    public String getGratId() {
+        return this.gratId;
+    }
+
+    /**
+     * @param gratId the value to store
+     */
+    public void setGratId(String gratId) {
+        this.gratId = gratId;
+    }
+
+
public String getOpenaireId() {
return openaireId;
}
diff --git a/dump/pom.xml b/dump/pom.xml
index 3ae18f2..01d29b6 100644
--- a/dump/pom.xml
+++ b/dump/pom.xml
@@ -67,6 +67,12 @@
classgraph
4.8.71
+
+ eu.dnetlib.dhp
+ api
+ 1.2.5-SNAPSHOT
+ compile
+
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java
deleted file mode 100644
index 17e8b24..0000000
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java
+++ /dev/null
@@ -1,78 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.communityapi.model.CommunityModel;
-import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
-import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
-import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
-import eu.dnetlib.dhp.utils.DHPUtils;
-
-public class QueryCommunityAPI {
-
- private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class);
-
- public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
- throws IOException {
- if (singleCommunity)
- return getMap(Arrays.asList(getCommunity(communityId)));
- return getMap(getValidCommunities());
-
- }
-
- private CommunityMap getMap(List communities) {
- final CommunityMap map = new CommunityMap();
- communities.forEach(c -> map.put(c.getId(), c.getName()));
- return map;
- }
-
- public List getCommunityCsv(List comms) {
- return comms.stream().map(c -> {
- try {
- CommunityModel community = getCommunity(c);
- StringBuilder builder = new StringBuilder();
- builder.append(DHPUtils.md5(community.getId()));
- builder.append(Constants.SEP);
- builder.append(community.getName());
- builder.append(Constants.SEP);
- builder.append(community.getId());
- builder.append(Constants.SEP);
- builder
- .append(
- community.getDescription());
- return builder.toString();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }).collect(Collectors.toList());
-
- }
-
- private List getValidCommunities() throws IOException {
- ObjectMapper mapper = new ObjectMapper();
- return mapper
- .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
- .stream()
- .filter(
- community -> community.getStatus().equals("all") &&
- (community.getType().equals("ri") || community.getType().equals("community")))
- .collect(Collectors.toList());
-
- }
-
- private CommunityModel getCommunity(String id) throws IOException {
- ObjectMapper mapper = new ObjectMapper();
- return mapper
- .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
-
- }
-}
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
index d891a16..e93a2ea 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java
@@ -13,13 +13,10 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
@@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
- private final transient QueryCommunityAPI queryInformationSystem;
+ private final transient UtilCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
- public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
+ public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
@@ -45,7 +42,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
- queryInformationSystem = new QueryCommunityAPI();
+ queryInformationSystem = new UtilCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@@ -67,9 +64,6 @@ public class SaveCommunityMap implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
- final String isLookUpUrl = parser.get("isLookUpUrl");
- log.info("isLookUpUrl: {}", isLookUpUrl);
-
final Boolean singleCommunity = Optional
.ofNullable(parser.get("singleDeposition"))
.map(Boolean::valueOf)
@@ -77,14 +71,14 @@ public class SaveCommunityMap implements Serializable {
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
- final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
+ final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
scm.saveCommunityMap(singleCommunity, community_id);
}
private void saveCommunityMap(boolean singleCommunity, String communityId)
- throws IOException {
+ throws IOException {
final String communityMapString = Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
log.info("communityMap {} ", communityMapString);
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java
new file mode 100644
index 0000000..c352d0d
--- /dev/null
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java
@@ -0,0 +1,196 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.dhp.communityapi.model.*;
+import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+
+import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
+import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
+
+/**
+ * Parses the JSON returned by {@code eu.dnetlib.dhp.communityapi.QueryCommunityAPI} and turns it into the
+ * structures used by the dump: the community map, CSV rows, context information for entities and relations,
+ * and the organization-to-communities map.
+ *
+ * Only communities whose status is {@code all} and whose type is {@code ri} or {@code community} are taken
+ * into account by the methods that iterate over all communities.
+ */
+public class UtilCommunityAPI {
+
+    private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
+
+    /**
+     * Shared parser with the default configuration, i.e. equivalent to the {@code new ObjectMapper()} instances
+     * that used to be created on every call (and on every page in {@link #getProjectList(String)}).
+     */
+    private static final ObjectMapper JSON = new ObjectMapper();
+
+    /** Number of projects requested per page by {@link #getProjectList(String)}. */
+    private static final int PROJECT_PAGE_SIZE = 100;
+
+    /**
+     * Builds the map community id -> community name.
+     *
+     * @param singleCommunity when {@code true} only {@code communityId} is looked up, otherwise all the valid
+     *        communities are included
+     * @param communityId the community to look up; only read when {@code singleCommunity} is {@code true}
+     * @return the map from community id to community name
+     * @throws IOException if the API call or the parsing of its response fails
+     */
+    public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
+        throws IOException {
+        if (singleCommunity) {
+            return getMap(Arrays.asList(getCommunity(communityId)));
+        }
+        return getMap(getValidCommunities());
+    }
+
+    /** Copies id and name of every given community into a new {@code CommunityMap}. */
+    private CommunityMap getMap(List<CommunityModel> communities) {
+        final CommunityMap map = new CommunityMap();
+        communities.forEach(c -> map.put(c.getId(), c.getName()));
+        return map;
+    }
+
+    /**
+     * Produces one CSV row per community id, in the same order as the input.
+     *
+     * @param comms the identifiers of the communities to describe
+     * @return rows made of md5(id), name, id and description joined by {@code Constants.SEP}
+     * @throws RuntimeException wrapping the {@link IOException} raised while fetching a community
+     */
+    public List<String> getCommunityCsv(List<String> comms) {
+        return comms.stream().map(this::toCsvRow).collect(Collectors.toList());
+    }
+
+    /** Fetches one community and serializes it as a single CSV row. */
+    private String toCsvRow(String communityId) {
+        final CommunityModel community = getCommunityUnchecked(communityId);
+        return new StringBuilder()
+            .append(DHPUtils.md5(community.getId()))
+            .append(Constants.SEP)
+            .append(community.getName())
+            .append(Constants.SEP)
+            .append(community.getId())
+            .append(Constants.SEP)
+            .append(community.getDescription())
+            .toString();
+    }
+
+    /**
+     * Lists the communities with status {@code all} and type {@code ri} or {@code community}.
+     * Constant-first comparisons are used so that an entry without status or type is filtered out
+     * instead of raising a NullPointerException.
+     */
+    private List<CommunityModel> getValidCommunities() throws IOException {
+        return JSON
+            .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
+            .stream()
+            .filter(
+                community -> "all".equals(community.getStatus()) &&
+                    ("ri".equals(community.getType()) || "community".equals(community.getType())))
+            .collect(Collectors.toList());
+    }
+
+    /** Fetches and parses the full description of one community. */
+    private CommunityModel getCommunity(String id) throws IOException {
+        return JSON
+            .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
+    }
+
+    /** Same as {@link #getCommunity(String)}, rethrowing the IOException wrapped in a RuntimeException. */
+    private CommunityModel getCommunityUnchecked(String id) {
+        try {
+            return getCommunity(id);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Builds a {@code ContextInfo} (id, name, description, subject, zenodo community, type) for every valid
+     * community. Each one is handed to {@code consumer} and also collected in the returned list.
+     *
+     * @param consumer receives every {@code ContextInfo} as soon as it is built; may be {@code null} when only
+     *        the returned list is needed
+     * @return all the {@code ContextInfo} built, in the order of the valid communities
+     * @throws IOException if the list of communities cannot be fetched or parsed
+     * @throws RuntimeException wrapping the {@link IOException} raised while fetching a single community
+     */
+    public List<ContextInfo> getContextInformation(final Consumer<ContextInfo> consumer) throws IOException {
+        final List<ContextInfo> ret = new ArrayList<>();
+        for (CommunityModel c : getValidCommunities()) {
+            // the subject is read from the full community description rather than from the summary entry
+            final CommunityModel cm = getCommunityUnchecked(c.getId());
+
+            final ContextInfo cinfo = new ContextInfo();
+            cinfo.setId(c.getId());
+            cinfo.setName(c.getName());
+            cinfo.setDescription(c.getDescription());
+            cinfo.setSubject(cm.getSubject());
+            // NOTE(review): the zenodo community is read from the summary entry while the subject comes
+            // from the full description; confirm the summary actually carries this value
+            cinfo.setZenodocommunity(c.getZenodoCOmmunity());
+            cinfo.setType(c.getType());
+
+            if (consumer != null) {
+                consumer.accept(cinfo);
+            }
+            ret.add(cinfo);
+        }
+        return ret;
+    }
+
+    /**
+     * Builds, for every valid community, a {@code ContextInfo} carrying the community id together with the
+     * openaire ids of its datasources and projects, and hands it to {@code consumer}.
+     * The ids are passed through exactly as returned by the API: no prefix is added and no duplicate is
+     * resolved here.
+     *
+     * @param consumer receives one {@code ContextInfo} per valid community
+     * @throws IOException if the list of communities cannot be fetched or parsed
+     * @throws RuntimeException wrapping the {@link IOException} raised while fetching datasources or projects
+     */
+    public void getContextRelation(final Consumer<ContextInfo> consumer) throws IOException {
+        getValidCommunities().forEach(c -> {
+            final ContextInfo cinfo = new ContextInfo();
+            cinfo.setId(c.getId());
+            cinfo.setDatasourceList(getDatasourceList(c.getId()));
+            cinfo.setProjectList(getProjectList(c.getId()));
+            consumer.accept(cinfo);
+        });
+    }
+
+    /** Returns the non-null openaire ids of the datasources associated with the community. */
+    private List<String> getDatasourceList(String id) {
+        try {
+            return JSON
+                .readValue(
+                    eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
+                    DatasourceList.class)
+                .stream()
+                .map(ds -> ds.getOpenaireId())
+                .filter(openaireId -> openaireId != null)
+                .collect(Collectors.toList());
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Returns the non-null openaire ids of the projects associated with the community, reading the paged
+     * endpoint from page 0 onwards. Paging stops at the page flagged as last, and also at the first page
+     * without content, so that a response lacking the {@code last} flag cannot loop forever.
+     */
+    private List<String> getProjectList(String id) {
+        final List<String> projectList = new ArrayList<>();
+        int page = 0;
+        ContentModel cm;
+        do {
+            try {
+                cm = JSON
+                    .readValue(
+                        eu.dnetlib.dhp.communityapi.QueryCommunityAPI
+                            .communityProjects(id, String.valueOf(page), String.valueOf(PROJECT_PAGE_SIZE)),
+                        ContentModel.class);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            if (cm.getContent() == null || cm.getContent().isEmpty()) {
+                break;
+            }
+            cm.getContent().forEach(p -> {
+                if (p.getOpenaireId() != null) {
+                    projectList.add(p.getOpenaireId());
+                }
+            });
+            page++;
+        } while (!Boolean.TRUE.equals(cm.getLast()));
+
+        return projectList;
+    }
+
+    /**
+     * Returns, for each organization, the list of the valid communities it is associated with.
+     *
+     * @return map from organization id to the ids of the associated communities
+     * @throws IOException if the list of communities cannot be fetched or parsed
+     * @throws RuntimeException wrapping the {@link IOException} raised while fetching the organizations of a
+     *         community
+     */
+    public CommunityEntityMap getCommunityOrganization() throws IOException {
+        final CommunityEntityMap organizationMap = new CommunityEntityMap();
+        getValidCommunities().forEach(community -> {
+            final String id = community.getId();
+            try {
+                // the statically imported DHPUtils.MAPPER is kept here, as its configuration is not visible
+                MAPPER
+                    .readValue(
+                        eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
+                        OrganizationList.class)
+                    .forEach(o -> organizationMap.computeIfAbsent(o, k -> new ArrayList<>()).add(id));
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+
+        return organizationMap;
+    }
+
+}
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java
index 0356bd4..c190ac1 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java
@@ -9,6 +9,7 @@ import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.function.Function;
+import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -22,7 +23,6 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
@@ -52,13 +52,11 @@ public class CreateContextEntities implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("nameNode: {}", hdfsNameNode);
- final String isLookUpUrl = parser.get("isLookUpUrl");
- log.info("isLookUpUrl: {}", isLookUpUrl);
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
log.info("Processing contexts...");
- cce.execute(Process::getEntity, isLookUpUrl);
+ cce.execute(Process::getEntity);
cce.close();
@@ -87,11 +85,10 @@ public class CreateContextEntities implements Serializable {
}
- public void execute(final Function producer, String isLookUpUrl)
- throws ISLookUpException {
+ public void execute(final Function producer)
+ throws IOException {
- QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
- queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
+ UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI();
final Consumer consumer = ci -> writeEntity(producer.apply(ci));
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java
index 0708fc9..2f9e6ee 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java
@@ -10,9 +10,9 @@ import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;
+import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -25,11 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
-import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB;
import eu.dnetlib.dhp.oa.model.graph.*;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
@@ -39,11 +35,6 @@ public class CreateContextRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
private final transient Configuration conf;
private final transient BufferedWriter writer;
- private final transient QueryInformationSystem queryInformationSystem;
-
- private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
- private static final String CONTEX_RELATION_PROJECT = "projects";
-
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@@ -68,27 +59,17 @@ public class CreateContextRelation implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("hdfsNameNode: {}", hdfsNameNode);
- final String isLookUpUrl = parser.get("isLookUpUrl");
- log.info("isLookUpUrl: {}", isLookUpUrl);
final String masterDuplicatePath = parser.get("masterDuplicate");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
- final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
+ final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
- final List masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath);
- log.info("Creating relation for datasource...");
+ log.info("Creating relation for datasources and projects...");
cce
.execute(
- Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class),
- masterDuplicateList);
-
- log.info("Creating relations for projects... ");
- cce
- .execute(
- Process::getRelation, CONTEX_RELATION_PROJECT,
- ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
+ Process::getRelation);
cce.close();
@@ -112,15 +93,11 @@ public class CreateContextRelation implements Serializable {
writer.close();
}
- public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
- throws IOException, ISLookUpException {
+ public CreateContextRelation(String hdfsPath, String hdfsNameNode)
+ throws IOException{
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
- queryInformationSystem = new QueryInformationSystem();
- queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
- queryInformationSystem.execContextRelationQuery();
-
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
@@ -134,17 +111,13 @@ public class CreateContextRelation implements Serializable {
}
- public void execute(final Function> producer, String category, String prefix) {
- execute(producer, category, prefix, null);
- }
-
- public void execute(final Function> producer, String category, String prefix,
- List masterDuplicateList) {
+ /**
+ * Writes the relations of every context returned by {@code UtilCommunityAPI#getContextRelation}.
+ * For each {@code ContextInfo} received, {@code producer} is applied and every element of its result is
+ * passed to {@code writeEntity}.
+ *
+ * @param producer maps a {@code ContextInfo} to the relations to be written for it
+ * @throws IOException declared by {@code UtilCommunityAPI#getContextRelation}
+ */
+ public void execute(final Function> producer) throws IOException {
final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
- queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList);
+ UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
+ queryCommunityAPI.getContextRelation(consumer);
}
protected void writeEntity(final Relation r) {
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java
deleted file mode 100644
index b982b26..0000000
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java
+++ /dev/null
@@ -1,246 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.dump.complete;
-
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.function.Consumer;
-
-import org.dom4j.Document;
-import org.dom4j.DocumentException;
-import org.dom4j.Element;
-import org.dom4j.Node;
-import org.dom4j.io.SAXReader;
-import org.jetbrains.annotations.NotNull;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
-
-import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
-import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.utils.DHPUtils;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-
-public class QueryInformationSystem {
- private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
- private ISLookUpService isLookUp;
- private List contextRelationResult;
-
- private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
- +
- " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
- " and $x//context/param[./@name = 'status']/text() = 'all' " +
- " return " +
- "$x//context";
-
- private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
- +
- "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
- +
- "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
- "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
- +
- "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
-
- public void getContextInformation(final Consumer consumer) throws ISLookUpException {
-
- isLookUp
- .quickSearchProfile(XQUERY_ENTITY)
- .forEach(c -> {
- ContextInfo cinfo = new ContextInfo();
- String[] cSplit = c.split("@@");
- cinfo.setId(cSplit[0]);
- cinfo.setName(cSplit[1]);
- log.info("community name : {}", cSplit[1]);
- cinfo.setDescription(cSplit[2]);
- if (!cSplit[3].trim().equals("")) {
- cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
- }
- cinfo.setZenodocommunity(cSplit[4]);
- cinfo.setType(cSplit[5]);
- consumer.accept(cinfo);
- });
-
- }
-
- public List getContextInformation() throws ISLookUpException {
- List ret = new ArrayList<>();
- isLookUp
- .quickSearchProfile(XQUERY_ENTITY)
- .forEach(c -> {
- ContextInfo cinfo = new ContextInfo();
- String[] cSplit = c.split("@@");
- cinfo.setId(cSplit[0]);
- cinfo.setName(cSplit[1]);
- cinfo.setDescription(cSplit[2]);
- if (!cSplit[3].trim().equals("")) {
- cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
- }
- cinfo.setZenodocommunity(cSplit[4]);
- cinfo.setType(cSplit[5]);
- ret.add(cinfo);
- });
-
- return ret;
-
- }
-
- public List getContextRelationResult() {
- return contextRelationResult;
- }
-
- public void setContextRelationResult(List contextRelationResult) {
- this.contextRelationResult = contextRelationResult;
- }
-
- public ISLookUpService getIsLookUp() {
- return isLookUp;
- }
-
- public void setIsLookUp(ISLookUpService isLookUpService) {
- this.isLookUp = isLookUpService;
- }
-
- public void execContextRelationQuery() throws ISLookUpException {
- contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
-
- }
-
- public void getContextRelation(final Consumer consumer, String category, String prefix) {
- getContextRelation(consumer, category, prefix, null);
- }
-
- public void getContextRelation(final Consumer consumer, String category, String prefix,
- List masterDuplicateList) {
-
- contextRelationResult.forEach(xml -> {
- ContextInfo cinfo = new ContextInfo();
- final Document doc;
-
- try {
- final SAXReader reader = new SAXReader();
- reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
- doc = reader.read(new StringReader(xml));
- Element root = doc.getRootElement();
- cinfo.setId(root.attributeValue("id"));
-
- Iterator it = root.elementIterator();
- while (it.hasNext()) {
- Element el = it.next();
- if (el.getName().equals("category")) {
- String categoryId = el.attributeValue("id");
- categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
- if (categoryId.equals(category)) {
- cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList));
- }
- }
-
- }
- consumer.accept(cinfo);
- } catch (DocumentException | SAXException e) {
- e.printStackTrace();
- }
-
- });
-
- }
-
- @NotNull
- private List getCategoryList(Element el, String prefix, List masterDuplicateList) {
- List datasourceList = new ArrayList<>();
- for (Object node : el.selectNodes(".//concept")) {
- String oid = getOpenaireId((Node) node, prefix);
- if (oid != null)
- if (masterDuplicateList == null)
- datasourceList.add(oid);
- else
- datasourceList.add(getMaster(oid, masterDuplicateList));
- }
-
- return datasourceList;
- }
-
- private String getMaster(String oid, List masterDuplicateList) {
- for (MasterDuplicate md : masterDuplicateList) {
- if (md.getDuplicate().equals(oid))
- return md.getMaster();
- }
- return oid;
- }
-
- private String getOpenaireId(Node el, String prefix) {
- for (Object node : el.selectNodes(".//param")) {
- Node n = (Node) node;
- if (n.valueOf("./@name").equals("openaireId")) {
- String id = n.getText();
- if (id.startsWith(prefix + "|"))
- return id;
- return prefix + "|" + id;
- }
- }
-
- return makeOpenaireId(el, prefix);
-
- }
-
- private String makeOpenaireId(Node el, String prefix) {
- if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
- return null;
- }
- String funder = "";
- String grantId = null;
- String funding = null;
- for (Object node : el.selectNodes(".//param")) {
- Node n = (Node) node;
- switch (n.valueOf("./@name")) {
- case "funding":
- funding = n.getText();
- break;
- case "funder":
- funder = n.getText();
- break;
- case "CD_PROJECT_NUMBER":
- grantId = n.getText();
- break;
- default:
- break;
- }
- }
- String nsp = null;
-
- switch (funder.toLowerCase()) {
- case "ec":
- if (funding == null) {
- return null;
- }
- if (funding.toLowerCase().contains("h2020")) {
- nsp = "corda__h2020::";
- } else if (funding.toLowerCase().contains("he")) {
- nsp = "corda_____he::";
- } else {
- nsp = "corda_______::";
- }
- break;
- case "tubitak":
- nsp = "tubitakf____::";
- break;
- case "dfg":
- nsp = "dfgf________::";
- break;
- default:
- StringBuilder bld = new StringBuilder();
- bld.append(funder.toLowerCase());
- for (int i = funder.length(); i < 12; i++)
- bld.append("_");
- bld.append("::");
- nsp = bld.toString();
- }
-
- return prefix + "|" + nsp + DHPUtils.md5(grantId);
- }
-
-}
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java
index 527e324..fab8fb9 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java
@@ -10,6 +10,8 @@ import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
+import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
+import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@@ -58,8 +60,9 @@ public class SparkOrganizationRelation implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
- final OrganizationMap organizationMap = new Gson()
- .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
+ UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
+ final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization();
+
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
log.info("organization map : {}", serializedOrganizationMap);
@@ -79,7 +82,7 @@ public class SparkOrganizationRelation implements Serializable {
}
- private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
+ private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap,
String outputPath, String communityMapPath) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
@@ -129,7 +132,7 @@ public class SparkOrganizationRelation implements Serializable {
}
@NotNull
- private static Consumer getMergedRelsConsumer(OrganizationMap organizationMap,
+ private static Consumer getMergedRelsConsumer(CommunityEntityMap organizationMap,
List relList, CommunityMap communityMap) {
return mergedRels -> {
String oId = mergedRels.getOrganizationId();
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java
index b8a3b90..2231dac 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java
@@ -17,14 +17,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
/**
* @author miriam.baglioni
@@ -37,7 +34,7 @@ public class DumpCommunities implements Serializable {
private final BufferedWriter writer;
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
+ " description \n";
- private final transient QueryCommunityAPI queryCommunityAPI;
+ private final transient UtilCommunityAPI queryCommunityAPI;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@@ -57,7 +54,7 @@ public class DumpCommunities implements Serializable {
final List communities = Arrays.asList(split(parser.get("communities"), ";"));
- final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl"));
+ final DumpCommunities dc = new DumpCommunities(outputPath, nameNode);
dc.writeCommunity(communities);
@@ -79,9 +76,9 @@ public class DumpCommunities implements Serializable {
writer.close();
}
- public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
+ public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception {
final Configuration conf = new Configuration();
- queryCommunityAPI = new QueryCommunityAPI();
+ queryCommunityAPI = new UtilCommunityAPI();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml
index 5d62bd9..a6e68d0 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml
@@ -88,7 +88,6 @@
eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap
--outputPath${workingDir}/communityMap
--nameNode${nameNode}
- --isLookUpUrl${isLookUpUrl}
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml
index bacf0a3..c29798d 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml
@@ -81,7 +81,6 @@
eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities
--outputPath${outputPath}/community
--nameNode${nameNode}
- --isLookUpUrl${isLookUpUrl}
--communities${communities}
@@ -143,7 +142,6 @@
--sourcePath${sourcePath}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Publication
-
--workingPath${outputPath}/workingDir
--resultTypepublication
@@ -169,7 +167,6 @@
--sourcePath${sourcePath}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset
-
--workingPath${outputPath}/workingDir
--resultTypedataset
@@ -195,7 +192,6 @@
--sourcePath${sourcePath}
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct
-
--workingPath${outputPath}/workingDir
--resultTypeotherresearchproduct
@@ -221,7 +217,6 @@
--sourcePath${sourcePath}
--resultTableNameeu.dnetlib.dhp.schema.oaf.Software
-
--workingPath${outputPath}/workingDir
--resultTypesoftware
@@ -252,9 +247,7 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--workingPath${outputPath}/workingDir
-
--outputPath${outputPath}
-
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
index 225819f..31d8619 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json
@@ -1,12 +1,6 @@
[
- {
- "paramName":"is",
- "paramLongName":"isLookUpUrl",
- "paramDescription": "URL of the isLookUp Service",
- "paramRequired": true
- },
{
"paramName":"nn",
"paramLongName":"nameNode",
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json
index 9946e94..ba359ce 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json
@@ -1,11 +1,6 @@
[
- {
- "paramName":"is",
- "paramLongName":"isLookUpUrl",
- "paramDescription": "URL of the isLookUp Service",
- "paramRequired": false
- },
+
{
"paramName": "hdfs",
"paramLongName": "hdfsPath",
@@ -17,12 +12,8 @@
"paramLongName": "nameNode",
"paramDescription": "the name node",
"paramRequired": true
- },{
- "paramName": "md",
- "paramLongName": "masterDuplicate",
- "paramDescription": "the master duplicate path for datasource deduplication",
- "paramRequired": false
-}
+ }
+
]
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json
index c27a923..70abce6 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json
@@ -1,11 +1,6 @@
[
- {
- "paramName":"ocm",
- "paramLongName":"organizationCommunityMap",
- "paramDescription": "the organization community map association",
- "paramRequired": false
- },
+
{
"paramName":"s",
"paramLongName":"sourcePath",
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
index 165afad..e17c6ce 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml
@@ -167,7 +167,6 @@
eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap
--outputPath${workingDir}/communityMap
--nameNode${nameNode}
- --isLookUpUrl${isLookUpUrl}
--singleDeposition${singleDeposition}
--communityId${communityId}
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
index 4b9983b..9d4350f 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml
@@ -85,20 +85,8 @@
-
+
-
-
- eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB
- --hdfsPath${workingDir}/masterduplicate
- --hdfsNameNode${nameNode}
- --postgresUrl${postgresURL}
- --postgresUser${postgresUser}
- --postgresPassword${postgresPassword}
-
-
-
-
@@ -349,7 +337,6 @@
eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities
--hdfsPath${outputPath}/communities_infrastructures/community_infrastructure.json.gz
--nameNode${nameNode}
- --isLookUpUrl${isLookUpUrl}
@@ -360,8 +347,6 @@
eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation
--hdfsPath${workingDir}/relation/context
--nameNode${nameNode}
- --isLookUpUrl${isLookUpUrl}
- --masterDuplicate${workingDir}/masterduplicate
@@ -386,7 +371,6 @@
--sourcePath${sourcePath}/relation
--outputPath${workingDir}/relation/contextOrg
- --organizationCommunityMap${organizationCommunityMap}
--communityMapPath${communityMapPath}
diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java
index 9f3cb84..6fe055b 100644
--- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java
+++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java
@@ -1,11 +1,7 @@
package eu.dnetlib.dhp.oa.graph.dump;
-import static org.mockito.Mockito.lenient;
-
import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
import java.util.Map;
import org.dom4j.DocumentException;
@@ -13,24 +9,22 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.xml.sax.SAXException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
- private QueryCommunityAPI queryInformationSystem;
+ private UtilCommunityAPI queryInformationSystem;
private Map map;
@BeforeEach
public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException {
- queryInformationSystem = new QueryCommunityAPI();
+ queryInformationSystem = new UtilCommunityAPI();
map = queryInformationSystem.getCommunityMap(false, null);
}