diff --git a/api/pom.xml b/api/pom.xml
new file mode 100644
index 0000000..c719582
--- /dev/null
+++ b/api/pom.xml
@@ -0,0 +1,49 @@
+
+
+ 4.0.0
+
+
+ eu.dnetlib.dhp
+ dhp-graph-dump
+ 1.2.5-SNAPSHOT
+
+
+ eu.dnetlib.dhp
+ api
+ 1.2.5-SNAPSHOT
+
+
+ 8
+ 8
+
+
+
+
+
+
+ dom4j
+ dom4j
+
+
+
+ jaxen
+ jaxen
+
+
+
+ eu.dnetlib.dhp
+ dhp-common
+ ${project.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ compile
+
+
+
+
+
+
\ No newline at end of file
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
new file mode 100644
index 0000000..fca6406
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java
@@ -0,0 +1,121 @@
+package eu.dnetlib.dhp.communityapi;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Minimal HTTP client for the community API exposed under {@code PRODUCTION_BASE_URL}.
+ * Every public method issues one GET request and returns the response body as a string.
+ *
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class QueryCommunityAPI {
+    private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
+
+    /**
+     * Executes a GET request and returns the response body.
+     *
+     * @param geturl the absolute URL to query
+     * @return the response body with every line trimmed and the line breaks removed
+     * @throws IOException if the request fails or the server answers with a status other than 200
+     */
+    private static String get(String geturl) throws IOException {
+        URL url = new URL(geturl);
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        try {
+            // no setDoOutput(true): nothing is written to the connection for a GET
+            conn.setRequestMethod("GET");
+
+            int responseCode = conn.getResponseCode();
+            String body = getBody(conn);
+            if (responseCode != HttpURLConnection.HTTP_OK)
+                throw new IOException("Unexpected code " + responseCode + body);
+
+            return body;
+        } finally {
+            // release the connection on the failure paths as well
+            conn.disconnect();
+        }
+    }
+
+    /**
+     * @return the body returned by the {@code community/communities} endpoint
+     * @throws IOException if the request fails
+     */
+    public static String communities() throws IOException {
+        return get(PRODUCTION_BASE_URL + "community/communities");
+    }
+
+    /**
+     * @param id the community identifier, appended to the URL path as is
+     * @return the body returned by the {@code community/{id}} endpoint
+     * @throws IOException if the request fails
+     */
+    public static String community(String id) throws IOException {
+        return get(PRODUCTION_BASE_URL + "community/" + id);
+    }
+
+    /**
+     * @param id the community identifier, appended to the URL path as is
+     * @return the body returned by the {@code community/{id}/contentproviders} endpoint
+     * @throws IOException if the request fails
+     */
+    public static String communityDatasource(String id) throws IOException {
+        return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
+    }
+
+    /**
+     * @param id the community identifier, appended to the URL path as is
+     * @return the body returned by the {@code community/{id}/propagationOrganizations} endpoint
+     * @throws IOException if the request fails
+     */
+    public static String communityPropagationOrganization(String id) throws IOException {
+        return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
+    }
+
+    /**
+     * @param id the community identifier, appended to the URL path as is
+     * @param page the page segment of the URL
+     * @param size the size segment of the URL
+     * @return the body returned by the {@code community/{id}/projects/{page}/{size}} endpoint
+     * @throws IOException if the request fails
+     */
+    public static String communityProjects(String id, String page, String size) throws IOException {
+        return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
+    }
+
+    /**
+     * Reads the whole payload of a response.
+     *
+     * @param conn the connection to read from
+     * @return the payload with every line trimmed and concatenated, or {@code "{}"} when the
+     *         response carries no stream at all
+     * @throws IOException if reading fails
+     */
+    private static String getBody(HttpURLConnection conn) throws IOException {
+        // getInputStream() throws on 4xx/5xx answers; their payload is served by getErrorStream(),
+        // which may be null when the server sent no body
+        InputStream stream = conn.getResponseCode() >= HttpURLConnection.HTTP_BAD_REQUEST
+            ? conn.getErrorStream()
+            : conn.getInputStream();
+        if (stream == null) {
+            return "{}";
+        }
+
+        StringBuilder response = new StringBuilder();
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
+            String responseLine;
+            while ((responseLine = br.readLine()) != null) {
+                response.append(responseLine.trim());
+            }
+        }
+        return response.toString();
+    }
+
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java
new file mode 100644
index 0000000..a1de823
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java
@@ -0,0 +1,34 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * Bean with the {@code openaireId} and {@code enabled} properties of a content provider.
+ * Unknown JSON properties are ignored on deserialization.
+ */
+@JsonAutoDetect
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityContentprovider {
+    private String openaireId;
+
+    private String enabled;
+
+    public String getOpenaireId() {
+        return this.openaireId;
+    }
+
+    public void setOpenaireId(final String openaireId) {
+        this.openaireId = openaireId;
+    }
+
+    public String getEnabled() {
+        return this.enabled;
+    }
+
+    public void setEnabled(String enabled) {
+        this.enabled = enabled;
+    }
+
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java
new file mode 100644
index 0000000..efc0399
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java
@@ -0,0 +1,30 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Map from an entity identifier to the list of identifiers associated with it.
+ */
+public class CommunityEntityMap extends HashMap<String, List<String>> {
+
+    public CommunityEntityMap() {
+        super();
+    }
+
+    /**
+     * Null-safe lookup by {@code String} key; it overloads {@code HashMap.get(Object)}.
+     *
+     * @param key the key to look up
+     * @return the list stored under {@code key}, or a new empty list when the key has no
+     *         (non-null) value; that empty list is not stored in the map, so adding to it
+     *         does not change the map
+     */
+    public List<String> get(String key) {
+        // single lookup instead of querying the map twice
+        final List<String> value = super.get(key);
+        return value == null ? new ArrayList<>() : value;
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
new file mode 100644
index 0000000..144dfd7
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java
@@ -0,0 +1,86 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * Serializable bean describing a community: identifier, name, description, status, type,
+ * subjects and Zenodo community. Unknown JSON properties are ignored on deserialization.
+ *
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class CommunityModel implements Serializable {
+    private String id;
+    private String name;
+    private String description;
+
+    private String status;
+
+    private String type;
+
+    // NOTE(review): the element type of this list is not visible here; confirm and parameterise it
+    private List subjects;
+
+    private String zenodoCommunity;
+
+    public String getId() {
+        return this.id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getName() {
+        return this.name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getDescription() {
+        return this.description;
+    }
+
+    public void setDescription(String description) {
+        this.description = description;
+    }
+
+    public String getStatus() {
+        return this.status;
+    }
+
+    public void setStatus(String status) {
+        this.status = status;
+    }
+
+    public String getType() {
+        return this.type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public List getSubjects() {
+        return this.subjects;
+    }
+
+    public void setSubjects(List subjects) {
+        this.subjects = subjects;
+    }
+
+    public String getZenodoCommunity() {
+        return this.zenodoCommunity;
+    }
+
+    public void setZenodoCommunity(String zenodoCommunity) {
+        this.zenodoCommunity = zenodoCommunity;
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java
new file mode 100644
index 0000000..93bbe83
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java
@@ -0,0 +1,19 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * List of {@link CommunityModel} entries, usable as a concrete target class for JSON binding.
+ * The element type is declared explicitly so that data binding builds {@link CommunityModel}
+ * instances and callers can use the elements without casting.
+ *
+ * @author miriam.baglioni
+ * @Date 06/10/23
+ */
+public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
+    public CommunitySummary() {
+        super();
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java
new file mode 100644
index 0000000..ea0ed33
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java
@@ -0,0 +1,55 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.List;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * One page of a paginated API answer: the entries of the page plus the paging information
+ * ({@code totalPages}, {@code last}, {@code number}). The entries are declared as
+ * {@link ProjectModel} so that data binding builds typed elements.
+ *
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ContentModel implements Serializable {
+    private List<ProjectModel> content;
+    private Integer totalPages;
+    private Boolean last;
+    private Integer number;
+
+    public List<ProjectModel> getContent() {
+        return content;
+    }
+
+    public void setContent(List<ProjectModel> content) {
+        this.content = content;
+    }
+
+    public Integer getTotalPages() {
+        return totalPages;
+    }
+
+    public void setTotalPages(Integer totalPages) {
+        this.totalPages = totalPages;
+    }
+
+    public Boolean getLast() {
+        return last;
+    }
+
+    public void setLast(Boolean last) {
+        this.last = last;
+    }
+
+    public Integer getNumber() {
+        return number;
+    }
+
+    public void setNumber(Integer number) {
+        this.number = number;
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java
new file mode 100644
index 0000000..9a2f44a
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java
@@ -0,0 +1,16 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * List of {@link CommunityContentprovider} entries, usable as a concrete target class for JSON
+ * binding. The element type is declared so that callers can read the properties of each entry
+ * without casting.
+ */
+public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
+    public DatasourceList() {
+        super();
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java
new file mode 100644
index 0000000..96305ff
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java
@@ -0,0 +1,18 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+/**
+ * List of organization identifiers, usable as a concrete target class for JSON binding.
+ *
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+public class OrganizationList extends ArrayList<String> implements Serializable {
+
+    public OrganizationList() {
+        super();
+    }
+}
diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
new file mode 100644
index 0000000..94b6114
--- /dev/null
+++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java
@@ -0,0 +1,48 @@
+
+package eu.dnetlib.dhp.communityapi.model;
+
+import java.io.Serializable;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+
+/**
+ * Serializable bean with the {@code openaireId}, {@code funder} and {@code gratId} properties
+ * of a project. Unknown JSON properties are ignored on deserialization.
+ *
+ * @author miriam.baglioni
+ * @Date 09/10/23
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class ProjectModel implements Serializable {
+
+    private String openaireId;
+
+    private String funder;
+
+    // NOTE(review): "gratId" looks like a typo of "grantId"; confirm the JSON property name
+    private String gratId;
+
+    public String getOpenaireId() {
+        return this.openaireId;
+    }
+
+    public void setOpenaireId(String openaireId) {
+        this.openaireId = openaireId;
+    }
+
+    public String getFunder() {
+        return this.funder;
+    }
+
+    public void setFunder(String funder) {
+        this.funder = funder;
+    }
+
+    public String getGratId() {
+        return this.gratId;
+    }
+
+    public void setGratId(String gratId) {
+        this.gratId = gratId;
+    }
+}
diff --git a/dump/pom.xml b/dump/pom.xml
index 60bc4b4..e9a8100 100644
--- a/dump/pom.xml
+++ b/dump/pom.xml
@@ -54,7 +54,17 @@
dump-schema
1.2.5-SNAPSHOT
-
+
+ eu.dnetlib.dhp
+ api
+ 1.2.5-SNAPSHOT
+
+
+ eu.dnetlib.dhp
+ api
+ 1.2.5-SNAPSHOT
+ compile
+
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java
new file mode 100644
index 0000000..22eca32
--- /dev/null
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java
@@ -0,0 +1,165 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.communityapi.model.*;
+
+import eu.dnetlib.dhp.utils.DHPUtils;
+
+/**
+ * Reads communities and the entities related to them from the community API and converts the
+ * answers into the structures used by the dump.
+ */
+public class UtilCommunityAPI {
+
+    private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
+
+    // one shared instance instead of a new ObjectMapper for every call
+    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();
+
+    /**
+     * Builds the map from community identifier to community name.
+     *
+     * @param singleCommunity when {@code true} only the community {@code communityId} is read,
+     *        otherwise all the valid communities are
+     * @param communityId the community to read; required when {@code singleCommunity} is true
+     * @return the map from community identifier to community name
+     * @throws IOException if the API cannot be queried or its answer cannot be parsed
+     * @throws IllegalArgumentException if {@code singleCommunity} is true and {@code communityId} is null
+     */
+    public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
+        throws IOException {
+        if (!singleCommunity)
+            return getMap(getValidCommunities());
+        // fail early instead of querying the API for the literal id "null"
+        if (communityId == null)
+            throw new IllegalArgumentException("communityId is required when a single community is requested");
+        return getMap(Arrays.asList(getCommunity(communityId)));
+    }
+
+    /** Indexes the names of the given communities by community identifier. */
+    private CommunityMap getMap(List<CommunityModel> communities) {
+        final CommunityMap map = new CommunityMap();
+        communities.forEach(c -> map.put(c.getId(), c.getName()));
+        return map;
+    }
+
+    /** Reads all the communities and keeps those accepted by {@link #isValid(CommunityModel)}. */
+    private List<CommunityModel> getValidCommunities() throws IOException {
+        return JSON_MAPPER
+            .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
+            .stream()
+            .filter(UtilCommunityAPI::isValid)
+            .collect(Collectors.toList());
+    }
+
+    /**
+     * A community is valid when its status is "all" or "public" (the latter in any case) and
+     * its type is "ri" or "community". Constant-first comparisons make a missing status or
+     * type count as not valid instead of raising a NullPointerException.
+     */
+    private static boolean isValid(CommunityModel community) {
+        final String status = community.getStatus();
+        final String type = community.getType();
+        return ("all".equals(status) || "public".equalsIgnoreCase(status))
+            && ("ri".equals(type) || "community".equals(type));
+    }
+
+    /** Reads the community with the given identifier. */
+    private CommunityModel getCommunity(String id) throws IOException {
+        return JSON_MAPPER
+            .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
+    }
+
+    /** Returns the non-null openaireId of every content provider of the community. */
+    private List<String> getDatasourceList(String id) {
+        try {
+            return JSON_MAPPER
+                .readValue(
+                    eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
+                    DatasourceList.class)
+                .stream()
+                .map(ds -> ds.getOpenaireId())
+                .filter(openaireId -> openaireId != null)
+                .collect(Collectors.toList());
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Returns the non-null openaireId of every project of the community, reading the projects
+     * one page of 100 entries at a time.
+     */
+    private List<String> getProjectList(String id) {
+        final int size = 100;
+        final List<String> projectList = new ArrayList<>();
+        int page = 0;
+        ContentModel cm;
+        do {
+            try {
+                cm = JSON_MAPPER
+                    .readValue(
+                        eu.dnetlib.dhp.communityapi.QueryCommunityAPI
+                            .communityProjects(
+                                id, String.valueOf(page), String.valueOf(size)),
+                        ContentModel.class);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            // a page may come without content: nothing to collect then
+            if (cm.getContent() != null) {
+                cm.getContent().forEach(p -> {
+                    if (p.getOpenaireId() != null)
+                        projectList.add(p.getOpenaireId());
+                });
+            }
+            page++;
+            // stop unless the page explicitly says it is not the last one: a missing flag must
+            // not end in a NullPointerException
+        } while (Boolean.FALSE.equals(cm.getLast()));
+
+        return projectList;
+    }
+
+    /**
+     * it returns for each organization the list of associated communities
+     *
+     * @return the map from organization to the identifiers of the valid communities associated with it
+     * @throws IOException if the list of communities cannot be read (a failure while reading the organizations of one community surfaces as a RuntimeException wrapping it)
+     */
+    public CommunityEntityMap getCommunityOrganization() throws IOException {
+        CommunityEntityMap organizationMap = new CommunityEntityMap();
+        getValidCommunities()
+            .forEach(community -> {
+                String id = community.getId();
+                try {
+                    List<String> associatedOrgs = MAPPER
+                        .readValue(
+                            eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
+                            OrganizationList.class);
+                    // computeIfAbsent registers the list in the map on first use, in one lookup
+                    associatedOrgs
+                        .forEach(o -> organizationMap.computeIfAbsent(o, k -> new ArrayList<>()).add(id));
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            });
+
+        return organizationMap;
+    }
+
+}
diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java
index 414214f..5caedf4 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java
@@ -8,21 +8,19 @@ import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
+import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
- * This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
+ * This class connects with the community APIs for production. It saves the information about the
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
* - research infrastructure/initiative , the value is the label of the research community - research
* infrastructure/initiative.
@@ -31,11 +29,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
- private final transient QueryInformationSystem queryInformationSystem;
+ private final transient UtilCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
- public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
+ public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
@@ -45,8 +43,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
- queryInformationSystem = new QueryInformationSystem();
- queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
+ queryInformationSystem = new UtilCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@@ -54,10 +51,10 @@ public class SaveCommunityMap implements Serializable {
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
- .toString(
- SaveCommunityMap.class
- .getResourceAsStream(
- "/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json"));
+ .toString(
+ SaveCommunityMap.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
@@ -68,24 +65,29 @@ public class SaveCommunityMap implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
- final String isLookUpUrl = parser.get("isLookUpUrl");
- log.info("isLookUpUrl: {}", isLookUpUrl);
+ final Boolean singleCommunity = Optional
+ .ofNullable(parser.get("singleDeposition"))
+ .map(Boolean::valueOf)
+ .orElse(false);
- final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
+ final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
- scm.saveCommunityMap();
+ final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
+
+ scm.saveCommunityMap(singleCommunity, community_id);
}
- private void saveCommunityMap()
- throws ISLookUpException, IOException, DocumentException, SAXException {
+ private void saveCommunityMap(boolean singleCommunity, String communityId) // serialises the community map to JSON, logs it and writes it through writer
+ throws IOException {
final String communityMapString = Utils.OBJECT_MAPPER
- .writeValueAsString(queryInformationSystem.getCommunityMap());
+ .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
log.info("communityMap {} ", communityMapString);
writer
- .write(
- communityMapString);
+ .write(
+ communityMapString); // NOTE(review): writer.close() below is not reached if this write throws
writer.close();
}
}
+
diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties
index 612a16e..1250d9e 100644
--- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties
+++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties
@@ -1,5 +1,5 @@
#PROPERTIES FOR EOSC DUMP
-sourcePath=/tmp/miriam/graphCopy
+sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted
outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended
#accessToken for the openaire sandbox following
accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4
diff --git a/pom.xml b/pom.xml
index 20eee8d..6641dbf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,6 +6,7 @@
dump-schema
dump
+ api