diff --git a/api/pom.xml b/api/pom.xml new file mode 100644 index 0000000..c719582 --- /dev/null +++ b/api/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-graph-dump + 1.2.5-SNAPSHOT + + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + + + 8 + 8 + + + + + + + dom4j + dom4j + + + + jaxen + jaxen + + + + eu.dnetlib.dhp + dhp-common + ${project.version} + + + com.fasterxml.jackson.core + jackson-annotations + compile + + + + + + \ No newline at end of file diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java new file mode 100644 index 0000000..fca6406 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.communityapi; + + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class QueryCommunityAPI { + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); + + return body; + } + + public static String communities() throws IOException { + + return get(PRODUCTION_BASE_URL + "community/communities"); + } + + public static String community(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id); + + } + + public static String communityDatasource(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java new file mode 100644 index 0000000..a1de823 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityContentprovider.java @@ -0,0 +1,30 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonAutoDetect +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityContentprovider { + private String openaireId; + + private String enabled; + + public String getEnabled() { + return enabled; + } + + public void setEnabled(String enabled) { + this.enabled = enabled; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java new file mode 100644 index 0000000..efc0399 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityEntityMap.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class CommunityEntityMap extends HashMap> { + + public CommunityEntityMap() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java new file mode 100644 index 0000000..144dfd7 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java @@ -0,0 +1,82 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityModel implements Serializable { + private String id; + private String name; + private String description; + + private String status; + + private String type; + + private List subjects; + + private String zenodoCommunity; + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public String getZenodoCommunity() { + return zenodoCommunity; + } + + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java new file mode 100644 index 0000000..93bbe83 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunitySummary.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class CommunitySummary extends ArrayList implements Serializable { + public CommunitySummary() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java new file mode 100644 index 0000000..ea0ed33 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ContentModel.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ContentModel implements Serializable { + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public Integer getTotalPages() { + return totalPages; + } + + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } + + public Boolean getLast() { + return last; + } + + public void setLast(Boolean last) { + this.last = last; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java new file mode 100644 index 0000000..9a2f44a --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/DatasourceList.java @@ -0,0 +1,11 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +public class DatasourceList extends ArrayList implements Serializable { + public DatasourceList() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java new file mode 100644 index 0000000..96305ff --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/OrganizationList.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class OrganizationList extends ArrayList implements Serializable { + + public OrganizationList() { + super(); + } +} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java new file mode 100644 index 0000000..94b6114 --- /dev/null +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java @@ -0,0 +1,44 @@ + +package eu.dnetlib.dhp.communityapi.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ProjectModel implements Serializable { + + private String openaireId; + + private String funder; + + private String gratId; + + public String getFunder() { + return funder; + } + + public void setFunder(String funder) { + this.funder = funder; + } + + public String getGratId() { + return gratId; + } + + public void setGratId(String gratId) { + this.gratId = gratId; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } +} diff --git a/dump/pom.xml b/dump/pom.xml index 60bc4b4..e9a8100 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -54,7 +54,17 @@ dump-schema 1.2.5-SNAPSHOT - + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + compile + diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java new file mode 100644 index 0000000..22eca32 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java @@ -0,0 +1,144 @@ + +package eu.dnetlib.dhp.oa.graph.dump; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.communityapi.model.*; + +import eu.dnetlib.dhp.utils.DHPUtils; + +public class UtilCommunityAPI { + + private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); + + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); + + } + + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } + + + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public")) + && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + + } + + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + + } + + + + private List getDatasourceList(String id) { + List datasourceList = new ArrayList<>(); + try { + + new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), + DatasourceList.class) + .stream() + .forEach(ds -> { + if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) { + + datasourceList.add(ds.getOpenaireId()); + } + + }); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return datasourceList; + } + + private List getProjectList(String id) { + int page = -1; + int size = 100; + ContentModel cm = null; + ; + ArrayList projectList = new ArrayList<>(); + do { + page++; + try { + cm = new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI + .communityProjects( + id, String.valueOf(page), String.valueOf(size)), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (Optional.ofNullable(p.getOpenaireId()).isPresent()) + projectList.add(p.getOpenaireId()); + + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + + return projectList; + } + + /** + * it returns for each organization the list of associated communities + */ + public CommunityEntityMap getCommunityOrganization() throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), + OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains(o)) + organizationMap.put(o, new ArrayList<>()); + organizationMap.get(o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java index 414214f..5caedf4 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/eosc/SaveCommunityMap.java @@ -8,21 +8,19 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Optional; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** - * This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the + * This class connects with the community APIs for production. It saves the information about the * context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community * - research infrastructure/initiative , the value is the label of the research community - research * infrastructure/initiative. @@ -31,11 +29,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final transient QueryInformationSystem queryInformationSystem; + private final transient UtilCommunityAPI queryInformationSystem; private final transient BufferedWriter writer; - public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { + public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException { final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); @@ -45,8 +43,7 @@ public class SaveCommunityMap implements Serializable { fileSystem.delete(hdfsWritePath, true); } - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryInformationSystem = new UtilCommunityAPI(); FSDataOutputStream fos = fileSystem.create(hdfsWritePath); writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); @@ -54,10 +51,10 @@ public class SaveCommunityMap implements Serializable { public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils - .toString( - SaveCommunityMap.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json")); + .toString( + SaveCommunityMap.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -68,24 +65,29 @@ public class SaveCommunityMap implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); + final Boolean singleCommunity = Optional + .ofNullable(parser.get("singleDeposition")) + .map(Boolean::valueOf) + .orElse(false); - final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); + final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); - scm.saveCommunityMap(); + final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode); + + scm.saveCommunityMap(singleCommunity, community_id); } - private void saveCommunityMap() - throws ISLookUpException, IOException, DocumentException, SAXException { + private void saveCommunityMap(boolean singleCommunity, String communityId) + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER - .writeValueAsString(queryInformationSystem.getCommunityMap()); + .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); writer - .write( - communityMapString); + .write( + communityMapString); writer.close(); } } + diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties index 612a16e..1250d9e 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/eosc/job.properties @@ -1,5 +1,5 @@ #PROPERTIES FOR EOSC DUMP -sourcePath=/tmp/miriam/graphCopy +sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended #accessToken for the openaire sandbox following accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4 diff --git a/pom.xml b/pom.xml index 20eee8d..6641dbf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,6 +6,7 @@ dump-schema dump + api