From 69dac916590e98327ba7f7a13c1946bc0423b3ff Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Oct 2023 15:45:52 +0200 Subject: [PATCH] adding the new code to use the API instead of the Information Service --- .../eu/dnetlib/dhp/api/QueryCommunityAPI.java | 83 +++++++++ .../main/java/eu/dnetlib/dhp/api/Utils.java | 169 ++++++++++++++++++ .../api/model/CommunityContentprovider.java | 43 +++++ .../dhp/api/model/CommunityEntityMap.java | 21 +++ .../dnetlib/dhp/api/model/CommunityModel.java | 108 +++++++++++ .../dhp/api/model/CommunitySummary.java | 15 ++ .../dnetlib/dhp/api/model/ContentModel.java | 51 ++++++ .../dnetlib/dhp/api/model/DatasourceList.java | 13 ++ .../dhp/api/model/OrganizationList.java | 16 ++ .../dnetlib/dhp/api/model/ProjectModel.java | 24 +++ .../community/QueryInformationSystem.java | 34 ---- .../dhp/api/QueryCommunityAPITest.java | 9 + 12 files changed, 552 insertions(+), 34 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java create mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java create mode 100644 dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java new file mode 100644 index 000000000..262ca0290 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -0,0 +1,83 @@ + +package eu.dnetlib.dhp.api; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; + +import org.jetbrains.annotations.NotNull; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class QueryCommunityAPI { + private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/"; + private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/"; + + private static String get(String geturl) throws IOException { + URL url = new URL(geturl); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("GET"); + + int responseCode = conn.getResponseCode(); + String body = getBody(conn); + conn.disconnect(); + if (responseCode != HttpURLConnection.HTTP_OK) + throw new IOException("Unexpected code " + responseCode + body); + + return body; + } + + public static String communities(boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/communities"); + return get(BETA_BASE_URL + "community/communities"); + } + + public static String community(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id); + return get(BETA_BASE_URL + "community/" + id); + } + + public static String communityDatasource(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + return (BETA_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size, boolean production) throws IOException { + if (production) + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + + @NotNull + private static String getBody(HttpURLConnection conn) throws IOException { + String body = "{}"; + try (BufferedReader br = new BufferedReader( + new InputStreamReader(conn.getInputStream(), "utf-8"))) { + StringBuilder response = new StringBuilder(); + String responseLine = null; + while ((responseLine = br.readLine()) != null) { + response.append(responseLine.trim()); + } + + body = response.toString(); + + } + return body; + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java new file mode 100644 index 000000000..43d5e7e98 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -0,0 +1,169 @@ + +package eu.dnetlib.dhp.api; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import javax.management.Query; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.amazonaws.util.StringUtils; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; + +import eu.dnetlib.dhp.api.model.*; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.Provider; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class Utils implements Serializable { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final VerbResolver resolver = VerbResolverFactory.newInstance(); + + private static final Logger log = LoggerFactory.getLogger(Utils.class); + + public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException { + final Map communities = Maps.newHashMap(); + List validCommunities = new ArrayList<>(); + getValidCommunities(production) + .forEach(community -> { + try { + CommunityModel cm = MAPPER + .readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class); + validCommunities.add(getCommunity(cm)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + validCommunities.forEach(community -> { + try { + DatasourceList dl = MAPPER + .readValue( + QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class); + community.setProviders(dl.stream().map(d -> { + if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) + return null; + Provider p = new Provider(); + p.setOpenaireId("10|" + d.getOpenaireId()); + p.setSelectionConstraints(d.getSelectioncriteria()); + if (p.getSelectionConstraints() != null) + p.getSelectionConstraints().setSelection(resolver); + return p; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + validCommunities.forEach(community -> { + if (community.isValid()) + communities.put(community.getId(), community); + }); + return new CommunityConfiguration(communities); + } + + private static Community getCommunity(CommunityModel cm) { + Community c = new Community(); + c.setId(cm.getId()); + c.setZenodoCommunities(cm.getOtherZenodoCommunities()); + if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) + c.getZenodoCommunities().add(cm.getZenodoCommunity()); + c.setSubjects(cm.getSubjects()); + c.getSubjects().addAll(cm.getFos()); + c.getSubjects().addAll(cm.getSdg()); + if (cm.getAdvancedConstraints() != null) { + c.setConstraints(cm.getAdvancedConstraints()); + c.getConstraints().setSelection(resolver); + } + if (cm.getRemoveConstraints() != null) { + c.setRemoveConstraints(cm.getRemoveConstraints()); + c.getRemoveConstraints().setSelection(resolver); + } + return c; + } + + public static List getValidCommunities(boolean production) throws IOException { + return MAPPER + .readValue(QueryCommunityAPI.communities(production), CommunitySummary.class) + .stream() + .filter( + community -> !community.getStatus().equals("hidden") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + } + + /** + * it returns for each organization the list of associated communities + */ + public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains( + "20|" + o)) + organizationMap.put("20|" + o, new ArrayList<>()); + organizationMap.get("20|" + o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + + public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException { + CommunityEntityMap projectMap = new CommunityEntityMap(); + getValidCommunities(production) + .forEach(community -> { + int page = -1; + int size = 100; + ContentModel cm = new ContentModel(); + do { + page++; + try { + cm = MAPPER + .readValue( + QueryCommunityAPI + .communityProjects( + community.getId(), String.valueOf(page), String.valueOf(size), production), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if (!projectMap.keySet().contains("40|" + p.getOpenaireId())) + projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>()); + projectMap.get("40|" + p.getOpenaireId()).add(community.getId()); + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + }); + return projectMap; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java new file mode 100644 index 000000000..9fab5a80c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.api.model; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +@JsonAutoDetect +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityContentprovider { + private String openaireId; + private SelectionConstraints selectioncriteria; + + private String enabled; + + public String getEnabled() { + return enabled; + } + + public void setEnabled(String enabled) { + this.enabled = enabled; + } + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(final String openaireId) { + this.openaireId = openaireId; + } + + public SelectionConstraints getSelectioncriteria() { + + return this.selectioncriteria; + } + + public void setSelectioncriteria(SelectionConstraints selectioncriteria) { + this.selectioncriteria = selectioncriteria; + + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java new file mode 100644 index 000000000..ca3eb2857 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java @@ -0,0 +1,21 @@ + +package eu.dnetlib.dhp.api.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +public class CommunityEntityMap extends HashMap> { + + public CommunityEntityMap() { + super(); + } + + public List get(String key) { + + if (super.get(key) == null) { + return new ArrayList<>(); + } + return super.get(key); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java new file mode 100644 index 000000000..745e7efc2 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java @@ -0,0 +1,108 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommunityModel implements Serializable { + private String id; + private String type; + private String status; + + private String zenodoCommunity; + private List subjects; + private List otherZenodoCommunities; + private List fos; + private List sdg; + private SelectionConstraints advancedConstraints; + private SelectionConstraints removeConstraints; + + public String getZenodoCommunity() { + return zenodoCommunity; + } + + public void setZenodoCommunity(String zenodoCommunity) { + this.zenodoCommunity = zenodoCommunity; + } + + public List getSubjects() { + return subjects; + } + + public void setSubjects(List subjects) { + this.subjects = subjects; + } + + public List getOtherZenodoCommunities() { + return otherZenodoCommunities; + } + + public void setOtherZenodoCommunities(List otherZenodoCommunities) { + this.otherZenodoCommunities = otherZenodoCommunities; + } + + public List getFos() { + return fos; + } + + public void setFos(List fos) { + this.fos = fos; + } + + public List getSdg() { + return sdg; + } + + public void setSdg(List sdg) { + this.sdg = sdg; + } + + public SelectionConstraints getRemoveConstraints() { + return removeConstraints; + } + + public void setRemoveConstraints(SelectionConstraints removeConstraints) { + this.removeConstraints = removeConstraints; + } + + public SelectionConstraints getAdvancedConstraints() { + return advancedConstraints; + } + + public void setAdvancedConstraints(SelectionConstraints advancedConstraints) { + this.advancedConstraints = advancedConstraints; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java new file mode 100644 index 000000000..a0541f7ee --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java @@ -0,0 +1,15 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 06/10/23 + */ +public class CommunitySummary extends ArrayList implements Serializable { + public CommunitySummary() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java new file mode 100644 index 000000000..469709f59 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ContentModel implements Serializable { + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; + + public List getContent() { + return content; + } + + public void setContent(List content) { + this.content = content; + } + + public Integer getTotalPages() { + return totalPages; + } + + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } + + public Boolean getLast() { + return last; + } + + public void setLast(Boolean last) { + this.last = last; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java new file mode 100644 index 000000000..30d0241c3 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +import eu.dnetlib.dhp.api.model.CommunityContentprovider; + +public class DatasourceList extends ArrayList implements Serializable { + public DatasourceList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java new file mode 100644 index 000000000..3c81ad179 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java @@ -0,0 +1,16 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.ArrayList; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +public class OrganizationList extends ArrayList implements Serializable { + + public OrganizationList() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java new file mode 100644 index 000000000..3495d6a63 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java @@ -0,0 +1,24 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * @author miriam.baglioni + * @Date 09/10/23 + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ProjectModel implements Serializable { + + private String openaireId; + + public String getOpenaireId() { + return openaireId; + } + + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java deleted file mode 100644 index 5fe3cf81f..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ /dev/null @@ -1,34 +0,0 @@ - -package eu.dnetlib.dhp.bulktag.community; - -import java.io.IOException; -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.dom4j.DocumentException; -import org.xml.sax.SAXException; - -import com.google.common.base.Joiner; - -import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException, SAXException, IOException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - final List res = isLookUp - .quickSearchProfile( - IOUtils - .toString( - QueryInformationSystem.class - .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/query.xq"))); - - final String xmlConf = "" + Joiner.on(" ").join(res) + ""; - - return CommunityConfigurationFactory.newInstance(xmlConf); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java new file mode 100644 index 000000000..03084301e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/api/QueryCommunityAPITest.java @@ -0,0 +1,9 @@ +package eu.dnetlib.dhp.api;/** + + * @author miriam.baglioni + + * @Date 20/10/23 + + */ +public class QueryCommunityAPITest { +}