adding use of communityAPIs instead of ISLOOKUPRUL

This commit is contained in:
Miriam Baglioni 2024-01-04 12:10:36 +01:00
parent 1a196d03f9
commit f6f734bf5e
15 changed files with 574 additions and 23 deletions

49
api/pom.xml Normal file
View File

@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-graph-dump</artifactId>
<version>1.2.5-SNAPSHOT</version>
</parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
</dependency>
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,75 @@
package eu.dnetlib.dhp.communityapi;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class QueryCommunityAPI {
private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
private static String get(String geturl) throws IOException {
URL url = new URL(geturl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true);
conn.setRequestMethod("GET");
int responseCode = conn.getResponseCode();
String body = getBody(conn);
conn.disconnect();
if (responseCode != HttpURLConnection.HTTP_OK)
throw new IOException("Unexpected code " + responseCode + body);
return body;
}
public static String communities() throws IOException {
return get(PRODUCTION_BASE_URL + "community/communities");
}
public static String community(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id);
}
public static String communityDatasource(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
}
public static String communityPropagationOrganization(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
}
public static String communityProjects(String id, String page, String size) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
}
private static String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
response.append(responseLine.trim());
}
body = response.toString();
}
return body;
}
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.dhp.communityapi.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityContentprovider {
private String openaireId;
private String enabled;
public String getEnabled() {
return enabled;
}
public void setEnabled(String enabled) {
this.enabled = enabled;
}
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(final String openaireId) {
this.openaireId = openaireId;
}
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.communityapi.model;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class CommunityEntityMap extends HashMap<String, List<String>> {
public CommunityEntityMap() {
super();
}
public List<String> get(String key) {
if (super.get(key) == null) {
return new ArrayList<>();
}
return super.get(key);
}
}

View File

@ -0,0 +1,82 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityModel implements Serializable {
private String id;
private String name;
private String description;
private String status;
private String type;
private List<String> subjects;
private String zenodoCommunity;
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public String getZenodoCommunity() {
return zenodoCommunity;
}
public void setZenodoCommunity(String zenodoCommunity) {
this.zenodoCommunity = zenodoCommunity;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getStatus() {
return status;
}
public void setStatus(String status) {
this.status = status;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}

View File

@ -0,0 +1,15 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 06/10/23
*/
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
public CommunitySummary() {
super();
}
}

View File

@ -0,0 +1,51 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ContentModel implements Serializable {
private List<ProjectModel> content;
private Integer totalPages;
private Boolean last;
private Integer number;
public List<ProjectModel> getContent() {
return content;
}
public void setContent(List<ProjectModel> content) {
this.content = content;
}
public Integer getTotalPages() {
return totalPages;
}
public void setTotalPages(Integer totalPages) {
this.totalPages = totalPages;
}
public Boolean getLast() {
return last;
}
public void setLast(Boolean last) {
this.last = last;
}
public Integer getNumber() {
return number;
}
public void setNumber(Integer number) {
this.number = number;
}
}

View File

@ -0,0 +1,11 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
public DatasourceList() {
super();
}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.ArrayList;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
public class OrganizationList extends ArrayList<String> implements Serializable {
public OrganizationList() {
super();
}
}

View File

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/**
* @author miriam.baglioni
* @Date 09/10/23
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class ProjectModel implements Serializable {
private String openaireId;
private String funder;
private String gratId;
public String getFunder() {
return funder;
}
public void setFunder(String funder) {
this.funder = funder;
}
public String getGratId() {
return gratId;
}
public void setGratId(String gratId) {
this.gratId = gratId;
}
public String getOpenaireId() {
return openaireId;
}
public void setOpenaireId(String openaireId) {
this.openaireId = openaireId;
}
}

View File

@ -54,7 +54,17 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>

View File

@ -0,0 +1,144 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.utils.DHPUtils;
public class UtilCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws IOException {
if (singleCommunity)
return getMap(Arrays.asList(getCommunity(communityId)));
return getMap(getValidCommunities());
}
private CommunityMap getMap(List<CommunityModel> communities) {
final CommunityMap map = new CommunityMap();
communities.forEach(c -> map.put(c.getId(), c.getName()));
return map;
}
private List<CommunityModel> getValidCommunities() throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
.stream()
.filter(
community -> (community.getStatus().equals("all") || community.getStatus().equalsIgnoreCase("public"))
&&
(community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
}
private CommunityModel getCommunity(String id) throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
}
private List<String> getDatasourceList(String id) {
List<String> datasourceList = new ArrayList<>();
try {
new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
DatasourceList.class)
.stream()
.forEach(ds -> {
if (Optional.ofNullable(ds.getOpenaireId()).isPresent()) {
datasourceList.add(ds.getOpenaireId());
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
return datasourceList;
}
private List<String> getProjectList(String id) {
int page = -1;
int size = 100;
ContentModel cm = null;
;
ArrayList<String> projectList = new ArrayList<>();
do {
page++;
try {
cm = new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI
.communityProjects(
id, String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> {
if (Optional.ofNullable(p.getOpenaireId()).isPresent())
projectList.add(p.getOpenaireId());
});
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} while (!cm.getLast());
return projectList;
}
/**
* it returns for each organization the list of associated communities
*/
public CommunityEntityMap getCommunityOrganization() throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities()
.forEach(community -> {
String id = community.getId();
try {
List<String> associatedOrgs = MAPPER
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id),
OrganizationList.class);
associatedOrgs.forEach(o -> {
if (!organizationMap
.keySet()
.contains(o))
organizationMap.put(o, new ArrayList<>());
organizationMap.get(o).add(community.getId());
});
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap;
}
}

View File

@ -8,21 +8,19 @@ import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
* This class connects with the community APIs for production. It saves the information about the
* context that will guide the dump of the results. The information saved is a HashMap. The key is the id of a community
* - research infrastructure/initiative , the value is the label of the research community - research
* infrastructure/initiative.
@ -31,11 +29,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final transient QueryInformationSystem queryInformationSystem;
private final transient UtilCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
@ -45,8 +43,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryInformationSystem = new UtilCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@ -54,10 +51,10 @@ public class SaveCommunityMap implements Serializable {
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
SaveCommunityMap.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/eosc_cm_parameters.json"));
.toString(
SaveCommunityMap.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
@ -68,24 +65,29 @@ public class SaveCommunityMap implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final Boolean singleCommunity = Optional
.ofNullable(parser.get("singleDeposition"))
.map(Boolean::valueOf)
.orElse(false);
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
scm.saveCommunityMap();
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
scm.saveCommunityMap(singleCommunity, community_id);
}
private void saveCommunityMap()
throws ISLookUpException, IOException, DocumentException, SAXException {
private void saveCommunityMap(boolean singleCommunity, String communityId)
throws IOException {
final String communityMapString = Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap());
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
log.info("communityMap {} ", communityMapString);
writer
.write(
communityMapString);
.write(
communityMapString);
writer.close();
}
}

View File

@ -1,5 +1,5 @@
#PROPERTIES FOR EOSC DUMP
sourcePath=/tmp/miriam/graphCopy
sourcePath=/tmp/prod_provision/graph/20_graph_blacklisted
outputPath=/tmp/miriam/graph_dumps/eosc_prod_extended
#accessToken for the openaire sandbox following
accessToken=OzzOsyucEIHxCEfhlpsMo3myEiwpCza3trCRL7ddfGTAK9xXkIP2MbXd6Vg4

View File

@ -6,6 +6,7 @@
<modules>
<module>dump-schema</module>
<module>dump</module>
<module>api</module>
</modules>
<parent>