removing interaction with the IS. Using communityAPIs instead
This commit is contained in:
parent
e91636817c
commit
818bb4b11c
|
@ -37,6 +37,11 @@
|
||||||
<artifactId>dhp-common</artifactId>
|
<artifactId>dhp-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-annotations</artifactId>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
|
@ -40,6 +40,22 @@ public class QueryCommunityAPI {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String communityDatasource(String id) throws IOException {
|
||||||
|
|
||||||
|
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String communityPropagationOrganization(String id) throws IOException {
|
||||||
|
|
||||||
|
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String communityProjects(String id, String page, String size) throws IOException {
|
||||||
|
|
||||||
|
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
|
||||||
|
}
|
||||||
|
|
||||||
private static String getBody(HttpURLConnection conn) throws IOException {
|
private static String getBody(HttpURLConnection conn) throws IOException {
|
||||||
String body = "{}";
|
String body = "{}";
|
||||||
try (BufferedReader br = new BufferedReader(
|
try (BufferedReader br = new BufferedReader(
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
package eu.dnetlib.dhp.communityapi.model;
|
package eu.dnetlib.dhp.communityapi.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||||
|
|
||||||
|
@ -19,6 +20,26 @@ public class CommunityModel implements Serializable {
|
||||||
|
|
||||||
private String type;
|
private String type;
|
||||||
|
|
||||||
|
private List<String> subject;
|
||||||
|
|
||||||
|
private String zenodoCOmmunity;
|
||||||
|
|
||||||
|
public List<String> getSubject() {
|
||||||
|
return subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSubject(List<String> subject) {
|
||||||
|
this.subject = subject;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getZenodoCOmmunity() {
|
||||||
|
return zenodoCOmmunity;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setZenodoCOmmunity(String zenodoCOmmunity) {
|
||||||
|
this.zenodoCOmmunity = zenodoCOmmunity;
|
||||||
|
}
|
||||||
|
|
||||||
public String getType() {
|
public String getType() {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,27 @@ public class ProjectModel implements Serializable {
|
||||||
|
|
||||||
private String openaireId;
|
private String openaireId;
|
||||||
|
|
||||||
|
private String funder;
|
||||||
|
|
||||||
|
private String gratId;
|
||||||
|
|
||||||
|
|
||||||
|
public String getFunder() {
|
||||||
|
return funder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFunder(String funder) {
|
||||||
|
this.funder = funder;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getGratId() {
|
||||||
|
return gratId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setGratId(String gratId) {
|
||||||
|
this.gratId = gratId;
|
||||||
|
}
|
||||||
|
|
||||||
public String getOpenaireId() {
|
public String getOpenaireId() {
|
||||||
return openaireId;
|
return openaireId;
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,12 @@
|
||||||
<artifactId>classgraph</artifactId>
|
<artifactId>classgraph</artifactId>
|
||||||
<version>4.8.71</version>
|
<version>4.8.71</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
|
<artifactId>api</artifactId>
|
||||||
|
<version>1.2.5-SNAPSHOT</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
|
@ -1,78 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.communityapi.model.CommunityModel;
|
|
||||||
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
|
|
||||||
public class QueryCommunityAPI {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class);
|
|
||||||
|
|
||||||
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
|
|
||||||
throws IOException {
|
|
||||||
if (singleCommunity)
|
|
||||||
return getMap(Arrays.asList(getCommunity(communityId)));
|
|
||||||
return getMap(getValidCommunities());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private CommunityMap getMap(List<CommunityModel> communities) {
|
|
||||||
final CommunityMap map = new CommunityMap();
|
|
||||||
communities.forEach(c -> map.put(c.getId(), c.getName()));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getCommunityCsv(List<String> comms) {
|
|
||||||
return comms.stream().map(c -> {
|
|
||||||
try {
|
|
||||||
CommunityModel community = getCommunity(c);
|
|
||||||
StringBuilder builder = new StringBuilder();
|
|
||||||
builder.append(DHPUtils.md5(community.getId()));
|
|
||||||
builder.append(Constants.SEP);
|
|
||||||
builder.append(community.getName());
|
|
||||||
builder.append(Constants.SEP);
|
|
||||||
builder.append(community.getId());
|
|
||||||
builder.append(Constants.SEP);
|
|
||||||
builder
|
|
||||||
.append(
|
|
||||||
community.getDescription());
|
|
||||||
return builder.toString();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<CommunityModel> getValidCommunities() throws IOException {
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
|
||||||
return mapper
|
|
||||||
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
|
|
||||||
.stream()
|
|
||||||
.filter(
|
|
||||||
community -> community.getStatus().equals("all") &&
|
|
||||||
(community.getType().equals("ri") || community.getType().equals("community")))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private CommunityModel getCommunity(String id) throws IOException {
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
|
||||||
return mapper
|
|
||||||
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -13,13 +13,10 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
|
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
|
||||||
|
@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
public class SaveCommunityMap implements Serializable {
|
public class SaveCommunityMap implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
|
||||||
private final transient QueryCommunityAPI queryInformationSystem;
|
private final transient UtilCommunityAPI queryInformationSystem;
|
||||||
|
|
||||||
private final transient BufferedWriter writer;
|
private final transient BufferedWriter writer;
|
||||||
|
|
||||||
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
|
public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
|
||||||
final Configuration conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
@ -45,7 +42,7 @@ public class SaveCommunityMap implements Serializable {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
queryInformationSystem = new QueryCommunityAPI();
|
queryInformationSystem = new UtilCommunityAPI();
|
||||||
|
|
||||||
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
|
@ -67,9 +64,6 @@ public class SaveCommunityMap implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final Boolean singleCommunity = Optional
|
final Boolean singleCommunity = Optional
|
||||||
.ofNullable(parser.get("singleDeposition"))
|
.ofNullable(parser.get("singleDeposition"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
|
@ -77,7 +71,7 @@ public class SaveCommunityMap implements Serializable {
|
||||||
|
|
||||||
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
|
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
|
||||||
|
|
||||||
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
|
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
|
||||||
|
|
||||||
scm.saveCommunityMap(singleCommunity, community_id);
|
scm.saveCommunityMap(singleCommunity, community_id);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,196 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.communityapi.model.*;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
|
||||||
|
|
||||||
|
public class UtilCommunityAPI {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
|
||||||
|
|
||||||
|
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
|
||||||
|
throws IOException {
|
||||||
|
if (singleCommunity)
|
||||||
|
return getMap(Arrays.asList(getCommunity(communityId)));
|
||||||
|
return getMap(getValidCommunities());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private CommunityMap getMap(List<CommunityModel> communities) {
|
||||||
|
final CommunityMap map = new CommunityMap();
|
||||||
|
communities.forEach(c -> map.put(c.getId(), c.getName()));
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getCommunityCsv(List<String> comms) {
|
||||||
|
return comms.stream().map(c -> {
|
||||||
|
try {
|
||||||
|
CommunityModel community = getCommunity(c);
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
builder.append(DHPUtils.md5(community.getId()));
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder.append(community.getName());
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder.append(community.getId());
|
||||||
|
builder.append(Constants.SEP);
|
||||||
|
builder
|
||||||
|
.append(
|
||||||
|
community.getDescription());
|
||||||
|
return builder.toString();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}).collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<CommunityModel> getValidCommunities() throws IOException {
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper
|
||||||
|
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
|
||||||
|
.stream()
|
||||||
|
.filter(
|
||||||
|
community -> community.getStatus().equals("all") &&
|
||||||
|
(community.getType().equals("ri") || community.getType().equals("community")))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private CommunityModel getCommunity(String id) throws IOException {
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper
|
||||||
|
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<ContextInfo> getContextInformation(final Consumer<ContextInfo> consumer) throws IOException {
|
||||||
|
List<ContextInfo> ret = new ArrayList<>();
|
||||||
|
getValidCommunities()
|
||||||
|
.forEach(c -> {
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
cinfo.setId(c.getId());
|
||||||
|
cinfo.setDescription(c.getDescription());
|
||||||
|
CommunityModel cm =null;
|
||||||
|
try {
|
||||||
|
cm = getCommunity(c.getId());
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
cinfo.setSubject(cm.getSubject());
|
||||||
|
cinfo.setZenodocommunity(c.getZenodoCOmmunity());
|
||||||
|
cinfo.setType(c.getType());
|
||||||
|
ret.add(cinfo);
|
||||||
|
});
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getContextRelation(final Consumer<ContextInfo> consumer) throws IOException {
|
||||||
|
getValidCommunities().forEach(c -> {
|
||||||
|
ContextInfo cinfo = new ContextInfo();
|
||||||
|
cinfo.setId(c.getId());
|
||||||
|
cinfo.setDatasourceList( getDatasourceList(c.getId()));
|
||||||
|
cinfo.setProjectList(getProjectList(c.getId()));
|
||||||
|
consumer.accept(cinfo);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getDatasourceList(String id) {
|
||||||
|
List<String> datasourceList = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
|
||||||
|
new ObjectMapper().readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
|
||||||
|
DatasourceList.class)
|
||||||
|
.stream()
|
||||||
|
.forEach(ds ->{
|
||||||
|
if(Optional.ofNullable(ds.getOpenaireId()).isPresent()){
|
||||||
|
|
||||||
|
datasourceList.add(ds.getOpenaireId());
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
return datasourceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getProjectList( String id) {
|
||||||
|
int page = -1;
|
||||||
|
int size = 100;
|
||||||
|
ContentModel cm = null;;
|
||||||
|
ArrayList<String> projectList = new ArrayList<>();
|
||||||
|
do {
|
||||||
|
page++;
|
||||||
|
try {
|
||||||
|
cm = new ObjectMapper()
|
||||||
|
.readValue(
|
||||||
|
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityProjects(
|
||||||
|
id, String.valueOf(page), String.valueOf(size)),
|
||||||
|
ContentModel.class);
|
||||||
|
if (cm.getContent().size() > 0) {
|
||||||
|
cm.getContent().forEach(p -> {
|
||||||
|
if(Optional.ofNullable(p.getOpenaireId()).isPresent())
|
||||||
|
projectList.add(p.getOpenaireId());
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
} while (!cm.getLast());
|
||||||
|
|
||||||
|
return projectList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* it returns for each organization the list of associated communities
|
||||||
|
*/
|
||||||
|
public CommunityEntityMap getCommunityOrganization() throws IOException {
|
||||||
|
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||||
|
getValidCommunities()
|
||||||
|
.forEach(community -> {
|
||||||
|
String id = community.getId();
|
||||||
|
try {
|
||||||
|
List<String> associatedOrgs = MAPPER
|
||||||
|
.readValue(
|
||||||
|
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
|
||||||
|
associatedOrgs.forEach(o -> {
|
||||||
|
if (!organizationMap
|
||||||
|
.keySet()
|
||||||
|
.contains(o))
|
||||||
|
organizationMap.put(o, new ArrayList<>());
|
||||||
|
organizationMap.get(o).add(community.getId());
|
||||||
|
});
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return organizationMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -9,6 +9,7 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
@ -22,7 +23,6 @@ import org.slf4j.LoggerFactory;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
|
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
|
||||||
|
@ -52,13 +52,11 @@ public class CreateContextEntities implements Serializable {
|
||||||
final String hdfsNameNode = parser.get("nameNode");
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
log.info("nameNode: {}", hdfsNameNode);
|
log.info("nameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
|
||||||
|
|
||||||
log.info("Processing contexts...");
|
log.info("Processing contexts...");
|
||||||
cce.execute(Process::getEntity, isLookUpUrl);
|
cce.execute(Process::getEntity);
|
||||||
|
|
||||||
cce.close();
|
cce.close();
|
||||||
|
|
||||||
|
@ -87,11 +85,10 @@ public class CreateContextEntities implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
|
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer)
|
||||||
throws ISLookUpException {
|
throws IOException {
|
||||||
|
|
||||||
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
|
UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI();
|
||||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
|
||||||
|
|
||||||
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));
|
||||||
|
|
||||||
|
|
|
@ -10,9 +10,9 @@ import java.util.Optional;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -25,11 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
|
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB;
|
|
||||||
import eu.dnetlib.dhp.oa.model.graph.*;
|
import eu.dnetlib.dhp.oa.model.graph.*;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
|
||||||
|
@ -39,11 +35,6 @@ public class CreateContextRelation implements Serializable {
|
||||||
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
|
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
|
||||||
private final transient Configuration conf;
|
private final transient Configuration conf;
|
||||||
private final transient BufferedWriter writer;
|
private final transient BufferedWriter writer;
|
||||||
private final transient QueryInformationSystem queryInformationSystem;
|
|
||||||
|
|
||||||
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
|
|
||||||
private static final String CONTEX_RELATION_PROJECT = "projects";
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
|
@ -68,27 +59,17 @@ public class CreateContextRelation implements Serializable {
|
||||||
final String hdfsNameNode = parser.get("nameNode");
|
final String hdfsNameNode = parser.get("nameNode");
|
||||||
log.info("hdfsNameNode: {}", hdfsNameNode);
|
log.info("hdfsNameNode: {}", hdfsNameNode);
|
||||||
|
|
||||||
final String isLookUpUrl = parser.get("isLookUpUrl");
|
|
||||||
log.info("isLookUpUrl: {}", isLookUpUrl);
|
|
||||||
|
|
||||||
final String masterDuplicatePath = parser.get("masterDuplicate");
|
final String masterDuplicatePath = parser.get("masterDuplicate");
|
||||||
log.info("masterDuplicatePath: {}", masterDuplicatePath);
|
log.info("masterDuplicatePath: {}", masterDuplicatePath);
|
||||||
|
|
||||||
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
|
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
|
||||||
|
|
||||||
final List<MasterDuplicate> masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath);
|
|
||||||
|
|
||||||
log.info("Creating relation for datasource...");
|
log.info("Creating relation for datasources and projects...");
|
||||||
cce
|
cce
|
||||||
.execute(
|
.execute(
|
||||||
Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class),
|
Process::getRelation);
|
||||||
masterDuplicateList);
|
|
||||||
|
|
||||||
log.info("Creating relations for projects... ");
|
|
||||||
cce
|
|
||||||
.execute(
|
|
||||||
Process::getRelation, CONTEX_RELATION_PROJECT,
|
|
||||||
ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
|
||||||
|
|
||||||
cce.close();
|
cce.close();
|
||||||
|
|
||||||
|
@ -112,15 +93,11 @@ public class CreateContextRelation implements Serializable {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
|
public CreateContextRelation(String hdfsPath, String hdfsNameNode)
|
||||||
throws IOException, ISLookUpException {
|
throws IOException{
|
||||||
this.conf = new Configuration();
|
this.conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
queryInformationSystem = new QueryInformationSystem();
|
|
||||||
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
|
|
||||||
queryInformationSystem.execContextRelationQuery();
|
|
||||||
|
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
@ -134,17 +111,13 @@ public class CreateContextRelation implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
|
|
||||||
|
|
||||||
execute(producer, category, prefix, null);
|
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
|
||||||
}
|
|
||||||
|
|
||||||
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix,
|
|
||||||
List<MasterDuplicate> masterDuplicateList) {
|
|
||||||
|
|
||||||
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
|
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
|
||||||
|
|
||||||
queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList);
|
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
|
||||||
|
queryCommunityAPI.getContextRelation(consumer);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void writeEntity(final Relation r) {
|
protected void writeEntity(final Relation r) {
|
||||||
|
|
|
@ -1,246 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.function.Consumer;
|
|
||||||
|
|
||||||
import org.dom4j.Document;
|
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.Node;
|
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class QueryInformationSystem {
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
|
|
||||||
private ISLookUpService isLookUp;
|
|
||||||
private List<String> contextRelationResult;
|
|
||||||
|
|
||||||
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
|
|
||||||
" and $x//context/param[./@name = 'status']/text() = 'all' " +
|
|
||||||
" return " +
|
|
||||||
"$x//context";
|
|
||||||
|
|
||||||
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
|
|
||||||
+
|
|
||||||
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
|
|
||||||
+
|
|
||||||
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
|
|
||||||
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
|
|
||||||
+
|
|
||||||
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
|
|
||||||
|
|
||||||
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
|
|
||||||
|
|
||||||
isLookUp
|
|
||||||
.quickSearchProfile(XQUERY_ENTITY)
|
|
||||||
.forEach(c -> {
|
|
||||||
ContextInfo cinfo = new ContextInfo();
|
|
||||||
String[] cSplit = c.split("@@");
|
|
||||||
cinfo.setId(cSplit[0]);
|
|
||||||
cinfo.setName(cSplit[1]);
|
|
||||||
log.info("community name : {}", cSplit[1]);
|
|
||||||
cinfo.setDescription(cSplit[2]);
|
|
||||||
if (!cSplit[3].trim().equals("")) {
|
|
||||||
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
|
|
||||||
}
|
|
||||||
cinfo.setZenodocommunity(cSplit[4]);
|
|
||||||
cinfo.setType(cSplit[5]);
|
|
||||||
consumer.accept(cinfo);
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<ContextInfo> getContextInformation() throws ISLookUpException {
|
|
||||||
List<ContextInfo> ret = new ArrayList<>();
|
|
||||||
isLookUp
|
|
||||||
.quickSearchProfile(XQUERY_ENTITY)
|
|
||||||
.forEach(c -> {
|
|
||||||
ContextInfo cinfo = new ContextInfo();
|
|
||||||
String[] cSplit = c.split("@@");
|
|
||||||
cinfo.setId(cSplit[0]);
|
|
||||||
cinfo.setName(cSplit[1]);
|
|
||||||
cinfo.setDescription(cSplit[2]);
|
|
||||||
if (!cSplit[3].trim().equals("")) {
|
|
||||||
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
|
|
||||||
}
|
|
||||||
cinfo.setZenodocommunity(cSplit[4]);
|
|
||||||
cinfo.setType(cSplit[5]);
|
|
||||||
ret.add(cinfo);
|
|
||||||
});
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getContextRelationResult() {
|
|
||||||
return contextRelationResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setContextRelationResult(List<String> contextRelationResult) {
|
|
||||||
this.contextRelationResult = contextRelationResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ISLookUpService getIsLookUp() {
|
|
||||||
return isLookUp;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setIsLookUp(ISLookUpService isLookUpService) {
|
|
||||||
this.isLookUp = isLookUpService;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void execContextRelationQuery() throws ISLookUpException {
|
|
||||||
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
|
|
||||||
getContextRelation(consumer, category, prefix, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix,
|
|
||||||
List<MasterDuplicate> masterDuplicateList) {
|
|
||||||
|
|
||||||
contextRelationResult.forEach(xml -> {
|
|
||||||
ContextInfo cinfo = new ContextInfo();
|
|
||||||
final Document doc;
|
|
||||||
|
|
||||||
try {
|
|
||||||
final SAXReader reader = new SAXReader();
|
|
||||||
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
|
||||||
doc = reader.read(new StringReader(xml));
|
|
||||||
Element root = doc.getRootElement();
|
|
||||||
cinfo.setId(root.attributeValue("id"));
|
|
||||||
|
|
||||||
Iterator<Element> it = root.elementIterator();
|
|
||||||
while (it.hasNext()) {
|
|
||||||
Element el = it.next();
|
|
||||||
if (el.getName().equals("category")) {
|
|
||||||
String categoryId = el.attributeValue("id");
|
|
||||||
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
|
|
||||||
if (categoryId.equals(category)) {
|
|
||||||
cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
consumer.accept(cinfo);
|
|
||||||
} catch (DocumentException | SAXException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@NotNull
|
|
||||||
private List<String> getCategoryList(Element el, String prefix, List<MasterDuplicate> masterDuplicateList) {
|
|
||||||
List<String> datasourceList = new ArrayList<>();
|
|
||||||
for (Object node : el.selectNodes(".//concept")) {
|
|
||||||
String oid = getOpenaireId((Node) node, prefix);
|
|
||||||
if (oid != null)
|
|
||||||
if (masterDuplicateList == null)
|
|
||||||
datasourceList.add(oid);
|
|
||||||
else
|
|
||||||
datasourceList.add(getMaster(oid, masterDuplicateList));
|
|
||||||
}
|
|
||||||
|
|
||||||
return datasourceList;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getMaster(String oid, List<MasterDuplicate> masterDuplicateList) {
|
|
||||||
for (MasterDuplicate md : masterDuplicateList) {
|
|
||||||
if (md.getDuplicate().equals(oid))
|
|
||||||
return md.getMaster();
|
|
||||||
}
|
|
||||||
return oid;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getOpenaireId(Node el, String prefix) {
|
|
||||||
for (Object node : el.selectNodes(".//param")) {
|
|
||||||
Node n = (Node) node;
|
|
||||||
if (n.valueOf("./@name").equals("openaireId")) {
|
|
||||||
String id = n.getText();
|
|
||||||
if (id.startsWith(prefix + "|"))
|
|
||||||
return id;
|
|
||||||
return prefix + "|" + id;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return makeOpenaireId(el, prefix);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String makeOpenaireId(Node el, String prefix) {
|
|
||||||
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
String funder = "";
|
|
||||||
String grantId = null;
|
|
||||||
String funding = null;
|
|
||||||
for (Object node : el.selectNodes(".//param")) {
|
|
||||||
Node n = (Node) node;
|
|
||||||
switch (n.valueOf("./@name")) {
|
|
||||||
case "funding":
|
|
||||||
funding = n.getText();
|
|
||||||
break;
|
|
||||||
case "funder":
|
|
||||||
funder = n.getText();
|
|
||||||
break;
|
|
||||||
case "CD_PROJECT_NUMBER":
|
|
||||||
grantId = n.getText();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
String nsp = null;
|
|
||||||
|
|
||||||
switch (funder.toLowerCase()) {
|
|
||||||
case "ec":
|
|
||||||
if (funding == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (funding.toLowerCase().contains("h2020")) {
|
|
||||||
nsp = "corda__h2020::";
|
|
||||||
} else if (funding.toLowerCase().contains("he")) {
|
|
||||||
nsp = "corda_____he::";
|
|
||||||
} else {
|
|
||||||
nsp = "corda_______::";
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case "tubitak":
|
|
||||||
nsp = "tubitakf____::";
|
|
||||||
break;
|
|
||||||
case "dfg":
|
|
||||||
nsp = "dfgf________::";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
StringBuilder bld = new StringBuilder();
|
|
||||||
bld.append(funder.toLowerCase());
|
|
||||||
for (int i = funder.length(); i < 12; i++)
|
|
||||||
bld.append("_");
|
|
||||||
bld.append("::");
|
|
||||||
nsp = bld.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
return prefix + "|" + nsp + DHPUtils.md5(grantId);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -10,6 +10,8 @@ import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.function.Consumer;
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -58,8 +60,9 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final OrganizationMap organizationMap = new Gson()
|
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
|
||||||
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
|
final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization();
|
||||||
|
|
||||||
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
|
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
|
||||||
log.info("organization map : {}", serializedOrganizationMap);
|
log.info("organization map : {}", serializedOrganizationMap);
|
||||||
|
|
||||||
|
@ -79,7 +82,7 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
|
private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap,
|
||||||
String outputPath, String communityMapPath) {
|
String outputPath, String communityMapPath) {
|
||||||
|
|
||||||
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||||
|
@ -129,7 +132,7 @@ public class SparkOrganizationRelation implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@NotNull
|
@NotNull
|
||||||
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
|
private static Consumer<MergedRels> getMergedRelsConsumer(CommunityEntityMap organizationMap,
|
||||||
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
|
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
|
||||||
return mergedRels -> {
|
return mergedRels -> {
|
||||||
String oId = mergedRels.getOrganizationId();
|
String oId = mergedRels.getOrganizationId();
|
||||||
|
|
|
@ -17,14 +17,11 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.dom4j.DocumentException;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI;
|
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
|
@ -37,7 +34,7 @@ public class DumpCommunities implements Serializable {
|
||||||
private final BufferedWriter writer;
|
private final BufferedWriter writer;
|
||||||
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
|
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
|
||||||
+ " description \n";
|
+ " description \n";
|
||||||
private final transient QueryCommunityAPI queryCommunityAPI;
|
private final transient UtilCommunityAPI queryCommunityAPI;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -57,7 +54,7 @@ public class DumpCommunities implements Serializable {
|
||||||
|
|
||||||
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
|
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
|
||||||
|
|
||||||
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl"));
|
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode);
|
||||||
|
|
||||||
dc.writeCommunity(communities);
|
dc.writeCommunity(communities);
|
||||||
|
|
||||||
|
@ -79,9 +76,9 @@ public class DumpCommunities implements Serializable {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
|
public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception {
|
||||||
final Configuration conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
queryCommunityAPI = new QueryCommunityAPI();
|
queryCommunityAPI = new UtilCommunityAPI();
|
||||||
|
|
||||||
conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
FileSystem fileSystem = FileSystem.get(conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
|
@ -88,7 +88,6 @@
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
</java>
|
</java>
|
||||||
<ok to="find_results_for_country"/>
|
<ok to="find_results_for_country"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -81,7 +81,6 @@
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
|
||||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
<arg>--communities</arg><arg>${communities}</arg>
|
<arg>--communities</arg><arg>${communities}</arg>
|
||||||
</java>
|
</java>
|
||||||
<ok to="select_result_dump_relation"/>
|
<ok to="select_result_dump_relation"/>
|
||||||
|
@ -143,7 +142,6 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
|
||||||
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
||||||
<arg>--resultType</arg><arg>publication</arg>
|
<arg>--resultType</arg><arg>publication</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
@ -169,7 +167,6 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
|
|
||||||
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
||||||
<arg>--resultType</arg><arg>dataset</arg>
|
<arg>--resultType</arg><arg>dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
@ -195,7 +192,6 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
|
|
||||||
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
||||||
<arg>--resultType</arg><arg>otherresearchproduct</arg>
|
<arg>--resultType</arg><arg>otherresearchproduct</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
@ -221,7 +217,6 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
|
|
||||||
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
||||||
<arg>--resultType</arg><arg>software</arg>
|
<arg>--resultType</arg><arg>software</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
@ -252,9 +247,7 @@
|
||||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
|
||||||
|
|
||||||
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
<arg>--outputPath</arg><arg>${outputPath}</arg>
|
||||||
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -1,12 +1,6 @@
|
||||||
|
|
||||||
[
|
[
|
||||||
|
|
||||||
{
|
|
||||||
"paramName":"is",
|
|
||||||
"paramLongName":"isLookUpUrl",
|
|
||||||
"paramDescription": "URL of the isLookUp Service",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName":"nn",
|
"paramName":"nn",
|
||||||
"paramLongName":"nameNode",
|
"paramLongName":"nameNode",
|
||||||
|
|
|
@ -1,11 +1,6 @@
|
||||||
[
|
[
|
||||||
|
|
||||||
{
|
|
||||||
"paramName":"is",
|
|
||||||
"paramLongName":"isLookUpUrl",
|
|
||||||
"paramDescription": "URL of the isLookUp Service",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName": "hdfs",
|
"paramName": "hdfs",
|
||||||
"paramLongName": "hdfsPath",
|
"paramLongName": "hdfsPath",
|
||||||
|
@ -17,12 +12,8 @@
|
||||||
"paramLongName": "nameNode",
|
"paramLongName": "nameNode",
|
||||||
"paramDescription": "the name node",
|
"paramDescription": "the name node",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},{
|
|
||||||
"paramName": "md",
|
|
||||||
"paramLongName": "masterDuplicate",
|
|
||||||
"paramDescription": "the master duplicate path for datasource deduplication",
|
|
||||||
"paramRequired": false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,6 @@
|
||||||
[
|
[
|
||||||
|
|
||||||
{
|
|
||||||
"paramName":"ocm",
|
|
||||||
"paramLongName":"organizationCommunityMap",
|
|
||||||
"paramDescription": "the organization community map association",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"paramName":"s",
|
"paramName":"s",
|
||||||
"paramLongName":"sourcePath",
|
"paramLongName":"sourcePath",
|
||||||
|
|
|
@ -167,7 +167,6 @@
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
|
||||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
<arg>--singleDeposition</arg><arg>${singleDeposition}</arg>
|
<arg>--singleDeposition</arg><arg>${singleDeposition}</arg>
|
||||||
<arg>--communityId</arg><arg>${communityId}</arg>
|
<arg>--communityId</arg><arg>${communityId}</arg>
|
||||||
</java>
|
</java>
|
||||||
|
|
|
@ -85,20 +85,8 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="get_master_duplicate" />
|
<start to="fork_dump" />
|
||||||
|
|
||||||
<action name="get_master_duplicate">
|
|
||||||
<java>
|
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB</main-class>
|
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/masterduplicate</arg>
|
|
||||||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
|
||||||
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
|
|
||||||
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
|
|
||||||
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
|
|
||||||
</java>
|
|
||||||
<ok to="fork_dump"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
|
|
||||||
<fork name="fork_dump">
|
<fork name="fork_dump">
|
||||||
<path start="dump_publication"/>
|
<path start="dump_publication"/>
|
||||||
|
@ -349,7 +337,6 @@
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
|
||||||
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
|
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
|
||||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
</java>
|
</java>
|
||||||
<ok to="join_context"/>
|
<ok to="join_context"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -360,8 +347,6 @@
|
||||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
|
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
|
||||||
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
|
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
|
||||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
|
||||||
<arg>--masterDuplicate</arg><arg>${workingDir}/masterduplicate</arg>
|
|
||||||
</java>
|
</java>
|
||||||
<ok to="join_context"/>
|
<ok to="join_context"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -386,7 +371,6 @@
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||||
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
|
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
|
||||||
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
|
|
||||||
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="join_context"/>
|
<ok to="join_context"/>
|
||||||
|
|
|
@ -1,11 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump;
|
package eu.dnetlib.dhp.oa.graph.dump;
|
||||||
|
|
||||||
import static org.mockito.Mockito.lenient;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
@ -13,24 +9,22 @@ import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.mockito.Mock;
|
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
class QueryInformationSystemTest {
|
class QueryInformationSystemTest {
|
||||||
|
|
||||||
private QueryCommunityAPI queryInformationSystem;
|
private UtilCommunityAPI queryInformationSystem;
|
||||||
|
|
||||||
private Map<String, String> map;
|
private Map<String, String> map;
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException {
|
public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException {
|
||||||
|
|
||||||
queryInformationSystem = new QueryCommunityAPI();
|
queryInformationSystem = new UtilCommunityAPI();
|
||||||
map = queryInformationSystem.getCommunityMap(false, null);
|
map = queryInformationSystem.getCommunityMap(false, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue