removing interaction with the IS. Using communityAPIs instead

This commit is contained in:
Miriam Baglioni 2023-10-30 14:28:55 +01:00
parent e91636817c
commit 818bb4b11c
21 changed files with 303 additions and 449 deletions

View File

@ -37,6 +37,11 @@
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>

View File

@ -40,6 +40,22 @@ public class QueryCommunityAPI {
}
public static String communityDatasource(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
}
public static String communityPropagationOrganization(String id) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
}
public static String communityProjects(String id, String page, String size) throws IOException {
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
}
private static String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(

View File

@ -2,6 +2,7 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@ -19,6 +20,26 @@ public class CommunityModel implements Serializable {
private String type;
private List<String> subject;
private String zenodoCOmmunity;
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
public String getZenodoCOmmunity() {
return zenodoCOmmunity;
}
public void setZenodoCOmmunity(String zenodoCOmmunity) {
this.zenodoCOmmunity = zenodoCOmmunity;
}
public String getType() {
return type;
}

View File

@ -14,6 +14,27 @@ public class ProjectModel implements Serializable {
private String openaireId;
private String funder;
private String gratId;
public String getFunder() {
return funder;
}
public void setFunder(String funder) {
this.funder = funder;
}
public String getGratId() {
return gratId;
}
public void setGratId(String gratId) {
this.gratId = gratId;
}
public String getOpenaireId() {
return openaireId;
}

View File

@ -67,6 +67,12 @@
<artifactId>classgraph</artifactId>
<version>4.8.71</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>

View File

@ -1,78 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.CommunityModel;
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
public class QueryCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class);
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws IOException {
if (singleCommunity)
return getMap(Arrays.asList(getCommunity(communityId)));
return getMap(getValidCommunities());
}
private CommunityMap getMap(List<CommunityModel> communities) {
final CommunityMap map = new CommunityMap();
communities.forEach(c -> map.put(c.getId(), c.getName()));
return map;
}
public List<String> getCommunityCsv(List<String> comms) {
return comms.stream().map(c -> {
try {
CommunityModel community = getCommunity(c);
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(community.getId()));
builder.append(Constants.SEP);
builder.append(community.getName());
builder.append(Constants.SEP);
builder.append(community.getId());
builder.append(Constants.SEP);
builder
.append(
community.getDescription());
return builder.toString();
} catch (IOException e) {
throw new RuntimeException(e);
}
}).collect(Collectors.toList());
}
private List<CommunityModel> getValidCommunities() throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
.stream()
.filter(
community -> community.getStatus().equals("all") &&
(community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
}
private CommunityModel getCommunity(String id) throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
}
}

View File

@ -13,13 +13,10 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the
@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final transient QueryCommunityAPI queryInformationSystem;
private final transient UtilCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException {
public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException {
final Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
@ -45,7 +42,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
queryInformationSystem = new QueryCommunityAPI();
queryInformationSystem = new UtilCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@ -67,9 +64,6 @@ public class SaveCommunityMap implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final Boolean singleCommunity = Optional
.ofNullable(parser.get("singleDeposition"))
.map(Boolean::valueOf)
@ -77,7 +71,7 @@ public class SaveCommunityMap implements Serializable {
final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null);
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl);
final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode);
scm.saveCommunityMap(singleCommunity, community_id);

View File

@ -0,0 +1,196 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.communityapi.model.*;
import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER;
public class UtilCommunityAPI {
private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class);
public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
throws IOException {
if (singleCommunity)
return getMap(Arrays.asList(getCommunity(communityId)));
return getMap(getValidCommunities());
}
private CommunityMap getMap(List<CommunityModel> communities) {
final CommunityMap map = new CommunityMap();
communities.forEach(c -> map.put(c.getId(), c.getName()));
return map;
}
public List<String> getCommunityCsv(List<String> comms) {
return comms.stream().map(c -> {
try {
CommunityModel community = getCommunity(c);
StringBuilder builder = new StringBuilder();
builder.append(DHPUtils.md5(community.getId()));
builder.append(Constants.SEP);
builder.append(community.getName());
builder.append(Constants.SEP);
builder.append(community.getId());
builder.append(Constants.SEP);
builder
.append(
community.getDescription());
return builder.toString();
} catch (IOException e) {
throw new RuntimeException(e);
}
}).collect(Collectors.toList());
}
private List<CommunityModel> getValidCommunities() throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
.stream()
.filter(
community -> community.getStatus().equals("all") &&
(community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
}
private CommunityModel getCommunity(String id) throws IOException {
ObjectMapper mapper = new ObjectMapper();
return mapper
.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
}
public List<ContextInfo> getContextInformation(final Consumer<ContextInfo> consumer) throws IOException {
List<ContextInfo> ret = new ArrayList<>();
getValidCommunities()
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
cinfo.setId(c.getId());
cinfo.setDescription(c.getDescription());
CommunityModel cm =null;
try {
cm = getCommunity(c.getId());
} catch (IOException e) {
throw new RuntimeException(e);
}
cinfo.setSubject(cm.getSubject());
cinfo.setZenodocommunity(c.getZenodoCOmmunity());
cinfo.setType(c.getType());
ret.add(cinfo);
});
return ret;
}
public void getContextRelation(final Consumer<ContextInfo> consumer) throws IOException {
getValidCommunities().forEach(c -> {
ContextInfo cinfo = new ContextInfo();
cinfo.setId(c.getId());
cinfo.setDatasourceList( getDatasourceList(c.getId()));
cinfo.setProjectList(getProjectList(c.getId()));
consumer.accept(cinfo);
});
}
private List<String> getDatasourceList(String id) {
List<String> datasourceList = new ArrayList<>();
try {
new ObjectMapper().readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id),
DatasourceList.class)
.stream()
.forEach(ds ->{
if(Optional.ofNullable(ds.getOpenaireId()).isPresent()){
datasourceList.add(ds.getOpenaireId());
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
return datasourceList;
}
private List<String> getProjectList( String id) {
int page = -1;
int size = 100;
ContentModel cm = null;;
ArrayList<String> projectList = new ArrayList<>();
do {
page++;
try {
cm = new ObjectMapper()
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityProjects(
id, String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> {
if(Optional.ofNullable(p.getOpenaireId()).isPresent())
projectList.add(p.getOpenaireId());
});
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} while (!cm.getLast());
return projectList;
}
/**
* it returns for each organization the list of associated communities
*/
public CommunityEntityMap getCommunityOrganization() throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities()
.forEach(community -> {
String id = community.getId();
try {
List<String> associatedOrgs = MAPPER
.readValue(
eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
associatedOrgs.forEach(o -> {
if (!organizationMap
.keySet()
.contains(o))
organizationMap.put(o, new ArrayList<>());
organizationMap.get(o).add(community.getId());
});
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap;
}
}

View File

@ -9,6 +9,7 @@ import java.nio.charset.StandardCharsets;
import java.util.function.Consumer;
import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -22,7 +23,6 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and
@ -52,13 +52,11 @@ public class CreateContextEntities implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("nameNode: {}", hdfsNameNode);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode);
log.info("Processing contexts...");
cce.execute(Process::getEntity, isLookUpUrl);
cce.execute(Process::getEntity);
cce.close();
@ -87,11 +85,10 @@ public class CreateContextEntities implements Serializable {
}
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer, String isLookUpUrl)
throws ISLookUpException {
public <R extends ResearchInitiative> void execute(final Function<ContextInfo, R> producer)
throws IOException {
QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI();
final Consumer<ContextInfo> consumer = ci -> writeEntity(producer.apply(ci));

View File

@ -10,9 +10,9 @@ import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -25,11 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
* Writes the set of new Relation between the context and datasources. At the moment the relation between the context
@ -39,11 +35,6 @@ public class CreateContextRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class);
private final transient Configuration conf;
private final transient BufferedWriter writer;
private final transient QueryInformationSystem queryInformationSystem;
private static final String CONTEX_RELATION_DATASOURCE = "contentproviders";
private static final String CONTEX_RELATION_PROJECT = "projects";
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@ -68,27 +59,17 @@ public class CreateContextRelation implements Serializable {
final String hdfsNameNode = parser.get("nameNode");
log.info("hdfsNameNode: {}", hdfsNameNode);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
final String masterDuplicatePath = parser.get("masterDuplicate");
log.info("masterDuplicatePath: {}", masterDuplicatePath);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl);
final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode);
final List<MasterDuplicate> masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath);
log.info("Creating relation for datasource...");
log.info("Creating relation for datasources and projects...");
cce
.execute(
Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class),
masterDuplicateList);
log.info("Creating relations for projects... ");
cce
.execute(
Process::getRelation, CONTEX_RELATION_PROJECT,
ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
Process::getRelation);
cce.close();
@ -112,15 +93,11 @@ public class CreateContextRelation implements Serializable {
writer.close();
}
public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl)
throws IOException, ISLookUpException {
public CreateContextRelation(String hdfsPath, String hdfsNameNode)
throws IOException{
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryInformationSystem.execContextRelationQuery();
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
@ -134,17 +111,13 @@ public class CreateContextRelation implements Serializable {
}
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix) {
execute(producer, category, prefix, null);
}
public void execute(final Function<ContextInfo, List<Relation>> producer, String category, String prefix,
List<MasterDuplicate> masterDuplicateList) {
public void execute(final Function<ContextInfo, List<Relation>> producer) throws IOException {
final Consumer<ContextInfo> consumer = ci -> producer.apply(ci).forEach(this::writeEntity);
queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList);
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
queryCommunityAPI.getContextRelation(consumer);
}
protected void writeEntity(final Relation r) {

View File

@ -1,246 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump.complete;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate;
import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class QueryInformationSystem {
private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);
private ISLookUpService isLookUp;
private List<String> contextRelationResult;
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and $x//context/param[./@name = 'status']/text() = 'all' " +
" return " +
"$x//context";
private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
"where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return "
+
"concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " +
"$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', "
+
"$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)";
public void getContextInformation(final Consumer<ContextInfo> consumer) throws ISLookUpException {
isLookUp
.quickSearchProfile(XQUERY_ENTITY)
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
log.info("community name : {}", cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
}
cinfo.setZenodocommunity(cSplit[4]);
cinfo.setType(cSplit[5]);
consumer.accept(cinfo);
});
}
public List<ContextInfo> getContextInformation() throws ISLookUpException {
List<ContextInfo> ret = new ArrayList<>();
isLookUp
.quickSearchProfile(XQUERY_ENTITY)
.forEach(c -> {
ContextInfo cinfo = new ContextInfo();
String[] cSplit = c.split("@@");
cinfo.setId(cSplit[0]);
cinfo.setName(cSplit[1]);
cinfo.setDescription(cSplit[2]);
if (!cSplit[3].trim().equals("")) {
cinfo.setSubject(Arrays.asList(cSplit[3].split(",")));
}
cinfo.setZenodocommunity(cSplit[4]);
cinfo.setType(cSplit[5]);
ret.add(cinfo);
});
return ret;
}
public List<String> getContextRelationResult() {
return contextRelationResult;
}
public void setContextRelationResult(List<String> contextRelationResult) {
this.contextRelationResult = contextRelationResult;
}
public ISLookUpService getIsLookUp() {
return isLookUp;
}
public void setIsLookUp(ISLookUpService isLookUpService) {
this.isLookUp = isLookUpService;
}
public void execContextRelationQuery() throws ISLookUpException {
contextRelationResult = isLookUp.quickSearchProfile(XQUERY);
}
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix) {
getContextRelation(consumer, category, prefix, null);
}
public void getContextRelation(final Consumer<ContextInfo> consumer, String category, String prefix,
List<MasterDuplicate> masterDuplicateList) {
contextRelationResult.forEach(xml -> {
ContextInfo cinfo = new ContextInfo();
final Document doc;
try {
final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
doc = reader.read(new StringReader(xml));
Element root = doc.getRootElement();
cinfo.setId(root.attributeValue("id"));
Iterator<Element> it = root.elementIterator();
while (it.hasNext()) {
Element el = it.next();
if (el.getName().equals("category")) {
String categoryId = el.attributeValue("id");
categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2);
if (categoryId.equals(category)) {
cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList));
}
}
}
consumer.accept(cinfo);
} catch (DocumentException | SAXException e) {
e.printStackTrace();
}
});
}
@NotNull
private List<String> getCategoryList(Element el, String prefix, List<MasterDuplicate> masterDuplicateList) {
List<String> datasourceList = new ArrayList<>();
for (Object node : el.selectNodes(".//concept")) {
String oid = getOpenaireId((Node) node, prefix);
if (oid != null)
if (masterDuplicateList == null)
datasourceList.add(oid);
else
datasourceList.add(getMaster(oid, masterDuplicateList));
}
return datasourceList;
}
private String getMaster(String oid, List<MasterDuplicate> masterDuplicateList) {
for (MasterDuplicate md : masterDuplicateList) {
if (md.getDuplicate().equals(oid))
return md.getMaster();
}
return oid;
}
private String getOpenaireId(Node el, String prefix) {
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
if (n.valueOf("./@name").equals("openaireId")) {
String id = n.getText();
if (id.startsWith(prefix + "|"))
return id;
return prefix + "|" + id;
}
}
return makeOpenaireId(el, prefix);
}
private String makeOpenaireId(Node el, String prefix) {
if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) {
return null;
}
String funder = "";
String grantId = null;
String funding = null;
for (Object node : el.selectNodes(".//param")) {
Node n = (Node) node;
switch (n.valueOf("./@name")) {
case "funding":
funding = n.getText();
break;
case "funder":
funder = n.getText();
break;
case "CD_PROJECT_NUMBER":
grantId = n.getText();
break;
default:
break;
}
}
String nsp = null;
switch (funder.toLowerCase()) {
case "ec":
if (funding == null) {
return null;
}
if (funding.toLowerCase().contains("h2020")) {
nsp = "corda__h2020::";
} else if (funding.toLowerCase().contains("he")) {
nsp = "corda_____he::";
} else {
nsp = "corda_______::";
}
break;
case "tubitak":
nsp = "tubitakf____::";
break;
case "dfg":
nsp = "dfgf________::";
break;
default:
StringBuilder bld = new StringBuilder();
bld.append(funder.toLowerCase());
for (int i = funder.length(); i < 12; i++)
bld.append("_");
bld.append("::");
nsp = bld.toString();
}
return prefix + "|" + nsp + DHPUtils.md5(grantId);
}
}

View File

@ -10,6 +10,8 @@ import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@ -58,8 +60,9 @@ public class SparkOrganizationRelation implements Serializable {
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final OrganizationMap organizationMap = new Gson()
.fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class);
UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI();
final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization();
final String serializedOrganizationMap = new Gson().toJson(organizationMap);
log.info("organization map : {}", serializedOrganizationMap);
@ -79,7 +82,7 @@ public class SparkOrganizationRelation implements Serializable {
}
private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap,
private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap,
String outputPath, String communityMapPath) {
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
@ -129,7 +132,7 @@ public class SparkOrganizationRelation implements Serializable {
}
@NotNull
private static Consumer<MergedRels> getMergedRelsConsumer(OrganizationMap organizationMap,
private static Consumer<MergedRels> getMergedRelsConsumer(CommunityEntityMap organizationMap,
List<eu.dnetlib.dhp.oa.model.graph.Relation> relList, CommunityMap communityMap) {
return mergedRels -> {
String oId = mergedRels.getOrganizationId();

View File

@ -17,14 +17,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI;
/**
* @author miriam.baglioni
@ -37,7 +34,7 @@ public class DumpCommunities implements Serializable {
private final BufferedWriter writer;
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
+ " description \n";
private final transient QueryCommunityAPI queryCommunityAPI;
private final transient UtilCommunityAPI queryCommunityAPI;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -57,7 +54,7 @@ public class DumpCommunities implements Serializable {
final List<String> communities = Arrays.asList(split(parser.get("communities"), ";"));
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl"));
final DumpCommunities dc = new DumpCommunities(outputPath, nameNode);
dc.writeCommunity(communities);
@ -79,9 +76,9 @@ public class DumpCommunities implements Serializable {
writer.close();
}
public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception {
final Configuration conf = new Configuration();
queryCommunityAPI = new QueryCommunityAPI();
queryCommunityAPI = new UtilCommunityAPI();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);

View File

@ -88,7 +88,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="find_results_for_country"/>
<error to="Kill"/>

View File

@ -81,7 +81,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</java>
<ok to="select_result_dump_relation"/>
@ -143,7 +142,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
@ -169,7 +167,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>dataset</arg>
</spark>
@ -195,7 +192,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
</spark>
@ -221,7 +217,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>software</arg>
</spark>
@ -252,9 +247,7 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>

View File

@ -1,12 +1,6 @@
[
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
{
"paramName":"nn",
"paramLongName":"nameNode",

View File

@ -1,11 +1,6 @@
[
{
"paramName":"is",
"paramLongName":"isLookUpUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": false
},
{
"paramName": "hdfs",
"paramLongName": "hdfsPath",
@ -17,12 +12,8 @@
"paramLongName": "nameNode",
"paramDescription": "the name node",
"paramRequired": true
},{
"paramName": "md",
"paramLongName": "masterDuplicate",
"paramDescription": "the master duplicate path for datasource deduplication",
"paramRequired": false
}
}
]

View File

@ -1,11 +1,6 @@
[
{
"paramName":"ocm",
"paramLongName":"organizationCommunityMap",
"paramDescription": "the organization community map association",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",

View File

@ -167,7 +167,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--singleDeposition</arg><arg>${singleDeposition}</arg>
<arg>--communityId</arg><arg>${communityId}</arg>
</java>

View File

@ -85,20 +85,8 @@
</configuration>
</global>
<start to="get_master_duplicate" />
<start to="fork_dump" />
<action name="get_master_duplicate">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/masterduplicate</arg>
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--postgresUrl</arg><arg>${postgresURL}</arg>
<arg>--postgresUser</arg><arg>${postgresUser}</arg>
<arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
</java>
<ok to="fork_dump"/>
<error to="Kill"/>
</action>
<fork name="fork_dump">
<path start="dump_publication"/>
@ -349,7 +337,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/communities_infrastructures/community_infrastructure.json.gz</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
@ -360,8 +347,6 @@
<main-class>eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation</main-class>
<arg>--hdfsPath</arg><arg>${workingDir}/relation/context</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
<arg>--masterDuplicate</arg><arg>${workingDir}/masterduplicate</arg>
</java>
<ok to="join_context"/>
<error to="Kill"/>
@ -386,7 +371,6 @@
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
<arg>--outputPath</arg><arg>${workingDir}/relation/contextOrg</arg>
<arg>--organizationCommunityMap</arg><arg>${organizationCommunityMap}</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
</spark>
<ok to="join_context"/>

View File

@ -1,11 +1,7 @@
package eu.dnetlib.dhp.oa.graph.dump;
import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.dom4j.DocumentException;
@ -13,24 +9,22 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.xml.sax.SAXException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
private QueryCommunityAPI queryInformationSystem;
private UtilCommunityAPI queryInformationSystem;
private Map<String, String> map;
@BeforeEach
public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException {
queryInformationSystem = new QueryCommunityAPI();
queryInformationSystem = new UtilCommunityAPI();
map = queryInformationSystem.getCommunityMap(false, null);
}