This commit is contained in:
Miriam Baglioni 2023-10-25 17:32:09 +02:00
parent eb407ba0d3
commit e91636817c
12 changed files with 106 additions and 311 deletions

View File

@ -1,93 +0,0 @@
package eu.dnetlib.dhp.communityapi;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
/**
 * Static helpers for reading community information from the community API.
 *
 * @author miriam.baglioni
 * @Date 09/10/23
 */
public class Utils implements Serializable {

	private static final Logger log = LoggerFactory.getLogger(Utils.class);

	/** Shared JSON mapper: building an ObjectMapper is costly, and a configured instance can be reused. */
	private static final ObjectMapper MAPPER = new ObjectMapper();

	/**
	 * Reads the payload returned by {@code QueryCommunityAPI.communities()}, deserializes it as a
	 * {@link CommunitySummary} and keeps only the entries whose status is {@code "all"} and whose
	 * type is either {@code "ri"} or {@code "community"}.
	 * <p>
	 * Entries with a missing status or type are discarded instead of making the whole call fail
	 * with a {@link NullPointerException}.
	 *
	 * @return the entries that passed the status/type filter
	 * @throws IOException if the payload cannot be obtained or deserialized
	 */
	public static List<CommunitySummary> getValidCommunities() throws IOException {
		return MAPPER
			.readValue(QueryCommunityAPI.communities(), CommunitySummary.class)
			.stream()
			.filter(
				// constant-first equals: a null status/type simply does not match
				community -> "all".equals(community.getStatus()) &&
					("ri".equals(community.getType()) || "community".equals(community.getType())))
			.collect(Collectors.toList());
	}

}

View File

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.communityapi.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@ -13,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonIgnoreProperties(ignoreUnknown = true)
public class CommunityModel implements Serializable {
private String id;
private String label;
private String name;
private String description;
private String status;
@ -44,12 +43,12 @@ public class CommunityModel implements Serializable {
this.id = id;
}
public String getLabel() {
return label;
public String getName() {
return name;
}
public void setLabel(String label) {
this.label = label;
public void setName(String name) {
this.name = name;
}
public String getDescription() {

View File

@ -53,6 +53,11 @@
<artifactId>dump-schema</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>api</artifactId>
<version>1.2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>

View File

@ -0,0 +1,78 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.communityapi.model.CommunityModel;
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.utils.DHPUtils;
/**
 * Builds the community map and the per-community CSV rows used by the dump, reading the community
 * descriptions through {@code eu.dnetlib.dhp.communityapi.QueryCommunityAPI}.
 */
public class QueryCommunityAPI {

	private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class);

	/** Shared JSON mapper: building an ObjectMapper is costly, and a configured instance can be reused. */
	private static final ObjectMapper MAPPER = new ObjectMapper();

	/**
	 * Builds the map associating each community id with its name.
	 *
	 * @param singleCommunity when true only the community identified by {@code communityId} is
	 *            put in the map; when false every community passing the status/type filter is
	 * @param communityId the id of the community to fetch; only read when {@code singleCommunity}
	 *            is true
	 * @return the id -> name map
	 * @throws IOException if the API payload cannot be obtained or deserialized
	 */
	public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
		throws IOException {
		if (singleCommunity) {
			return getMap(Arrays.asList(getCommunity(communityId)));
		}
		return getMap(getValidCommunities());
	}

	/** Copies id and name of each given community into a new {@link CommunityMap}. */
	private CommunityMap getMap(List<CommunityModel> communities) {
		final CommunityMap map = new CommunityMap();
		for (CommunityModel community : communities) {
			map.put(community.getId(), community.getName());
		}
		return map;
	}

	/**
	 * Produces one delimited row per requested community, in the order of the input list. Each row
	 * is: md5 of the id, name, id, description, joined by {@code Constants.SEP}.
	 *
	 * @param comms the ids of the communities to describe
	 * @return the rows, one per community
	 * @throws RuntimeException wrapping the {@link IOException} raised while fetching a community
	 */
	public List<String> getCommunityCsv(List<String> comms) {
		return comms.stream().map(this::toCsvRow).collect(Collectors.toList());
	}

	/** Fetches one community and renders it as a single delimited row. */
	private String toCsvRow(String communityId) {
		final CommunityModel community;
		try {
			community = getCommunity(communityId);
		} catch (IOException e) {
			// keep the cause and say which community failed
			throw new RuntimeException("Unable to retrieve community " + communityId, e);
		}
		StringBuilder builder = new StringBuilder();
		builder.append(DHPUtils.md5(community.getId()));
		builder.append(Constants.SEP);
		builder.append(flatten(community.getName()));
		builder.append(Constants.SEP);
		builder.append(community.getId());
		builder.append(Constants.SEP);
		builder.append(flatten(community.getDescription()));
		return builder.toString();
	}

	/**
	 * Makes a free-text value safe to embed in a single row: line breaks and tabs are replaced by
	 * a blank, and a missing value becomes the empty string rather than the text "null".
	 * NOTE(review): assumes rows are newline-terminated and that Constants.SEP is a tab - confirm.
	 */
	private static String flatten(String text) {
		if (text == null) {
			return "";
		}
		return text.replace("\n", " ").replace("\t", " ");
	}

	/**
	 * Reads the list of communities from the API and keeps those whose status is {@code "all"}
	 * and whose type is {@code "ri"} or {@code "community"}. Entries with a missing status or
	 * type are discarded instead of raising a {@link NullPointerException}.
	 */
	private List<CommunityModel> getValidCommunities() throws IOException {
		return MAPPER
			.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class)
			.stream()
			.filter(
				// constant-first equals: a null status/type simply does not match
				community -> "all".equals(community.getStatus()) &&
					("ri".equals(community.getType()) || "community".equals(community.getType())))
			.collect(Collectors.toList());
	}

	/** Reads from the API the description of the community with the given id. */
	private CommunityModel getCommunity(String id) throws IOException {
		return MAPPER
			.readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class);
	}

}

View File

@ -1,110 +0,0 @@
package eu.dnetlib.dhp.oa.graph.dump;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.csv.Constants;
import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities;
import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * Reads community profiles through an {@link ISLookUpService} and turns the XML fragments returned
 * by the XQueries into a {@link CommunityMap} or into delimited rows.
 */
public class QueryInformationSystem {

	private ISLookUpService isLookUp;

	private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class);

	/** Selects id and label of every context of type community/ri whose status param is 'all'. */
	private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		"  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and ($x//context/param[./@name = 'status']/text() = 'all') "
		+
		"  return " +
		"<community> " +
		"{$x//CONFIGURATION/context/@id}" +
		"{$x//CONFIGURATION/context/@label}" +
		"</community>";

	/** Selects id and label of the community/ri context whose id replaces the {@code %s} marker. */
	private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		"  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and $x//CONFIGURATION/context[./@id=%s] "
		+
		"  return " +
		"<community> " +
		"{$x//CONFIGURATION/context/@id}" +
		"{$x//CONFIGURATION/context/@label}" +
		"</community>";

	/**
	 * Builds the map associating each community id with its label.
	 *
	 * @param singleCommunity when true only the community identified by {@code communityId} is
	 *            queried; when false {@code XQUERY_ALL} is run
	 * @param communityId the id spliced (single-quoted) into {@code XQUERY_CI}; only read when
	 *            {@code singleCommunity} is true
	 * @return the id -> label map
	 * @throws ISLookUpException if the lookup service fails
	 * @throws DocumentException if a returned fragment cannot be parsed
	 * @throws SAXException if the parser feature cannot be set
	 */
	public CommunityMap getCommunityMap(boolean singleCommunity, String communityId)
		throws ISLookUpException, DocumentException, SAXException {
		if (singleCommunity) {
			// NOTE(review): communityId is concatenated into the query without escaping; an id
			// containing a single quote would alter the XQuery - confirm ids are trusted/validated
			return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'")));
		}
		return getMap(isLookUp.quickSearchProfile(XQUERY_ALL));
	}

	public ISLookUpService getIsLookUp() {
		return isLookUp;
	}

	public void setIsLookUp(ISLookUpService isLookUpService) {
		this.isLookUp = isLookUpService;
	}

	/** Reads the {@code id} and {@code label} attributes of each fragment's root into a map. */
	private CommunityMap getMap(List<String> communityMap) throws DocumentException, SAXException {
		final CommunityMap map = new CommunityMap();
		for (String xml : communityMap) {
			final Element root = parseRoot(xml);
			map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
		}
		return map;
	}

	/**
	 * Runs the given XQuery and renders each returned fragment as one delimited row: md5 of the
	 * id, label, id, description, joined by {@code Constants.SEP}. Line breaks and tabs in the
	 * description are replaced by a blank.
	 *
	 * @param toString the XQuery to execute
	 * @return the rows, one per returned fragment
	 * @throws ISLookUpException if the lookup service fails
	 * @throws SAXException if the parser feature cannot be set
	 * @throws DocumentException if a returned fragment cannot be parsed
	 */
	public List<String> getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException {
		List<String> communities = new ArrayList<>();

		for (String xml : isLookUp.quickSearchProfile(toString)) {
			log.info(xml);
			final Element root = parseRoot(xml);
			// the first description element found in the fragment is used; a fragment without
			// one makes get(0) fail
			final String description = ((Node) (root.selectNodes("//description").get(0)))
				.getText()
				.replace("\n", " ")
				.replace("\t", " ");

			StringBuilder builder = new StringBuilder();
			builder.append(DHPUtils.md5(root.attribute("id").getValue()));
			builder.append(Constants.SEP);
			builder.append(root.attribute("label").getValue());
			builder.append(Constants.SEP);
			builder.append(root.attribute("id").getValue());
			builder.append(Constants.SEP);
			builder.append(description);
			communities.add(builder.toString());
		}
		return communities;
	}

	/**
	 * Parses one XML fragment with DOCTYPE declarations disallowed and returns its root element.
	 * Single place holding the parser hardening shared by every method that reads a fragment.
	 */
	private static Element parseRoot(String xml) throws DocumentException, SAXException {
		final SAXReader reader = new SAXReader();
		reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
		final Document doc = reader.read(new StringReader(xml));
		return doc.getRootElement();
	}

}

View File

@ -31,7 +31,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
public class SaveCommunityMap implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class);
private final transient QueryInformationSystem queryInformationSystem;
private final transient QueryCommunityAPI queryInformationSystem;
private final transient BufferedWriter writer;
@ -45,8 +45,7 @@ public class SaveCommunityMap implements Serializable {
fileSystem.delete(hdfsWritePath, true);
}
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryInformationSystem = new QueryCommunityAPI();
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
@ -85,7 +84,7 @@ public class SaveCommunityMap implements Serializable {
}
private void saveCommunityMap(boolean singleCommunity, String communityId)
throws ISLookUpException, IOException, DocumentException, SAXException {
throws IOException {
final String communityMapString = Utils.OBJECT_MAPPER
.writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId));
log.info("communityMap {} ", communityMapString);

View File

@ -8,6 +8,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -21,6 +22,9 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.communityapi.QueryCommunityAPI;
import eu.dnetlib.dhp.communityapi.model.CommunityModel;
import eu.dnetlib.dhp.communityapi.model.CommunitySummary;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Constants;
import eu.dnetlib.dhp.oa.model.Indicator;

View File

@ -11,27 +11,20 @@ import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
* @author miriam.baglioni
@ -44,7 +37,7 @@ public class DumpCommunities implements Serializable {
private final BufferedWriter writer;
private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP
+ " description \n";
private final transient QueryInformationSystem queryInformationSystem;
private final transient QueryCommunityAPI queryCommunityAPI;
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
@ -71,24 +64,12 @@ public class DumpCommunities implements Serializable {
}
private void writeCommunity(List<String> communities)
throws IOException, ISLookUpException, DocumentException, SAXException {
throws IOException {
writer.write(HEADER);
writer.flush();
String a = IOUtils
.toString(
DumpCommunities.class
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq"));
final String xquery = String
.format(
a,
communities
.stream()
.map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t))
.collect(Collectors.joining(" or ")));
for (String community : queryInformationSystem
.getCommunityCsv(xquery)) {
for (String community : queryCommunityAPI
.getCommunityCsv(communities)) {
writer
.write(
community);
@ -100,8 +81,7 @@ public class DumpCommunities implements Serializable {
public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception {
final Configuration conf = new Configuration();
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
queryCommunityAPI = new QueryCommunityAPI();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);

View File

@ -1,8 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and ($x//context/param[./@name = 'status']/text() = 'all')
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -1,11 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and (%s)
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
<description>
{$x//CONFIGURATION/context/param[@name='description']/text()}
</description>
</community>

View File

@ -1,8 +0,0 @@
for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')
where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']
and $x//CONFIGURATION/context[./@id=%s]
return
<community>
{$x//CONFIGURATION/context/@id}
{$x//CONFIGURATION/context/@label}
</community>

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.dump;
import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@ -22,55 +23,14 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
class QueryInformationSystemTest {
private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+
" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
" and ($x//context/param[./@name = 'status']/text() = 'all') "
+
" return " +
"<community> " +
"{$x//CONFIGURATION/context/@id}" +
"{$x//CONFIGURATION/context/@label}" +
"</community>";
List<String> communityMap = Arrays
.asList(
"<community id=\"egi\" label=\"EGI Federation\"/>",
"<community id=\"fet-fp7\" label=\"FET FP7\"/>",
"<community id=\"fet-h2020\" label=\"FET H2020\"/>",
"<community id=\"clarin\" label=\"CLARIN\"/>",
"<community id=\"rda\" label=\"Research Data Alliance\"/>",
"<community id=\"ee\" label=\"SDSN - Greece\"/>",
"<community id=\"dh-ch\" label=\"Digital Humanities and Cultural Heritage\"/>",
"<community id=\"fam\" label=\"Fisheries and Aquaculture Management\"/>",
"<community id=\"ni\" label=\"Neuroinformatics\"/>",
"<community id=\"mes\" label=\"European Marine Science\"/>",
"<community id=\"instruct\" label=\"Instruct-ERIC\"/>",
"<community id=\"elixir-gr\" label=\"ELIXIR GR\"/>",
"<community id=\"aginfra\" label=\"Agricultural and Food Sciences\"/>",
"<community id=\"dariah\" label=\"DARIAH EU\"/>",
"<community id=\"risis\" label=\"RISIS\"/>",
"<community id=\"epos\" label=\"EPOS\"/>",
"<community id=\"beopen\" label=\"Transport Research\"/>",
"<community id=\"euromarine\" label=\"EuroMarine\"/>",
"<community id=\"ifremer\" label=\"Ifremer\"/>",
"<community id=\"oa-pg\" label=\"EC Post-Grant Open Access Pilot\"/>",
"<community id=\"science-innovation-policy\" label=\"Science and Innovation Policy Studies\"/>",
"<community id=\"covid-19\" label=\"COVID-19\"/>",
"<community id=\"enermaps\" label=\"Energy Research\"/>");
@Mock
private ISLookUpService isLookUpService;
private QueryInformationSystem queryInformationSystem;
private QueryCommunityAPI queryInformationSystem;
private Map<String, String> map;
@BeforeEach
public void setUp() throws ISLookUpException, DocumentException, SAXException {
lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap);
queryInformationSystem = new QueryInformationSystem();
queryInformationSystem.setIsLookUp(isLookUpService);
public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException {
queryInformationSystem = new QueryCommunityAPI();
map = queryInformationSystem.getCommunityMap(false, null);
}