diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/Utils.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/Utils.java deleted file mode 100644 index 30cb46b..0000000 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/Utils.java +++ /dev/null @@ -1,93 +0,0 @@ - -package eu.dnetlib.dhp.communityapi; - -import java.io.IOException; -import java.io.Serializable; -import java.util.List; -import java.util.stream.Collectors; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.communityapi.model.CommunitySummary; - -/** - * @author miriam.baglioni - * @Date 09/10/23 - */ -public class Utils implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Utils.class); - - public static List getValidCommunities() throws IOException { - ObjectMapper mapper = new ObjectMapper(); - return mapper - .readValue(QueryCommunityAPI.communities(), CommunitySummary.class) - .stream() - .filter( - community -> community.getStatus().equals("all") && - (community.getType().equals("ri") || community.getType().equals("community"))) - .collect(Collectors.toList()); - - } - - /** - * it returns for each organization the list of associated communities - */ -// public static CommunityEntityMap getCommunityOrganization() throws IOException { -// CommunityEntityMap organizationMap = new CommunityEntityMap(); -// getValidCommunities(production) -// .forEach(community -> { -// String id = community.getId(); -// try { -// List associatedOrgs = MAPPER -// .readValue( -// QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class); -// associatedOrgs.forEach(o -> { -// if (!organizationMap -// .keySet() -// .contains( -// "20|" + o)) -// organizationMap.put("20|" + o, new ArrayList<>()); -// organizationMap.get("20|" + o).add(community.getId()); -// }); -// } catch (IOException e) { -// throw new RuntimeException(e); -// } -// }); -// -// return organizationMap; -// } -// -// public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException { -// CommunityEntityMap projectMap = new CommunityEntityMap(); -// getValidCommunities(production) -// .forEach(community -> { -// int page = -1; -// int size = 100; -// ContentModel cm = new ContentModel(); -// do { -// page++; -// try { -// cm = MAPPER -// .readValue( -// QueryCommunityAPI -// .communityProjects( -// community.getId(), String.valueOf(page), String.valueOf(size), production), -// ContentModel.class); -// if (cm.getContent().size() > 0) { -// cm.getContent().forEach(p -> { -// if (!projectMap.keySet().contains("40|" + p.getOpenaireId())) -// projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>()); -// projectMap.get("40|" + p.getOpenaireId()).add(community.getId()); -// }); -// } -// } catch (IOException e) { -// throw new RuntimeException(e); -// } -// } while (!cm.getLast()); -// }); -// return projectMap; -// } -} diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java index 1d86308..ea00a16 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.communityapi.model; import java.io.Serializable; -import java.util.List; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -13,7 +12,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown = true) public class CommunityModel implements Serializable { private String id; - private String label; + private String name; private String description; private String status; @@ -44,12 +43,12 @@ public class CommunityModel implements Serializable { this.id = id; } - public String getLabel() { - return label; + public String getName() { + return name; } - public void setLabel(String label) { - this.label = label; + public void setName(String name) { + this.name = name; } public String getDescription() { diff --git a/dump/pom.xml b/dump/pom.xml index 1600163..3ae18f2 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -53,6 +53,11 @@ dump-schema 1.2.5-SNAPSHOT + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + org.apache.httpcomponents httpclient diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java new file mode 100644 index 0000000..17e8b24 --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java @@ -0,0 +1,78 @@ + +package eu.dnetlib.dhp.oa.graph.dump; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.communityapi.model.CommunityModel; +import eu.dnetlib.dhp.communityapi.model.CommunitySummary; +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class QueryCommunityAPI { + + private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class); + + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); + + } + + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } + + public List getCommunityCsv(List comms) { + return comms.stream().map(c -> { + try { + CommunityModel community = getCommunity(c); + StringBuilder builder = new StringBuilder(); + builder.append(DHPUtils.md5(community.getId())); + builder.append(Constants.SEP); + builder.append(community.getName()); + builder.append(Constants.SEP); + builder.append(community.getId()); + builder.append(Constants.SEP); + builder + .append( + community.getDescription()); + return builder.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + + } + + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> community.getStatus().equals("all") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + + } + + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + + } +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java deleted file mode 100644 index 8ca73ea..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ /dev/null @@ -1,110 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; -import eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - - private ISLookUpService isLookUp; - - private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class); - - private static final String XQUERY_ALL = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'all') " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - private static final String XQUERY_CI = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//CONFIGURATION/context[./@id=%s] " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) - throws ISLookUpException, DocumentException, SAXException { - if (singleCommunity) - return getMap(isLookUp.quickSearchProfile(XQUERY_CI.replace("%s", "'" + communityId + "'"))); - return getMap(isLookUp.quickSearchProfile(XQUERY_ALL)); - - } - - public ISLookUpService getIsLookUp() { - return isLookUp; - } - - public void setIsLookUp(ISLookUpService isLookUpService) { - this.isLookUp = isLookUpService; - } - - private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { - final CommunityMap map = new CommunityMap(); - - for (String xml : communityMap) { - final Document doc; - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); - } - - return map; - } - - public List getCommunityCsv(String toString) throws ISLookUpException, SAXException, DocumentException { - List communities = new ArrayList<>(); - - for (String xml : isLookUp.quickSearchProfile(toString)) { - log.info(xml); - final Document doc; - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - StringBuilder builder = new StringBuilder(); - builder.append(DHPUtils.md5(root.attribute("id").getValue())); - builder.append(Constants.SEP); - builder.append(root.attribute("label").getValue()); - builder.append(Constants.SEP); - builder.append(root.attribute("id").getValue()); - builder.append(Constants.SEP); - builder - .append( - ((Node) (root.selectNodes("//description").get(0))) - .getText() - .replace("\n", " ") - .replace("\t", " ")); - communities.add(builder.toString()); - } - return communities; - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 51f1852..d891a16 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -31,7 +31,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final transient QueryInformationSystem queryInformationSystem; + private final transient QueryCommunityAPI queryInformationSystem; private final transient BufferedWriter writer; @@ -45,8 +45,7 @@ public class SaveCommunityMap implements Serializable { fileSystem.delete(hdfsWritePath, true); } - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryInformationSystem = new QueryCommunityAPI(); FSDataOutputStream fos = fileSystem.create(hdfsWritePath); writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); @@ -85,7 +84,7 @@ public class SaveCommunityMap implements Serializable { } private void saveCommunityMap(boolean singleCommunity, String communityId) - throws ISLookUpException, IOException, DocumentException, SAXException { + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index d3f41d8..b506505 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -21,6 +22,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.communityapi.QueryCommunityAPI; +import eu.dnetlib.dhp.communityapi.model.CommunityModel; +import eu.dnetlib.dhp.communityapi.model.CommunitySummary; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.oa.graph.dump.complete.Constants; import eu.dnetlib.dhp.oa.model.Indicator; diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java index ebbadaa..b8a3b90 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java @@ -11,27 +11,20 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem; -import eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.Utils; +import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; /** * @author miriam.baglioni @@ -44,7 +37,7 @@ public class DumpCommunities implements Serializable { private final BufferedWriter writer; private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP + " description \n"; - private final transient QueryInformationSystem queryInformationSystem; + private final transient QueryCommunityAPI queryCommunityAPI; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -71,24 +64,12 @@ public class DumpCommunities implements Serializable { } private void writeCommunity(List communities) - throws IOException, ISLookUpException, DocumentException, SAXException { + throws IOException { writer.write(HEADER); writer.flush(); - String a = IOUtils - .toString( - DumpCommunities.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq")); - final String xquery = String - .format( - a, - communities - .stream() - .map(t -> String.format("$x//CONFIGURATION/context[./@id= '%s']", t)) - .collect(Collectors.joining(" or "))); - - for (String community : queryInformationSystem - .getCommunityCsv(xquery)) { + for (String community : queryCommunityAPI + .getCommunityCsv(communities)) { writer .write( community); @@ -100,8 +81,7 @@ public class DumpCommunities implements Serializable { public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception { final Configuration conf = new Configuration(); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + queryCommunityAPI = new QueryCommunityAPI(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq deleted file mode 100644 index 620955c..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/all_communities.xq +++ /dev/null @@ -1,8 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and ($x//context/param[./@name = 'status']/text() = 'all') -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq deleted file mode 100644 index 7b470ca..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/set_of_communities.xq +++ /dev/null @@ -1,11 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and (%s) -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - -{$x//CONFIGURATION/context/param[@name='description']/text()} - - \ No newline at end of file diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq deleted file mode 100644 index 4f257a6..0000000 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/xqueries/single_community.xq +++ /dev/null @@ -1,8 +0,0 @@ -for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') -where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] -and $x//CONFIGURATION/context[./@id=%s] -return - -{$x//CONFIGURATION/context/@id} -{$x//CONFIGURATION/context/@label} - \ No newline at end of file diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index 8391c89..9f3cb84 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.dump; import static org.mockito.Mockito.lenient; +import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -22,55 +23,14 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) class QueryInformationSystemTest { - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and ($x//context/param[./@name = 'status']/text() = 'all') " - + - " return " + - " " + - "{$x//CONFIGURATION/context/@id}" + - "{$x//CONFIGURATION/context/@label}" + - ""; - - List communityMap = Arrays - .asList( - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ""); - - @Mock - private ISLookUpService isLookUpService; - - private QueryInformationSystem queryInformationSystem; + private QueryCommunityAPI queryInformationSystem; private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException, SAXException { - lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(isLookUpService); + public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException { + + queryInformationSystem = new QueryCommunityAPI(); map = queryInformationSystem.getCommunityMap(false, null); }