From 818bb4b11c92cbe9a48f8bb10ca8755731452d65 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 30 Oct 2023 14:28:55 +0100 Subject: [PATCH] removing interaction with the IS. Using communityAPIs instead --- api/pom.xml | 5 + .../dhp/communityapi/QueryCommunityAPI.java | 16 ++ .../communityapi/model/CommunityModel.java | 21 ++ .../dhp/communityapi/model/ProjectModel.java | 21 ++ dump/pom.xml | 6 + .../dhp/oa/graph/dump/QueryCommunityAPI.java | 78 ------ .../dhp/oa/graph/dump/SaveCommunityMap.java | 16 +- .../dhp/oa/graph/dump/UtilCommunityAPI.java | 196 ++++++++++++++ .../dump/complete/CreateContextEntities.java | 13 +- .../dump/complete/CreateContextRelation.java | 45 +--- .../dump/complete/QueryInformationSystem.java | 246 ------------------ .../complete/SparkOrganizationRelation.java | 11 +- .../oa/graph/dump/csv/DumpCommunities.java | 13 +- .../countryresults/oozie_app/workflow.xml | 1 - .../oa/graph/dump/csv/oozie_app/workflow.xml | 7 - .../oa/graph/dump/input_cm_parameters.json | 6 - .../oa/graph/dump/input_entity_parameter.json | 15 +- .../dump/input_organization_parameters.json | 7 +- .../graph/dump/wf/main/oozie_app/workflow.xml | 1 - .../complete/oozie_app/workflow.xml | 18 +- .../dump/QueryInformationSystemTest.java | 10 +- 21 files changed, 303 insertions(+), 449 deletions(-) delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java create mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java delete mode 100644 dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java diff --git a/api/pom.xml b/api/pom.xml index 0109977..7a3e978 100644 --- a/api/pom.xml +++ b/api/pom.xml @@ -37,6 +37,11 @@ dhp-common ${project.version} + + com.fasterxml.jackson.core + jackson-annotations + compile + diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java index a5148ea..0b7cdaf 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/QueryCommunityAPI.java @@ -40,6 +40,22 @@ public class QueryCommunityAPI { } + public static String communityDatasource(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders"); + + } + + public static String communityPropagationOrganization(String id) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations"); + } + + public static String communityProjects(String id, String page, String size) throws IOException { + + return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size); + } + private static String getBody(HttpURLConnection conn) throws IOException { String body = "{}"; try (BufferedReader br = new BufferedReader( diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java index ea00a16..92a1a5e 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/CommunityModel.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.communityapi.model; import java.io.Serializable; +import java.util.List; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -19,6 +20,26 @@ public class CommunityModel implements Serializable { private String type; + private List subject; + + private String zenodoCOmmunity; + + public List getSubject() { + return subject; + } + + public void setSubject(List subject) { + this.subject = subject; + } + + public String getZenodoCOmmunity() { + return zenodoCOmmunity; + } + + public void setZenodoCOmmunity(String zenodoCOmmunity) { + this.zenodoCOmmunity = zenodoCOmmunity; + } + public String getType() { return type; } diff --git a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java index f5aa967..2a816c2 100644 --- a/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java +++ b/api/src/main/java/eu/dnetlib/dhp/communityapi/model/ProjectModel.java @@ -14,6 +14,27 @@ public class ProjectModel implements Serializable { private String openaireId; + private String funder; + + private String gratId; + + + public String getFunder() { + return funder; + } + + public void setFunder(String funder) { + this.funder = funder; + } + + public String getGratId() { + return gratId; + } + + public void setGratId(String gratId) { + this.gratId = gratId; + } + public String getOpenaireId() { return openaireId; } diff --git a/dump/pom.xml b/dump/pom.xml index 3ae18f2..01d29b6 100644 --- a/dump/pom.xml +++ b/dump/pom.xml @@ -67,6 +67,12 @@ classgraph 4.8.71 + + eu.dnetlib.dhp + api + 1.2.5-SNAPSHOT + compile + diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java deleted file mode 100644 index 17e8b24..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryCommunityAPI.java +++ /dev/null @@ -1,78 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.communityapi.model.CommunityModel; -import eu.dnetlib.dhp.communityapi.model.CommunitySummary; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; -import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; -import eu.dnetlib.dhp.utils.DHPUtils; - -public class QueryCommunityAPI { - - private static final Logger log = LoggerFactory.getLogger(QueryCommunityAPI.class); - - public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) - throws IOException { - if (singleCommunity) - return getMap(Arrays.asList(getCommunity(communityId))); - return getMap(getValidCommunities()); - - } - - private CommunityMap getMap(List communities) { - final CommunityMap map = new CommunityMap(); - communities.forEach(c -> map.put(c.getId(), c.getName())); - return map; - } - - public List getCommunityCsv(List comms) { - return comms.stream().map(c -> { - try { - CommunityModel community = getCommunity(c); - StringBuilder builder = new StringBuilder(); - builder.append(DHPUtils.md5(community.getId())); - builder.append(Constants.SEP); - builder.append(community.getName()); - builder.append(Constants.SEP); - builder.append(community.getId()); - builder.append(Constants.SEP); - builder - .append( - community.getDescription()); - return builder.toString(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }).collect(Collectors.toList()); - - } - - private List getValidCommunities() throws IOException { - ObjectMapper mapper = new ObjectMapper(); - return mapper - .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) - .stream() - .filter( - community -> community.getStatus().equals("all") && - (community.getType().equals("ri") || community.getType().equals("community"))) - .collect(Collectors.toList()); - - } - - private CommunityModel getCommunity(String id) throws IOException { - ObjectMapper mapper = new ObjectMapper(); - return mapper - .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); - - } -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index d891a16..e93a2ea 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -13,13 +13,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * This class connects with the IS related to the isLookUpUrl got as parameter. It saves the information about the @@ -31,11 +28,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); - private final transient QueryCommunityAPI queryInformationSystem; + private final transient UtilCommunityAPI queryInformationSystem; private final transient BufferedWriter writer; - public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { + public SaveCommunityMap(String hdfsPath, String hdfsNameNode) throws IOException { final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); @@ -45,7 +42,7 @@ public class SaveCommunityMap implements Serializable { fileSystem.delete(hdfsWritePath, true); } - queryInformationSystem = new QueryCommunityAPI(); + queryInformationSystem = new UtilCommunityAPI(); FSDataOutputStream fos = fileSystem.create(hdfsWritePath); writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); @@ -67,9 +64,6 @@ public class SaveCommunityMap implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); - final Boolean singleCommunity = Optional .ofNullable(parser.get("singleDeposition")) .map(Boolean::valueOf) @@ -77,14 +71,14 @@ public class SaveCommunityMap implements Serializable { final String community_id = Optional.ofNullable(parser.get("communityId")).orElse(null); - final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); + final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode); scm.saveCommunityMap(singleCommunity, community_id); } private void saveCommunityMap(boolean singleCommunity, String communityId) - throws IOException { + throws IOException { final String communityMapString = Utils.OBJECT_MAPPER .writeValueAsString(queryInformationSystem.getCommunityMap(singleCommunity, communityId)); log.info("communityMap {} ", communityMapString); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java new file mode 100644 index 0000000..c352d0d --- /dev/null +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/UtilCommunityAPI.java @@ -0,0 +1,196 @@ + +package eu.dnetlib.dhp.oa.graph.dump; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import eu.dnetlib.dhp.communityapi.model.*; +import eu.dnetlib.dhp.oa.graph.dump.complete.ContextInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + + +import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; +import eu.dnetlib.dhp.oa.graph.dump.csv.Constants; +import eu.dnetlib.dhp.utils.DHPUtils; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +public class UtilCommunityAPI { + + private static final Logger log = LoggerFactory.getLogger(UtilCommunityAPI.class); + + public CommunityMap getCommunityMap(boolean singleCommunity, String communityId) + throws IOException { + if (singleCommunity) + return getMap(Arrays.asList(getCommunity(communityId))); + return getMap(getValidCommunities()); + + } + + private CommunityMap getMap(List communities) { + final CommunityMap map = new CommunityMap(); + communities.forEach(c -> map.put(c.getId(), c.getName())); + return map; + } + + public List getCommunityCsv(List comms) { + return comms.stream().map(c -> { + try { + CommunityModel community = getCommunity(c); + StringBuilder builder = new StringBuilder(); + builder.append(DHPUtils.md5(community.getId())); + builder.append(Constants.SEP); + builder.append(community.getName()); + builder.append(Constants.SEP); + builder.append(community.getId()); + builder.append(Constants.SEP); + builder + .append( + community.getDescription()); + return builder.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + + } + + private List getValidCommunities() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> community.getStatus().equals("all") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + + } + + private CommunityModel getCommunity(String id) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + return mapper + .readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.community(id), CommunityModel.class); + + } + + + public List getContextInformation(final Consumer consumer) throws IOException { + List ret = new ArrayList<>(); + getValidCommunities() + .forEach(c -> { + ContextInfo cinfo = new ContextInfo(); + cinfo.setId(c.getId()); + cinfo.setDescription(c.getDescription()); + CommunityModel cm =null; + try { + cm = getCommunity(c.getId()); + } catch (IOException e) { + throw new RuntimeException(e); + } + cinfo.setSubject(cm.getSubject()); + cinfo.setZenodocommunity(c.getZenodoCOmmunity()); + cinfo.setType(c.getType()); + ret.add(cinfo); + }); + + return ret; + + } + + public void getContextRelation(final Consumer consumer) throws IOException { + getValidCommunities().forEach(c -> { + ContextInfo cinfo = new ContextInfo(); + cinfo.setId(c.getId()); + cinfo.setDatasourceList( getDatasourceList(c.getId())); + cinfo.setProjectList(getProjectList(c.getId())); + consumer.accept(cinfo); + }); + } + + private List getDatasourceList(String id) { + List datasourceList = new ArrayList<>(); + try { + + new ObjectMapper().readValue(eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityDatasource(id), + DatasourceList.class) + .stream() + .forEach(ds ->{ + if(Optional.ofNullable(ds.getOpenaireId()).isPresent()){ + + datasourceList.add(ds.getOpenaireId()); + } + + }); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return datasourceList; + } + + private List getProjectList( String id) { + int page = -1; + int size = 100; + ContentModel cm = null;; + ArrayList projectList = new ArrayList<>(); + do { + page++; + try { + cm = new ObjectMapper() + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityProjects( + id, String.valueOf(page), String.valueOf(size)), + ContentModel.class); + if (cm.getContent().size() > 0) { + cm.getContent().forEach(p -> { + if(Optional.ofNullable(p.getOpenaireId()).isPresent()) + projectList.add(p.getOpenaireId()); + + }); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + + return projectList; + } + + + /** + * it returns for each organization the list of associated communities + */ + public CommunityEntityMap getCommunityOrganization() throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue( + eu.dnetlib.dhp.communityapi.QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class); + associatedOrgs.forEach(o -> { + if (!organizationMap + .keySet() + .contains(o)) + organizationMap.put(o, new ArrayList<>()); + organizationMap.get(o).add(community.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + return organizationMap; + } + +} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index 0356bd4..c190ac1 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -9,6 +9,7 @@ import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import java.util.function.Function; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -22,7 +23,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.model.graph.ResearchInitiative; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and @@ -52,13 +52,11 @@ public class CreateContextEntities implements Serializable { final String hdfsNameNode = parser.get("nameNode"); log.info("nameNode: {}", hdfsNameNode); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); final CreateContextEntities cce = new CreateContextEntities(hdfsPath, hdfsNameNode); log.info("Processing contexts..."); - cce.execute(Process::getEntity, isLookUpUrl); + cce.execute(Process::getEntity); cce.close(); @@ -87,11 +85,10 @@ public class CreateContextEntities implements Serializable { } - public void execute(final Function producer, String isLookUpUrl) - throws ISLookUpException { + public void execute(final Function producer) + throws IOException { - QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); + UtilCommunityAPI queryInformationSystem = new UtilCommunityAPI(); final Consumer consumer = ci -> writeEntity(producer.apply(ci)); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 0708fc9..2f9e6ee 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -10,9 +10,9 @@ import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -25,11 +25,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.oa.graph.dump.exceptions.MyRuntimeException; import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; -import eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB; import eu.dnetlib.dhp.oa.model.graph.*; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes the set of new Relation between the context and datasources. At the moment the relation between the context @@ -39,11 +35,6 @@ public class CreateContextRelation implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); private final transient Configuration conf; private final transient BufferedWriter writer; - private final transient QueryInformationSystem queryInformationSystem; - - private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( @@ -68,27 +59,17 @@ public class CreateContextRelation implements Serializable { final String hdfsNameNode = parser.get("nameNode"); log.info("hdfsNameNode: {}", hdfsNameNode); - final String isLookUpUrl = parser.get("isLookUpUrl"); - log.info("isLookUpUrl: {}", isLookUpUrl); final String masterDuplicatePath = parser.get("masterDuplicate"); log.info("masterDuplicatePath: {}", masterDuplicatePath); - final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode, isLookUpUrl); + final CreateContextRelation cce = new CreateContextRelation(hdfsPath, hdfsNameNode); - final List masterDuplicateList = cce.readMasterDuplicate(masterDuplicatePath); - log.info("Creating relation for datasource..."); + log.info("Creating relation for datasources and projects..."); cce .execute( - Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class), - masterDuplicateList); - - log.info("Creating relations for projects... "); - cce - .execute( - Process::getRelation, CONTEX_RELATION_PROJECT, - ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); + Process::getRelation); cce.close(); @@ -112,15 +93,11 @@ public class CreateContextRelation implements Serializable { writer.close(); } - public CreateContextRelation(String hdfsPath, String hdfsNameNode, String isLookUpUrl) - throws IOException, ISLookUpException { + public CreateContextRelation(String hdfsPath, String hdfsNameNode) + throws IOException{ this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); - queryInformationSystem = new QueryInformationSystem(); - queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - queryInformationSystem.execContextRelationQuery(); - FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; @@ -134,17 +111,13 @@ public class CreateContextRelation implements Serializable { } - public void execute(final Function> producer, String category, String prefix) { - execute(producer, category, prefix, null); - } - - public void execute(final Function> producer, String category, String prefix, - List masterDuplicateList) { + public void execute(final Function> producer) throws IOException { final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); - queryInformationSystem.getContextRelation(consumer, category, prefix, masterDuplicateList); + UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + queryCommunityAPI.getContextRelation(consumer); } protected void writeEntity(final Relation r) { diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java deleted file mode 100644 index b982b26..0000000 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ /dev/null @@ -1,246 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.dump.complete; - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.function.Consumer; - -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.Node; -import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; - -import eu.dnetlib.dhp.oa.graph.dump.subset.MasterDuplicate; -import eu.dnetlib.dhp.oa.graph.dump.subset.SparkDumpResult; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -public class QueryInformationSystem { - private static final Logger log = LoggerFactory.getLogger(QueryInformationSystem.class); - private ISLookUpService isLookUp; - private List contextRelationResult; - - private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " + - " and $x//context/param[./@name = 'status']/text() = 'all' " + - " return " + - "$x//context"; - - private static final String XQUERY_ENTITY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " - + - "where $x//context[./@type='community' or ./@type = 'ri'] and $x//context/param[./@name = 'status']/text() = 'all' return " - + - "concat(data($x//context/@id) , '@@', $x//context/param[./@name =\"name\"]/text(), '@@', " + - "$x//context/param[./@name=\"description\"]/text(), '@@', $x//context/param[./@name = \"subject\"]/text(), '@@', " - + - "$x//context/param[./@name = \"zenodoCommunity\"]/text(), '@@', $x//context/@type)"; - - public void getContextInformation(final Consumer consumer) throws ISLookUpException { - - isLookUp - .quickSearchProfile(XQUERY_ENTITY) - .forEach(c -> { - ContextInfo cinfo = new ContextInfo(); - String[] cSplit = c.split("@@"); - cinfo.setId(cSplit[0]); - cinfo.setName(cSplit[1]); - log.info("community name : {}", cSplit[1]); - cinfo.setDescription(cSplit[2]); - if (!cSplit[3].trim().equals("")) { - cinfo.setSubject(Arrays.asList(cSplit[3].split(","))); - } - cinfo.setZenodocommunity(cSplit[4]); - cinfo.setType(cSplit[5]); - consumer.accept(cinfo); - }); - - } - - public List getContextInformation() throws ISLookUpException { - List ret = new ArrayList<>(); - isLookUp - .quickSearchProfile(XQUERY_ENTITY) - .forEach(c -> { - ContextInfo cinfo = new ContextInfo(); - String[] cSplit = c.split("@@"); - cinfo.setId(cSplit[0]); - cinfo.setName(cSplit[1]); - cinfo.setDescription(cSplit[2]); - if (!cSplit[3].trim().equals("")) { - cinfo.setSubject(Arrays.asList(cSplit[3].split(","))); - } - cinfo.setZenodocommunity(cSplit[4]); - cinfo.setType(cSplit[5]); - ret.add(cinfo); - }); - - return ret; - - } - - public List getContextRelationResult() { - return contextRelationResult; - } - - public void setContextRelationResult(List contextRelationResult) { - this.contextRelationResult = contextRelationResult; - } - - public ISLookUpService getIsLookUp() { - return isLookUp; - } - - public void setIsLookUp(ISLookUpService isLookUpService) { - this.isLookUp = isLookUpService; - } - - public void execContextRelationQuery() throws ISLookUpException { - contextRelationResult = isLookUp.quickSearchProfile(XQUERY); - - } - - public void getContextRelation(final Consumer consumer, String category, String prefix) { - getContextRelation(consumer, category, prefix, null); - } - - public void getContextRelation(final Consumer consumer, String category, String prefix, - List masterDuplicateList) { - - contextRelationResult.forEach(xml -> { - ContextInfo cinfo = new ContextInfo(); - final Document doc; - - try { - final SAXReader reader = new SAXReader(); - reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - doc = reader.read(new StringReader(xml)); - Element root = doc.getRootElement(); - cinfo.setId(root.attributeValue("id")); - - Iterator it = root.elementIterator(); - while (it.hasNext()) { - Element el = it.next(); - if (el.getName().equals("category")) { - String categoryId = el.attributeValue("id"); - categoryId = categoryId.substring(categoryId.lastIndexOf("::") + 2); - if (categoryId.equals(category)) { - cinfo.setDatasourceList(getCategoryList(el, prefix, masterDuplicateList)); - } - } - - } - consumer.accept(cinfo); - } catch (DocumentException | SAXException e) { - e.printStackTrace(); - } - - }); - - } - - @NotNull - private List getCategoryList(Element el, String prefix, List masterDuplicateList) { - List datasourceList = new ArrayList<>(); - for (Object node : el.selectNodes(".//concept")) { - String oid = getOpenaireId((Node) node, prefix); - if (oid != null) - if (masterDuplicateList == null) - datasourceList.add(oid); - else - datasourceList.add(getMaster(oid, masterDuplicateList)); - } - - return datasourceList; - } - - private String getMaster(String oid, List masterDuplicateList) { - for (MasterDuplicate md : masterDuplicateList) { - if (md.getDuplicate().equals(oid)) - return md.getMaster(); - } - return oid; - } - - private String getOpenaireId(Node el, String prefix) { - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - if (n.valueOf("./@name").equals("openaireId")) { - String id = n.getText(); - if (id.startsWith(prefix + "|")) - return id; - return prefix + "|" + id; - } - } - - return makeOpenaireId(el, prefix); - - } - - private String makeOpenaireId(Node el, String prefix) { - if (!prefix.equals(ModelSupport.entityIdPrefix.get("project"))) { - return null; - } - String funder = ""; - String grantId = null; - String funding = null; - for (Object node : el.selectNodes(".//param")) { - Node n = (Node) node; - switch (n.valueOf("./@name")) { - case "funding": - funding = n.getText(); - break; - case "funder": - funder = n.getText(); - break; - case "CD_PROJECT_NUMBER": - grantId = n.getText(); - break; - default: - break; - } - } - String nsp = null; - - switch (funder.toLowerCase()) { - case "ec": - if (funding == null) { - return null; - } - if (funding.toLowerCase().contains("h2020")) { - nsp = "corda__h2020::"; - } else if (funding.toLowerCase().contains("he")) { - nsp = "corda_____he::"; - } else { - nsp = "corda_______::"; - } - break; - case "tubitak": - nsp = "tubitakf____::"; - break; - case "dfg": - nsp = "dfgf________::"; - break; - default: - StringBuilder bld = new StringBuilder(); - bld.append(funder.toLowerCase()); - for (int i = funder.length(); i < 12; i++) - bld.append("_"); - bld.append("::"); - nsp = bld.toString(); - } - - return prefix + "|" + nsp + DHPUtils.md5(grantId); - } - -} diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 527e324..fab8fb9 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -10,6 +10,8 @@ import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; +import eu.dnetlib.dhp.communityapi.model.CommunityEntityMap; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -58,8 +60,9 @@ public class SparkOrganizationRelation implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final OrganizationMap organizationMap = new Gson() - .fromJson(parser.get("organizationCommunityMap"), OrganizationMap.class); + UtilCommunityAPI queryCommunityAPI = new UtilCommunityAPI(); + final CommunityEntityMap organizationMap = queryCommunityAPI.getCommunityOrganization(); + final String serializedOrganizationMap = new Gson().toJson(organizationMap); log.info("organization map : {}", serializedOrganizationMap); @@ -79,7 +82,7 @@ public class SparkOrganizationRelation implements Serializable { } - private static void extractRelation(SparkSession spark, String inputPath, OrganizationMap organizationMap, + private static void extractRelation(SparkSession spark, String inputPath, CommunityEntityMap organizationMap, String outputPath, String communityMapPath) { CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath); @@ -129,7 +132,7 @@ public class SparkOrganizationRelation implements Serializable { } @NotNull - private static Consumer getMergedRelsConsumer(OrganizationMap organizationMap, + private static Consumer getMergedRelsConsumer(CommunityEntityMap organizationMap, List relList, CommunityMap communityMap) { return mergedRels -> { String oId = mergedRels.getOrganizationId(); diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java index b8a3b90..2231dac 100644 --- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java +++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/csv/DumpCommunities.java @@ -17,14 +17,11 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.QueryCommunityAPI; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.dhp.oa.graph.dump.UtilCommunityAPI; /** * @author miriam.baglioni @@ -37,7 +34,7 @@ public class DumpCommunities implements Serializable { private final BufferedWriter writer; private final static String HEADER = "id" + Constants.SEP + "name" + Constants.SEP + "acronym" + Constants.SEP + " description \n"; - private final transient QueryCommunityAPI queryCommunityAPI; + private final transient UtilCommunityAPI queryCommunityAPI; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -57,7 +54,7 @@ public class DumpCommunities implements Serializable { final List communities = Arrays.asList(split(parser.get("communities"), ";")); - final DumpCommunities dc = new DumpCommunities(outputPath, nameNode, parser.get("isLookUpUrl")); + final DumpCommunities dc = new DumpCommunities(outputPath, nameNode); dc.writeCommunity(communities); @@ -79,9 +76,9 @@ public class DumpCommunities implements Serializable { writer.close(); } - public DumpCommunities(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws Exception { + public DumpCommunities(String hdfsPath, String hdfsNameNode) throws Exception { final Configuration conf = new Configuration(); - queryCommunityAPI = new QueryCommunityAPI(); + queryCommunityAPI = new UtilCommunityAPI(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml index 5d62bd9..a6e68d0 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/countryresults/oozie_app/workflow.xml @@ -88,7 +88,6 @@ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml index bacf0a3..c29798d 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/csv/oozie_app/workflow.xml @@ -81,7 +81,6 @@ eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities --outputPath${outputPath}/community --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} --communities${communities} @@ -143,7 +142,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --workingPath${outputPath}/workingDir --resultTypepublication @@ -169,7 +167,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --workingPath${outputPath}/workingDir --resultTypedataset @@ -195,7 +192,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --workingPath${outputPath}/workingDir --resultTypeotherresearchproduct @@ -221,7 +217,6 @@ --sourcePath${sourcePath} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --workingPath${outputPath}/workingDir --resultTypesoftware @@ -252,9 +247,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --workingPath${outputPath}/workingDir - --outputPath${outputPath} - diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json index 225819f..31d8619 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_cm_parameters.json @@ -1,12 +1,6 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": true - }, { "paramName":"nn", "paramLongName":"nameNode", diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json index 9946e94..ba359ce 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_entity_parameter.json @@ -1,11 +1,6 @@ [ - { - "paramName":"is", - "paramLongName":"isLookUpUrl", - "paramDescription": "URL of the isLookUp Service", - "paramRequired": false - }, + { "paramName": "hdfs", "paramLongName": "hdfsPath", @@ -17,12 +12,8 @@ "paramLongName": "nameNode", "paramDescription": "the name node", "paramRequired": true - },{ - "paramName": "md", - "paramLongName": "masterDuplicate", - "paramDescription": "the master duplicate path for datasource deduplication", - "paramRequired": false -} + } + ] diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json index c27a923..70abce6 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_organization_parameters.json @@ -1,11 +1,6 @@ [ - { - "paramName":"ocm", - "paramLongName":"organizationCommunityMap", - "paramDescription": "the organization community map association", - "paramRequired": false - }, + { "paramName":"s", "paramLongName":"sourcePath", diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml index 165afad..e17c6ce 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/main/oozie_app/workflow.xml @@ -167,7 +167,6 @@ eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap --outputPath${workingDir}/communityMap --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} --singleDeposition${singleDeposition} --communityId${communityId} diff --git a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml index 4b9983b..9d4350f 100644 --- a/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml +++ b/dump/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/wf/subworkflows/complete/oozie_app/workflow.xml @@ -85,20 +85,8 @@ - + - - - eu.dnetlib.dhp.oa.graph.dump.subset.ReadMasterDuplicateFromDB - --hdfsPath${workingDir}/masterduplicate - --hdfsNameNode${nameNode} - --postgresUrl${postgresURL} - --postgresUser${postgresUser} - --postgresPassword${postgresPassword} - - - - @@ -349,7 +337,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextEntities --hdfsPath${outputPath}/communities_infrastructures/community_infrastructure.json.gz --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} @@ -360,8 +347,6 @@ eu.dnetlib.dhp.oa.graph.dump.complete.CreateContextRelation --hdfsPath${workingDir}/relation/context --nameNode${nameNode} - --isLookUpUrl${isLookUpUrl} - --masterDuplicate${workingDir}/masterduplicate @@ -386,7 +371,6 @@ --sourcePath${sourcePath}/relation --outputPath${workingDir}/relation/contextOrg - --organizationCommunityMap${organizationCommunityMap} --communityMapPath${communityMapPath} diff --git a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index 9f3cb84..6fe055b 100644 --- a/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dump/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -1,11 +1,7 @@ package eu.dnetlib.dhp.oa.graph.dump; -import static org.mockito.Mockito.lenient; - import java.io.IOException; -import java.util.Arrays; -import java.util.List; import java.util.Map; import org.dom4j.DocumentException; @@ -13,24 +9,22 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) class QueryInformationSystemTest { - private QueryCommunityAPI queryInformationSystem; + private UtilCommunityAPI queryInformationSystem; private Map map; @BeforeEach public void setUp() throws ISLookUpException, DocumentException, SAXException, IOException { - queryInformationSystem = new QueryCommunityAPI(); + queryInformationSystem = new UtilCommunityAPI(); map = queryInformationSystem.getCommunityMap(false, null); }