From 5121cbaf6aa76a3ede5ffc1a28f285eb4d0a6d88 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Tue, 9 Jun 2020 15:37:46 +0200
Subject: [PATCH] New classes for the external dump: only the classes needed
 to dump products

---
 .../dhp/schema/dump/oaf/AccessRight.java      |   4 +
 .../dhp/schema/dump/oaf/Container.java        | 120 +++++++
 .../dnetlib/dhp/schema/dump/oaf/Context.java  |  10 +-
 .../dhp/schema/dump/oaf/ControlledField.java  |  11 +
 .../dnetlib/dhp/schema/dump/oaf/Dataset.java  |   2 +-
 .../schema/dump/oaf/ExternalReference.java    |  26 +-
 .../dnetlib/dhp/schema/dump/oaf/KeyValue.java |   6 +
 .../eu/dnetlib/dhp/schema/dump/oaf/Oaf.java   |   8 +
 .../dhp/schema/dump/oaf/OafEntity.java        |  15 +-
 .../schema/dump/oaf/OtherResearchProduct.java |   2 +-
 .../dnetlib/dhp/schema/dump/oaf/Projects.java |  67 ++++
 .../dhp/schema/dump/oaf/Publication.java      |  17 +
 .../dhp/schema/dump/oaf/Qualifier.java        |  40 +--
 .../dnetlib/dhp/schema/dump/oaf/Result.java   |  28 +-
 .../dnetlib/dhp/schema/dump/oaf/Software.java |   4 +-
 .../dhp/oa/graph/dump/CommunityMap.java       |   4 +
 .../dnetlib/dhp/oa/graph/dump/Constants.java  |   4 +
 .../eu/dnetlib/dhp/oa/graph/dump/Mapper.java  |   4 +
 .../oa/graph/dump/QueryInformationSystem.java |  54 +++
 .../dhp/oa/graph/dump/ResultProject.java      |   4 +
 .../dump/SparkDumpCommunityProducts.java      | 122 +++++++
 .../graph/dump/SparkPrepareResultProject.java |   4 +
 .../oa/graph/dump/SparkSplitForCommunity.java |   4 +
 .../oa/graph/dump/SparkUpdateProjectInfo.java | 105 ++++++
 .../eu/dnetlib/dhp/oa/graph/dump/Utils.java   |   4 +
 .../dhp/oa/graph/dump/input_parameters.json   |  20 ++
 .../graph/dump/oozie_app/config-default.xml   |  26 ++
 .../dhp/oa/graph/dump/oozie_app/workflow.xml  | 336 ++++++++++++++++++
 .../graph/dump/project_input_parameters.json  |  48 +++
 .../dump/project_prepare_parameters.json      |  20 ++
 .../dhp/oa/graph/dump/split_parameters.json   |  48 +++
 31 files changed, 1094 insertions(+), 73 deletions(-)
 create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java
 create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Projects.java
 create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Publication.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/CommunityMap.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Mapper.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultProject.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkDumpCommunityProducts.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkPrepareResultProject.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkUpdateProjectInfo.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prepare_parameters.json
 create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json

diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java
index cf3eb43f1..381cb8b64 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/AccessRight.java
@@ -1,5 +1,7 @@
 package eu.dnetlib.dhp.schema.dump.oaf;
 
+import com.fasterxml.jackson.annotation.JsonProperty;
+
 public class AccessRight extends Qualifier {
 
     private String schema;
@@ -11,4 +13,6 @@ public class AccessRight extends Qualifier {
     public void setSchema(String schema) {
         this.schema = schema;
     }
+
+
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java
new file mode 100644
index 000000000..6bca44fa3
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Container.java
@@ -0,0 +1,120 @@
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+public class Container implements Serializable {
+
+    private String name;
+
+    private String issnPrinted;
+
+    private String issnOnline;
+
+    private String issnLinking;
+
+    private String ep;
+
+    private String iss;
+
+    private String sp;
+
+    private String vol;
+
+    private String edition;
+
+    private String conferenceplace;
+
+    private String conferencedate;
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getIssnPrinted() {
+        return issnPrinted;
+    }
+
+    public void setIssnPrinted(String issnPrinted) {
+        this.issnPrinted = issnPrinted;
+    }
+
+    public String getIssnOnline() {
+        return issnOnline;
+    }
+
+    public void setIssnOnline(String issnOnline) {
+        this.issnOnline = issnOnline;
+    }
+
+    public String getIssnLinking() {
+        return issnLinking;
+    }
+
+    public void setIssnLinking(String issnLinking) {
+        this.issnLinking = issnLinking;
+    }
+
+    public String getEp() {
+        return ep;
+    }
+
+    public void setEp(String ep) {
+        this.ep = ep;
+    }
+
+    public String getIss() {
+        return iss;
+    }
+
+    public void setIss(String iss) {
+        this.iss = iss;
+    }
+
+    public String getSp() {
+        return sp;
+    }
+
+    public void setSp(String sp) {
+        this.sp = sp;
+    }
+
+    public String getVol() {
+        return vol;
+    }
+
+    public void setVol(String vol) {
+        this.vol = vol;
+    }
+
+    public String getEdition() {
+        return edition;
+    }
+
+    public void setEdition(String edition) {
+        this.edition = edition;
+    }
+
+    public String getConferenceplace() {
+        return conferenceplace;
+    }
+
+    public void setConferenceplace(String conferenceplace) {
+        this.conferenceplace = conferenceplace;
+    }
+
+    public String getConferencedate() {
+        return conferencedate;
+    }
+
+    public void setConferencedate(String conferencedate) {
+        this.conferencedate = conferencedate;
+    }
+
+}
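Note: Container mirrors, field for field, the internal Journal model (hence the "// Journal" comment on the new Result.container field further down in this patch). A factory in the style of the other newInstance methods introduced here could bridge the two models; the following is only a sketch, not part of this commit, and the getters on eu.dnetlib.dhp.schema.oaf.Journal are assumed to match the field names above:

    // hypothetical bridge from the internal Journal model, not included in this commit
    public static Container newInstance(eu.dnetlib.dhp.schema.oaf.Journal j) {
        Container c = new Container();
        c.setName(j.getName());
        c.setIssnPrinted(j.getIssnPrinted());
        c.setIssnOnline(j.getIssnOnline());
        c.setIssnLinking(j.getIssnLinking());
        c.setEp(j.getEp());
        c.setIss(j.getIss());
        c.setSp(j.getSp());
        c.setVol(j.getVol());
        c.setEdition(j.getEdition());
        c.setConferenceplace(j.getConferenceplace());
        c.setConferencedate(j.getConferencedate());
        return c;
    }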
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Context.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Context.java
index 883d3e8e6..3d389a295 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Context.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Context.java
@@ -2,14 +2,16 @@
 
 package eu.dnetlib.dhp.schema.dump.oaf;
 
-public class Context extends Qualifier {
-    private String provenance;
+import java.util.List;
 
-    public String getProvenance() {
+public class Context extends Qualifier {
+    private List provenance;
+
+    public List getProvenance() {
         return provenance;
     }
 
-    public void setProvenance(String provenance) {
+    public void setProvenance(List provenance) {
         this.provenance = provenance;
     }
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java
index c77cc60b8..334a5a460 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ControlledField.java
@@ -1,5 +1,7 @@
 package eu.dnetlib.dhp.schema.dump.oaf;
 
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
 import java.io.Serializable;
 
 public class ControlledField implements Serializable {
@@ -21,4 +23,13 @@ public class ControlledField implements Serializable {
     public void setValue(String value) {
         this.value = value;
     }
+
+    public static ControlledField newInstance(StructuredProperty pid) {
+        ControlledField cf = new ControlledField();
+
+        cf.scheme = pid.getQualifier().getClassid();
+        cf.value = pid.getValue();
+
+        return cf;
+    }
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Dataset.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Dataset.java
index 0a1864dd6..88b7ce8c3 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Dataset.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Dataset.java
@@ -17,7 +17,7 @@ public class Dataset extends Result implements Serializable {
     private List geolocation;
 
     public Dataset() {
-        setResulttype(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
+        setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
     }
 
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ExternalReference.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ExternalReference.java
index d1cdb049c..ac9f211ad 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ExternalReference.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/ExternalReference.java
@@ -1,6 +1,8 @@
 package eu.dnetlib.dhp.schema.dump.oaf;
 
+import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
+
 import java.io.Serializable;
 import java.util.Objects;
 
 // ExtraInfo
@@ -56,22 +58,14 @@ public class ExternalReference implements Serializable {
         this.value = value;
     }
 
-    @Override
-    public boolean equals(Object o) {
-        if (this == o)
-            return true;
-        if (o == null || getClass() != o.getClass())
-            return false;
-        ExternalReference extraInfo = (ExternalReference) o;
-        return Objects.equals(name, extraInfo.name)
-            && Objects.equals(typology, extraInfo.typology)
-            && Objects.equals(provenance, extraInfo.provenance)
-            && Objects.equals(trust, extraInfo.trust)
-            && Objects.equals(value, extraInfo.value);
-    }
+    public static ExternalReference newInstance(ExtraInfo ei) {
+        ExternalReference er = new ExternalReference();
 
-    @Override
-    public int hashCode() {
-        return Objects.hash(name, typology, provenance, trust, value);
+        er.name = ei.getName();
+        er.typology = ei.getTypology();
+        er.provenance = ei.getProvenance();
+        er.trust = ei.getTrust();
+        er.value = ei.getValue();
+        return er;
     }
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java
index d3785e340..17fb3d75e 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/KeyValue.java
@@ -31,6 +31,12 @@ public class KeyValue implements Serializable {
     }
 
+    public static KeyValue newInstance(String key, String value) {
+        KeyValue inst = new KeyValue();
+        inst.key = key;
+        inst.value = value;
+        return inst;
+    }
 
     @JsonIgnore
     public boolean isBlank() {
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Oaf.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Oaf.java
index 362a1f2ca..108bfd847 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Oaf.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Oaf.java
@@ -5,6 +5,7 @@ package eu.dnetlib.dhp.schema.dump.oaf;
 import java.io.Serializable;
 import java.util.List;
 import java.util.Objects;
+import java.util.stream.Collectors;
 
 public abstract class Oaf implements Serializable {
 
@@ -32,4 +33,11 @@ public abstract class Oaf implements Serializable {
         this.lastupdatetimestamp = lastupdatetimestamp;
     }
 
+//    public void setAllowedValues(eu.dnetlib.dhp.schema.oaf.Oaf o) {
+//        collectedfrom = o.getCollectedfrom().stream().map(cf -> KeyValue.newInstance(cf)).collect(Collectors.toList());
+//
+//        lastupdatetimestamp = o.getLastupdatetimestamp();
+//
+//    }
+
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OafEntity.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OafEntity.java
index b109286ca..0284692f1 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OafEntity.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OafEntity.java
@@ -6,6 +6,7 @@ package eu.dnetlib.dhp.schema.dump.oaf;
 import java.io.Serializable;
 import java.util.List;
 
+
 public abstract class OafEntity extends Oaf implements Serializable {
 
     private String id;
@@ -16,9 +17,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
 
     private String dateofcollection;
 
-
-    private List<ExternalReference> externalReferences; // extraInfo
-
+    private List<Projects> projects;
 
     public String getId() {
         return id;
@@ -52,11 +51,13 @@ public abstract class OafEntity extends Oaf implements Serializable {
         this.dateofcollection = dateofcollection;
     }
 
-    public List<ExternalReference> getExternalReferences() {
-        return externalReferences;
+    public List<Projects> getProjects() {
+        return projects;
     }
 
-    public void setExternalReferences(List<ExternalReference> externalReferences) {
-        this.externalReferences = externalReferences;
+    public void setProjects(List<Projects> projects) {
+        this.projects = projects;
     }
+
+
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OtherResearchProduct.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OtherResearchProduct.java
index 29c01d00c..ae55902b1 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OtherResearchProduct.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/OtherResearchProduct.java
@@ -16,7 +16,7 @@ public class OtherResearchProduct extends Result implements Serializable {
     private List<String> tool;
 
     public OtherResearchProduct() {
-        setResulttype(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
+        setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
     }
 
     public List<String> getContactperson() {
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Projects.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Projects.java
new file mode 100644
index 000000000..09e7565c8
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Projects.java
@@ -0,0 +1,67 @@
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import eu.dnetlib.dhp.schema.oaf.Project;
+
+import java.util.List;
+
+public class Projects {
+
+    private String id; // OpenAIRE id
+
+    private String code;
+
+    private String acronym;
+
+    private String title;
+
+    private List funding_tree;
+
+    public String getId() {
+        return id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getCode() {
+        return code;
+    }
+
+    public void setCode(String code) {
+        this.code = code;
+    }
+
+    public String getAcronym() {
+        return acronym;
+    }
+
+    public void setAcronym(String acronym) {
+        this.acronym = acronym;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public List getFunding_tree() {
+        return funding_tree;
+    }
+
+    public void setFunding_tree(List funding_tree) {
+        this.funding_tree = funding_tree;
+    }
+
+    public static Projects newInstance(String id, String code, String acronym, String title, List funding_tree) {
+        Projects projects = new Projects();
+        projects.setAcronym(acronym);
+        projects.setCode(code);
+        projects.setFunding_tree(funding_tree);
+        projects.setId(id);
+        projects.setTitle(title);
+        return projects;
+    }
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Publication.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Publication.java
new file mode 100644
index 000000000..0cf481372
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Publication.java
@@ -0,0 +1,17 @@
+
+package eu.dnetlib.dhp.schema.dump.oaf;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+
+import java.io.Serializable;
+
+public class Publication extends Result implements Serializable {
+
+    public Publication() {
+        setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
+    }
+
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java
index b06c45eec..260ea2ca2 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Qualifier.java
@@ -27,40 +27,10 @@ public class Qualifier implements Serializable {
         this.label = label;
     }
 
-    public String toComparableString() {
-        return isBlank()
-            ? ""
-            : String
-                .format(
-                    "%s::%s::%s::%s",
-                    code != null ? code : "",
-                    label != null ? label : "");
-    }
-
-    @JsonIgnore
-    public boolean isBlank() {
-        return StringUtils.isBlank(code)
-            && StringUtils.isBlank(label);
-    }
-
-    @Override
-    public int hashCode() {
-        return toComparableString().hashCode();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (this == obj)
-            return true;
-        if (obj == null)
-            return false;
-        if (getClass() != obj.getClass())
-            return false;
-
-        Qualifier other = (Qualifier) obj;
-
-        return toComparableString().equals(other.toComparableString());
+    public static Qualifier newInstance(String code, String value) {
+        Qualifier qualifier = new Qualifier();
+        qualifier.setCode(code);
+        qualifier.setLabel(value);
+        return qualifier;
     }
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java
index d82e8e0ee..3bb9aa940 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Result.java
@@ -24,7 +24,6 @@ public class Result extends OafEntity implements Serializable {
 
     private String subtitle;
 
-
     private List description;
 
     private String publicationdata; // dateofacceptance
@@ -49,21 +48,32 @@ public class Result extends OafEntity implements Serializable {
 
     private List instance;
 
+    private Container container; // Journal
+
     public List getAuthor() {
         return author;
     }
 
+    public String getType() {
+        return type;
+    }
+
+    public void setType(String type) {
+        this.type = type;
+    }
+
+    public Container getContainer() {
+        return container;
+    }
+
+    public void setContainer(Container container) {
+        this.container = container;
+    }
+
     public void setAuthor(List author) {
         this.author = author;
     }
 
-    public String getResulttype() {
-        return type;
-    }
-
-    public void setResulttype(String resulttype) {
-        this.type = resulttype;
-    }
 
     public Qualifier getLanguage() {
         return language;
@@ -200,4 +210,6 @@ public class Result extends OafEntity implements Serializable {
     public void setInstance(List instance) {
         this.instance = instance;
     }
+
+
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Software.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Software.java
index fba6d02f0..0d85f5ef9 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Software.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/dump/oaf/Software.java
@@ -16,7 +16,7 @@ public class Software extends Result implements Serializable {
     private String programmingLanguage;
 
     public Software() {
-        setResulttype(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
+        setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
     }
 
     public List<String> getDocumentationUrl() {
@@ -42,4 +42,6 @@ public class Software extends Result implements Serializable {
     public void setProgrammingLanguage(String programmingLanguage) {
         this.programmingLanguage = programmingLanguage;
     }
+
+
 }
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/CommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/CommunityMap.java
new file mode 100644
index 000000000..8b184f7dc
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/CommunityMap.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class CommunityMap {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java
new file mode 100644
index 000000000..a37147aef
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Constants.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class Constants {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Mapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Mapper.java
new file mode 100644
index 000000000..346881568
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Mapper.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class Mapper {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
new file mode 100644
index 000000000..9014bd99d
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java
@@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Maps;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.Element;
+import org.dom4j.Node;
+import org.dom4j.io.SAXReader;
+
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class QueryInformationSystem {
+
+    private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
+        + "  where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] "
+        + "  return "
+        + "<community> "
+        + "{$x//CONFIGURATION/context/@id}"
+        + "{$x//CONFIGURATION/context/@label}"
+        + "</community>";
+
+    public static Map<String, String> getCommunityMap(final String isLookupUrl)
+        throws ISLookUpException, DocumentException {
+        ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
+        final List<String> res = isLookUp.quickSearchProfile(XQUERY);
+
+        final Map<String, String> communityMap = new HashMap<>();
+
+        res.stream().forEach(xml -> {
+            final Document doc;
+            try {
+                doc = new SAXReader().read(new StringReader(xml));
+                Element root = doc.getRootElement();
+                communityMap.put(root.attribute("id").getValue(), root.attribute("label").getValue());
+            } catch (DocumentException e) {
+                e.printStackTrace();
+            }
+        });
+
+        return communityMap;
+    }
+
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultProject.java
new file mode 100644
index 000000000..8dc354972
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultProject.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class ResultProject {
+}
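Note: QueryInformationSystem.getCommunityMap above returns the id -> label map of all community and research-initiative contexts registered in the Information System. A minimal usage sketch (the lookup URL is a placeholder):

    // hypothetical invocation; the URL below is a placeholder
    Map<String, String> communities =
        QueryInformationSystem.getCommunityMap("http://lookup.example.org/isLookUp");
    communities.forEach((id, label) -> System.out.println(id + " -> " + label));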
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkDumpCommunityProducts.java
new file mode 100644
index 000000000..0e84358a1
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkDumpCommunityProducts.java
@@ -0,0 +1,122 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Context;
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.oaf.Result;
+
+public class SparkDumpCommunityProducts implements Serializable {
+
+    private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);
+
+    public static void main(String[] args) throws Exception {
+        String jsonConfiguration = IOUtils
+            .toString(
+                SparkDumpCommunityProducts.class
+                    .getResourceAsStream(
+                        "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));
+
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+        parser.parseArgument(args);
+
+        Boolean isSparkSessionManaged = Optional
+            .ofNullable(parser.get("isSparkSessionManaged"))
+            .map(Boolean::valueOf)
+            .orElse(Boolean.TRUE);
+        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+        final String inputPath = parser.get("sourcePath");
+        log.info("inputPath: {}", inputPath);
+
+        final String outputPath = parser.get("outputPath");
+        log.info("outputPath: {}", outputPath);
+
+        final String resultClassName = parser.get("resultTableName");
+        log.info("resultTableName: {}", resultClassName);
+
+        final String dumpClassName = parser.get("dumpClassName");
+        log.info("dumpClassName: {}", dumpClassName);
+
+        final String isLookUpUrl = parser.get("isLookUpUrl");
+        log.info("isLookUpUrl: {}", isLookUpUrl);
+
+        final String resultType = parser.get("resultType");
+        log.info("resultType: {}", resultType);
+
+        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
+        Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> dumpClazz =
+            (Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result>) Class.forName(dumpClassName);
+
+        SparkConf conf = new SparkConf();
+
+        Map<String, String> communityMap = QueryInformationSystem.getCommunityMap(isLookUpUrl);
+
+        runWithSparkSession(
+            conf,
+            isSparkSessionManaged,
+            spark -> {
+                Utils.removeOutputDir(spark, outputPath);
+                execDump(spark, inputPath, outputPath + "/" + resultType, communityMap, inputClazz, dumpClazz);
+            });
+    }
+
+    private static <I extends Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
+        SparkSession spark,
+        String inputPath,
+        String outputPath,
+        Map<String, String> communityMap,
+        Class<I> inputClazz,
+        Class<O> dumpClazz) {
+
+        Set<String> communities = communityMap.keySet();
+        Dataset<I> tmp = Utils.readPath(spark, inputPath, inputClazz);
+        tmp
+            .map((MapFunction<I, O>) value -> {
+                Optional<List<Context>> inputContext = Optional.ofNullable(value.getContext());
+                if (!inputContext.isPresent()) {
+                    return null;
+                }
+                // keep only the results tagged with at least one of the configured communities
+                List<String> toDumpFor = inputContext.get().stream().map(c -> {
+                    if (communities.contains(c.getId())) {
+                        return c.getId();
+                    }
+                    return null;
+                }).filter(Objects::nonNull).collect(Collectors.toList());
+                if (toDumpFor.size() == 0) {
+                    return null;
+                }
+                return Mapper.map(value, communityMap);
+            }, Encoders.bean(dumpClazz))
+            .write()
+            .mode(SaveMode.Overwrite)
+            .option("compression", "gzip")
+            .json(outputPath);
+    }
+
+}
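Note: SparkDumpCommunityProducts reads six options (sourcePath, outputPath, resultTableName, dumpClassName, isLookUpUrl, resultType) plus isSparkSessionManaged, while input_parameters.json below currently declares only three of them. Assuming the spec is extended accordingly, a local invocation could look like this sketch (paths and URL are placeholders):

    // hypothetical local run; paths and URL are placeholders
    SparkDumpCommunityProducts.main(new String[] {
        "-isSparkSessionManaged", "true",
        "-sourcePath", "/tmp/graph/publication",
        "-outputPath", "/tmp/dump",
        "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
        "-dumpClassName", "eu.dnetlib.dhp.schema.dump.oaf.Publication",
        "-isLookUpUrl", "http://lookup.example.org/isLookUp",
        "-resultType", "publication"
    });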
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkPrepareResultProject.java
new file mode 100644
index 000000000..2626cecd9
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkPrepareResultProject.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class SparkPrepareResultProject {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java
new file mode 100644
index 000000000..ec1c23a2e
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkSplitForCommunity.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class SparkSplitForCommunity {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkUpdateProjectInfo.java
new file mode 100644
index 000000000..31fb951fe
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SparkUpdateProjectInfo.java
@@ -0,0 +1,105 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+import eu.dnetlib.dhp.schema.dump.oaf.Projects;
+import eu.dnetlib.dhp.schema.dump.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+public class SparkUpdateProjectInfo implements Serializable {
+
+    private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
+    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    public static void main(String[] args) throws Exception {
+        String jsonConfiguration = IOUtils
+            .toString(
+                SparkUpdateProjectInfo.class
+                    .getResourceAsStream(
+                        "/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));
+
+        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+        parser.parseArgument(args);
+
+        Boolean isSparkSessionManaged = Optional
+            .ofNullable(parser.get("isSparkSessionManaged"))
+            .map(Boolean::valueOf)
+            .orElse(Boolean.TRUE);
+        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+        final String inputPath = parser.get("sourcePath");
+        log.info("inputPath: {}", inputPath);
+
+        final String outputPath = parser.get("outputPath");
+        log.info("outputPath: {}", outputPath);
+
+        final String resultClassName = parser.get("resultTableName");
+        log.info("resultTableName: {}", resultClassName);
+
+        final String resultType = parser.get("resultType");
+        log.info("resultType: {}", resultType);
+
+        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
+
+        SparkConf conf = new SparkConf();
+
+        runWithSparkSession(
+            conf,
+            isSparkSessionManaged,
+            spark -> {
+                Utils.removeOutputDir(spark, outputPath);
+                extend(spark, inputPath, outputPath, resultType, inputClazz);
+            });
+    }
+
+    private static <R extends Result> void extend(
+        SparkSession spark,
+        String inputPath,
+        String outputPath,
+        String resultType,
+        Class<R> inputClazz) {
+
+        Dataset<R> result = Utils.readPath(spark, inputPath + "/" + resultType, inputClazz);
+        Dataset<Relation> relation = Utils
+            .readPath(spark, inputPath + "/relation", Relation.class)
+            .filter("dataInfo.deletedbyinference = false and relClass = 'produces'");
+        Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
+
+        // pair each 'produces' relation (project -> result) with its project
+        Dataset<Tuple2<Relation, Project>> relProject = relation
+            .joinWith(project, relation.col("source").equalTo(project.col("id")));
+
+        // attach to each result the projects reachable through the paired relations
+        result
+            .joinWith(relProject, result.col("id").equalTo(relProject.col("_1.target")), "left")
+            .groupByKey(
+                (MapFunction<Tuple2<R, Tuple2<Relation, Project>>, String>) value -> value._1().getId(),
+                Encoders.STRING())
+            .mapGroups(
+                (MapGroupsFunction<String, Tuple2<R, Tuple2<Relation, Project>>, R>) (resultId, it) -> {
+                    R res = null;
+                    List<Projects> projectList = new ArrayList<>();
+                    while (it.hasNext()) {
+                        Tuple2<R, Tuple2<Relation, Project>> value = it.next();
+                        res = value._1();
+                        if (value._2() != null) {
+                            Project p = value._2()._2();
+                            projectList
+                                .add(
+                                    Projects
+                                        .newInstance(
+                                            p.getId(),
+                                            p.getCode() != null ? p.getCode().getValue() : null,
+                                            p.getAcronym() != null ? p.getAcronym().getValue() : null,
+                                            p.getTitle() != null ? p.getTitle().getValue() : null,
+                                            null)); // funding tree mapping not defined yet
+                        }
+                    }
+                    if (!projectList.isEmpty()) {
+                        res.setProjects(projectList);
+                    }
+                    return res;
+                },
+                Encoders.bean(inputClazz))
+            .write()
+            .mode(SaveMode.Overwrite)
+            .option("compression", "gzip")
+            .json(outputPath);
+    }
+
+}
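Note: the net effect of the enrichment above is a projects element on each dumped record. Constructed by hand it amounts to the following sketch (identifier and funding values are placeholders; the funding tree is passed as null because its dump shape is not defined in this patch):

    // hypothetical construction of the new projects element; values are placeholders
    Projects p = Projects.newInstance(
        "40|project_id_placeholder",   // placeholder OpenAIRE project id
        "123456",                      // grant code
        "ACRONYM",
        "Project title",
        null);                         // funding_tree not mapped yet
    Result r = new Publication();
    r.setProjects(Arrays.asList(p));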
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java
new file mode 100644
index 000000000..eae3194c5
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java
@@ -0,0 +1,4 @@
+package eu.dnetlib.dhp.oa.graph.dump;
+
+public class Utils {
+}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
new file mode 100644
index 000000000..82714d973
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json
@@ -0,0 +1,20 @@
+[
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "out",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path used to store temporary output files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "ssm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+    "paramRequired": false
+  }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml
new file mode 100644
index 000000000..9608732ed
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/config-default.xml
@@ -0,0 +1,26 @@
+<configuration>
+    <property>
+        <name>jobTracker</name>
+        <value>yarnRM</value>
+    </property>
+    <property>
+        <name>nameNode</name>
+        <value>hdfs://nameservice1</value>
+    </property>
+    <property>
+        <name>oozie.use.system.libpath</name>
+        <value>true</value>
+    </property>
+    <property>
+        <name>hiveMetastoreUris</name>
+        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
+    </property>
+    <property>
+        <name>hiveJdbcUrl</name>
+        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
+    </property>
+    <property>
+        <name>hiveDbName</name>
+        <value>openaire</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml
new file mode 100644
index 000000000..8566d7667
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/oozie_app/workflow.xml
@@ -0,0 +1,336 @@
+<workflow-app name="import_graph_as_hive_DB" xmlns="uri:oozie:workflow:0.5">
+
+    <parameters>
+        <property>
+            <name>inputPath</name>
+            <description>the source path</description>
+        </property>
+        <property>
+            <name>hiveDbName</name>
+            <description>the target hive database name</description>
+        </property>
+        <property>
+            <name>hiveJdbcUrl</name>
+            <description>hive server jdbc url</description>
+        </property>
+        <property>
+            <name>hiveMetastoreUris</name>
+            <description>hive server metastore URIs</description>
+        </property>
+        <property>
+            <name>sparkDriverMemory</name>
+            <description>memory for driver process</description>
+        </property>
+        <property>
+            <name>sparkExecutorMemory</name>
+            <description>memory for individual executor</description>
+        </property>
+        <property>
+            <name>sparkExecutorCores</name>
+            <description>number of cores used by single executor</description>
+        </property>
+        <property>
+            <name>oozieActionShareLibForSpark2</name>
+            <description>oozie action sharelib for spark 2.*</description>
+        </property>
+        <property>
+            <name>spark2ExtraListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
+            <description>spark 2.* extra listeners classname</description>
+        </property>
+        <property>
+            <name>spark2SqlQueryExecutionListeners</name>
+            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
+            <description>spark 2.* sql query execution listeners classname</description>
+        </property>
+        <property>
+            <name>spark2YarnHistoryServerAddress</name>
+            <description>spark 2.* yarn history server address</description>
+        </property>
+        <property>
+            <name>spark2EventLogDir</name>
+            <description>spark 2.* event log dir location</description>
+        </property>
+    </parameters>
+
+    <global>
+        <job-tracker>${jobTracker}</job-tracker>
+        <name-node>${nameNode}</name-node>
+        <configuration>
+            <property>
+                <name>mapreduce.job.queuename</name>
+                <value>${queueName}</value>
+            </property>
+            <property>
+                <name>oozie.launcher.mapred.job.queue.name</name>
+                <value>${oozieLauncherQueueName}</value>
+            </property>
+            <property>
+                <name>oozie.action.sharelib.for.spark</name>
+                <value>${oozieActionShareLibForSpark2}</value>
+            </property>
+        </configuration>
+    </global>
+
+    <start to="reset_DB"/>
+
+    <kill name="Kill">
+        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+
+    <action name="reset_DB">
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <configuration>
+                <property>
+                    <name>hive.metastore.uris</name>
+                    <value>${hiveMetastoreUris}</value>
+                </property>
+            </configuration>
+            <jdbc-url>${hiveJdbcUrl}/${hiveDbName}</jdbc-url>
+            <script>lib/scripts/reset_db.sql</script>
+            <param>hiveDbName=${hiveDbName}</param>
+        </hive2>
+        <ok to="fork_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <fork name="fork_import">
+        <path start="import_publication"/>
+        <path start="import_dataset"/>
+        <path start="import_orp"/>
+        <path start="import_software"/>
+        <path start="import_datasource"/>
+        <path start="import_organization"/>
+        <path start="import_project"/>
+        <path start="import_relation"/>
+    </fork>
+
+    <action name="import_publication">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table publication</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/publication</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_dataset">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table dataset</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/dataset</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_orp">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table otherresearchproduct</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/otherresearchproduct</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_software">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table software</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/software</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_datasource">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table datasource</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/datasource</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_organization">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table organization</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/organization</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_project">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table project</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/project</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <action name="import_relation">
+        <spark xmlns="uri:oozie:spark-action:0.2">
+            <master>yarn</master>
+            <mode>cluster</mode>
+            <name>Import table relation</name>
+            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
+            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
+            <spark-opts>
+                --executor-memory=${sparkExecutorMemory}
+                --executor-cores=${sparkExecutorCores}
+                --driver-memory=${sparkDriverMemory}
+                --conf spark.extraListeners=${spark2ExtraListeners}
+                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
+                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
+                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
+                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
+            </spark-opts>
+            <arg>--inputPath</arg><arg>${inputPath}/relation</arg>
+            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
+            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
+            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
+        </spark>
+        <ok to="join_import"/>
+        <error to="Kill"/>
+    </action>
+
+    <join name="join_import" to="PostProcessing"/>
+
+    <action name="PostProcessing">
+        <hive2 xmlns="uri:oozie:hive2-action:0.1">
+            <configuration>
+                <property>
+                    <name>hive.metastore.uris</name>
+                    <value>${hiveMetastoreUris}</value>
+                </property>
+            </configuration>
+            <jdbc-url>${hiveJdbcUrl}/${hiveDbName}</jdbc-url>
+            <script>lib/scripts/postprocessing.sql</script>
+            <param>hiveDbName=${hiveDbName}</param>
+        </hive2>
+        <ok to="End"/>
+        <error to="Kill"/>
+    </action>
+
+    <end name="End"/>
+
+</workflow-app>
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
new file mode 100644
index 000000000..04f3c9e1b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json
@@ -0,0 +1,48 @@
+
+[
+  {
+    "paramName":"is",
+    "paramLongName":"isLookUpUrl",
+    "paramDescription": "URL of the isLookUp Service",
+    "paramRequired": true
+  },
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "out",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path used to store temporary output files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "ssm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+    "paramRequired": false
+  },
+  {
+    "paramName":"tn",
+    "paramLongName":"resultTableName",
+    "paramDescription": "the name of the result table we are currently working on",
+    "paramRequired": true
+  },
+  {
+    "paramName":"dn",
+    "paramLongName":"dumpTableName",
+    "paramDescription": "the name of the correspondent dump element",
+    "paramRequired": true
+  },
+  {
+    "paramName":"rt",
+    "paramLongName":"resultType",
+    "paramDescription": "the result type to process",
+    "paramRequired": true
+  }
+]
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prepare_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prepare_parameters.json
new file mode 100644
index 000000000..7d995f39a
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/project_prepare_parameters.json
@@ -0,0 +1,20 @@
+[
+  {
+    "paramName": "issm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "when true will stop SparkSession after job execution",
+    "paramRequired": false
+  },
+  {
+    "paramName": "s",
+    "paramLongName": "sourcePath",
+    "paramDescription": "the source path",
+    "paramRequired": true
+  },
+  {
+    "paramName": "g",
+    "paramLongName": "graphRawPath",
+    "paramDescription": "the path of the graph Raw in hdfs",
+    "paramRequired": true
+  }
+]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json
new file mode 100644
index 000000000..04f3c9e1b
--- /dev/null
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json
@@ -0,0 +1,48 @@
+
+[
+  {
+    "paramName":"is",
+    "paramLongName":"isLookUpUrl",
+    "paramDescription": "URL of the isLookUp Service",
+    "paramRequired": true
+  },
+  {
+    "paramName":"s",
+    "paramLongName":"sourcePath",
+    "paramDescription": "the path of the sequential file to read",
+    "paramRequired": true
+  },
+  {
+    "paramName": "out",
+    "paramLongName": "outputPath",
+    "paramDescription": "the path used to store temporary output files",
+    "paramRequired": true
+  },
+  {
+    "paramName": "ssm",
+    "paramLongName": "isSparkSessionManaged",
+    "paramDescription": "true if the spark session is managed, false otherwise",
+    "paramRequired": false
+  },
+  {
+    "paramName":"tn",
+    "paramLongName":"resultTableName",
+    "paramDescription": "the name of the result table we are currently working on",
+    "paramRequired": true
+  },
+  {
+    "paramName":"dn",
+    "paramLongName":"dumpTableName",
+    "paramDescription": "the name of the correspondent dump element",
+    "paramRequired": true
+  },
+  {
+    "paramName":"rt",
+    "paramLongName":"resultType",
+    "paramDescription": "the result type to process",
+    "paramRequired": true
+  }
+]
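Note: all three parameter specs above are consumed the same way by ArgumentApplicationParser, mirroring the main methods earlier in this patch; a condensed sketch:

    // loading and parsing one of the specs above (same pattern as in the main methods)
    String spec = IOUtils.toString(
        SparkUpdateProjectInfo.class.getResourceAsStream(
            "/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));
    ArgumentApplicationParser parser = new ArgumentApplicationParser(spec);
    parser.parseArgument(args);
    String sourcePath = parser.get("sourcePath");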