[SKG-IF-EOSC] Added a new indicators field to the product, plus the scheme to the denormalized topic in the result. Added the provenance classname to the provenance field of the topic in the result.

Miriam Baglioni 2024-04-05 12:59:41 +02:00
parent 292c69d819
commit f85db930d9
114 changed files with 154 additions and 8403 deletions

View File

@@ -12,8 +12,7 @@ import com.fasterxml.jackson.databind.SerializationFeature;
import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.skgif.model.*;
public class ExecCreateSchemas {
final static String DIRECTORY = "/eu/dnetlib/dhp/schema/dump/jsonschemas/";
@@ -61,14 +60,13 @@ public class ExecCreateSchemas {
ExecCreateSchemas ecs = new ExecCreateSchemas();
ecs.init();
ecs.generate(GraphResult.class, DIRECTORY, "result_schema.json");
ecs.generate(ResearchCommunity.class, DIRECTORY, "community_infrastructure_schema.json");
ecs.generate(Datasource.class, DIRECTORY, "datasource_schema.json");
ecs.generate(Project.class, DIRECTORY, "project_schema.json");
ecs.generate(Relation.class, DIRECTORY, "relation_schema.json");
ecs.generate(Organization.class, DIRECTORY, "organization_schema.json");
ecs.generate(CommunityResult.class, DIRECTORY, "community_result_schema.json");
ecs.generate(Venue.class, DIRECTORY, "venue.json");
ecs.generate(Grant.class, DIRECTORY, "grant.json");
ecs.generate(ResearchProduct.class, DIRECTORY, "product.json");
ecs.generate(Persons.class, DIRECTORY, "person.json");
ecs.generate(Topic.class, DIRECTORY, "topic.json");
ecs.generate(Organization.class, DIRECTORY, "organization.json");
ecs.generate(Datasource.class, DIRECTORY, "datasource.json");
}
}
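For reference, a minimal sketch of how a victools-based generator is typically wired together with the imifou addon module (both are imported above). The body of generate(...) is not part of this diff, so the configuration below is an assumption, not the project's actual implementation:

import com.fasterxml.jackson.databind.JsonNode;
import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;

// Hypothetical sketch: the AddonModule makes @JsonSchema(description = ...) annotations
// end up in the generated JSON schema.
public class SchemaGenerationSketch {
    public static void main(String[] args) {
        SchemaGeneratorConfigBuilder builder = new SchemaGeneratorConfigBuilder(
            SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON)
                .with(new AddonModule())
                .with(Option.SCHEMA_VERSION_INDICATOR);
        SchemaGenerator generator = new SchemaGenerator(builder.build());
        // Generate the schema for one of the SKG-IF model classes registered above
        JsonNode schema = generator.generateSchema(eu.dnetlib.dhp.skgif.model.ResearchProduct.class);
        System.out.println(schema.toPrettyString());
    }
}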

View File

@@ -1,29 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
* Used to refer to the Article Processing Charge information. It contains two parameters: -
* currency of type String to store the currency of the APC - amount of type String to store the charged amount
*/
public class APC implements Serializable {
private String currency;
private String amount;
public String getCurrency() {
return currency;
}
public void setCurrency(String currency) {
this.currency = currency;
}
public String getAmount() {
return amount;
}
public void setAmount(String amount) {
this.amount = amount;
}
}

View File

@@ -1,27 +0,0 @@
package eu.dnetlib.dhp.oa.model;
/**
* AccessRight. Used to represent the result access rights. It extends the eu.dnetlib.dhp.schema.dump.oaf.BestAccessRight
* element with a value for the Open Access route
*/
public class AccessRight extends BestAccessRight {
private OpenAccessRoute openAccessRoute;
public static AccessRight newInstance(String code, String label, String scheme) {
AccessRight ar = new AccessRight();
ar.setCode(code);
ar.setLabel(label);
ar.setScheme(scheme);
return ar;
}
public OpenAccessRoute getOpenAccessRoute() {
return openAccessRoute;
}
public void setOpenAccessRoute(OpenAccessRoute openAccessRoute) {
this.openAccessRoute = openAccessRoute;
}
}

View File

@@ -1,44 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class AlternateIdentifier implements Serializable {
@JsonSchema(
description = "The scheme of the identifier. It can be a persistent identifier (i.e. doi). If it is present in the alternate identifiers "
+
"it means it has not been forged by an authority for that pid. For example we collect metadata from an institutional repository that provides "
+
"as identifier for the result also the doi")
private String scheme;
@JsonSchema(description = "The value expressed in the scheme")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static AlternateIdentifier newInstance(String scheme, String value) {
AlternateIdentifier cf = new AlternateIdentifier();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@@ -1,75 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* Used to represent the generic author of the result. It has six parameters: - name of type String to store the given
* name of the author. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author name - surname of
* type String to store the family name of the author. The value for this parameter corresponds to
* eu.dnetlib.dhp.schema.oaf.Author surname - fullname of type String to store the fullname of the author. The value for
* this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author fullname - rank of type Integer to store the rank of
* the author in the result's authors list. The value for this parameter corresponds to eu.dnetlib.dhp.schema.oaf.Author
* rank - pid of type eu.dnetlib.dhp.schema.dump.oaf.Pid to store the persistent identifier for the author. For the
* moment only ORCID identifiers will be dumped. - The id element is instantiated by using the following values in the
* eu.dnetlib.dhp.schema.oaf.Result pid: * Qualifier.classid for scheme * value for value - The provenance element is
* instantiated only if the dataInfo is set for the pid in the result to be dumped. The provenance element is
* instantiated by using the following values in the eu.dnetlib.dhp.schema.oaf.Result pid: *
* dataInfo.provenanceaction.classname for provenance * dataInfo.trust for trust
*/
public class Author implements Serializable {
private String fullname;
private String name;
private String surname;
private Integer rank;
@JsonSchema(description = "The author's persistent identifiers")
private AuthorPid pid;
public String getFullname() {
return fullname;
}
public void setFullname(String fullname) {
this.fullname = fullname;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getSurname() {
return surname;
}
public void setSurname(String surname) {
this.surname = surname;
}
public Integer getRank() {
return rank;
}
public void setRank(Integer rank) {
this.rank = rank;
}
public AuthorPid getPid() {
return pid;
}
public void setPid(AuthorPid pid) {
this.pid = pid;
}
}
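As a reading aid, a hypothetical fragment showing how an Author and its ORCID pid could be assembled following the mapping described in the javadoc above (all names and values are illustrative, not taken from this commit):

public class AuthorMappingSketch {
    public static void main(String[] args) {
        // Illustrative only: scheme/value mirror the internal Qualifier.classid / value,
        // provenance mirrors dataInfo.provenanceaction.classname / dataInfo.trust.
        Author author = new Author();
        author.setName("Jane");
        author.setSurname("Doe");
        author.setFullname("Doe, Jane");
        author.setRank(1);
        author.setPid(
            AuthorPid.newInstance(
                AuthorPidSchemeValue.newInstance("orcid", "0000-0001-2345-6789"),
                Provenance.newInstance("Harvested", "0.9")));
    }
}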

View File

@@ -1,52 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent the generic persistent identifier. It has two parameters:
* - id of type
* eu.dnetlib.dhp.schema.dump.oaf.AuthorPidSchemeValue to store the scheme and value of the Persistent Identifier.
* - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store the provenance and trust of the information
*/
public class AuthorPid implements Serializable {
private AuthorPidSchemeValue id;
@JsonSchema(description = "The reason why the pid was associated to the author")
private Provenance provenance;
public AuthorPidSchemeValue getId() {
return id;
}
public void setId(AuthorPidSchemeValue pid) {
this.id = pid;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static AuthorPid newInstance(AuthorPidSchemeValue pid, Provenance provenance) {
AuthorPid p = new AuthorPid();
p.id = pid;
p.provenance = provenance;
return p;
}
public static AuthorPid newInstance(AuthorPidSchemeValue pid) {
AuthorPid p = new AuthorPid();
p.id = pid;
return p;
}
}

View File

@@ -1,40 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class AuthorPidSchemeValue implements Serializable {
@JsonSchema(description = "The author's pid scheme. OpenAIRE currently supports 'ORCID'")
private String scheme;
@JsonSchema(description = "The author's pid value in that scheme (i.e. 0000-1111-2222-3333)")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static AuthorPidSchemeValue newInstance(String scheme, String value) {
AuthorPidSchemeValue cf = new AuthorPidSchemeValue();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@@ -1,60 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* BestAccessRight. Used to represent the result best access rights. Values for this element are found against the
* COAR access right scheme. The classid of the element accessright in eu.dnetlib.dhp.schema.oaf.Result is used to get
* the COAR corresponding code whose value will be used to set the code parameter. The COAR label corresponding to the
* COAR code will be used to set the label parameter. The scheme value will always be the one referring to the COAR
* access right scheme
*/
public class BestAccessRight implements Serializable {
@JsonSchema(
description = "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/")
private String code; // the classid in the Qualifier
@JsonSchema(description = "Label for the access mode")
private String label; // the classname in the Qualifier
@JsonSchema(
description = "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/")
private String scheme;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public static BestAccessRight newInstance(String code, String label, String scheme) {
BestAccessRight ar = new BestAccessRight();
ar.code = code;
ar.label = label;
ar.scheme = scheme;
return ar;
}
}

View File

@@ -1,143 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To store information about the conference or journal where the result has been presented or published. It contains
* eleven parameters: - name of type String to store the name of the journal or conference. It corresponds to the
* parameter name of eu.dnetlib.dhp.schema.oaf.Journal - issnPrinted of type String to store the journal printed issn.
* It corresponds to the parameter issnPrinted of eu.dnetlib.dhp.schema.oaf.Journal - issnOnline of type String to store
* the journal online issn. It corresponds to the parameter issnOnline of eu.dnetlib.dhp.schema.oaf.Journal -
* issnLinking of type String to store the journal linking issn. It corresponds to the parameter issnLinking of
* eu.dnetlib.dhp.schema.oaf.Journal - ep of type String to store the end page. It corresponds to the parameter ep of
* eu.dnetlib.dhp.schema.oaf.Journal - iss of type String to store the journal issue. It corresponds to the parameter
* iss of eu.dnetlib.dhp.schema.oaf.Journal - sp of type String to store the start page. It corresponds to the parameter
* sp of eu.dnetlib.dhp.schema.oaf.Journal - vol of type String to store the Volume. It corresponds to the parameter vol
* of eu.dnetlib.dhp.schema.oaf.Journal - edition of type String to store the edition of the journal or conference
* proceeding. It corresponds to the parameter edition of eu.dnetlib.dhp.schema.oaf.Journal - conferenceplace of type
* String to store the place of the conference. It corresponds to the parameter conferenceplace of
* eu.dnetlib.dhp.schema.oaf.Journal - conferencedate of type String to store the date of the conference. It corresponds
* to the parameter conferencedate of eu.dnetlib.dhp.schema.oaf.Journal
*/
public class Container implements Serializable {
@JsonSchema(description = "Name of the journal or conference")
private String name;
private String issnPrinted;
private String issnOnline;
private String issnLinking;
@JsonSchema(description = "End page")
private String ep;
@JsonSchema(description = "Journal issue number")
private String iss;
@JsonSchema(description = "Start page")
private String sp;
@JsonSchema(description = "Volume")
private String vol;
@JsonSchema(description = "Edition of the journal or conference proceeding")
private String edition;
private String conferenceplace;
private String conferencedate;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getIssnPrinted() {
return issnPrinted;
}
public void setIssnPrinted(String issnPrinted) {
this.issnPrinted = issnPrinted;
}
public String getIssnOnline() {
return issnOnline;
}
public void setIssnOnline(String issnOnline) {
this.issnOnline = issnOnline;
}
public String getIssnLinking() {
return issnLinking;
}
public void setIssnLinking(String issnLinking) {
this.issnLinking = issnLinking;
}
public String getEp() {
return ep;
}
public void setEp(String ep) {
this.ep = ep;
}
public String getIss() {
return iss;
}
public void setIss(String iss) {
this.iss = iss;
}
public String getSp() {
return sp;
}
public void setSp(String sp) {
this.sp = sp;
}
public String getVol() {
return vol;
}
public void setVol(String vol) {
this.vol = vol;
}
public String getEdition() {
return edition;
}
public void setEdition(String edition) {
this.edition = edition;
}
public String getConferenceplace() {
return conferenceplace;
}
public void setConferenceplace(String conferenceplace) {
this.conferenceplace = conferenceplace;
}
public String getConferencedate() {
return conferencedate;
}
public void setConferencedate(String conferencedate) {
this.conferencedate = conferencedate;
}
}

View File

@@ -1,47 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* Represents the country associated to the generic entity. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a
* provenance parameter of type eu.dnetlib.dhp.schema.dump.oaf.Provenance. The country is not mapped if its value in the
* result represented in the internal format is Unknown. The values for this element correspond to: - code corresponds
* to the classid of eu.dnetlib.dhp.schema.oaf.Country - label corresponds to the classname of
* eu.dnetlib.dhp.schema.oaf.Country - provenance set only if the dataInfo associated to the Country of the result to be
* dumped is not null. In this case : - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with
* datainfo.provenanceaction.classname) - trust corresponds to dataInfo.trust
*/
public class Country implements Serializable {
@JsonSchema(description = "ISO 3166-1 alpha-2 country code (i.e. IT)")
private String code; // the classid in the Qualifier
@JsonSchema(description = "The label for that code (i.e. Italy)")
private String label; // the classname in the Qualifier
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public static Country newInstance(String code, String label) {
Country c = new Country();
c.setCode(code);
c.setLabel(label);
return c;
}
}

View File

@@ -1,43 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class Funder implements Serializable {
@JsonSchema(description = "The short name of the funder (EC)")
private String shortName;
@JsonSchema(description = "The name of the funder (European Commission)")
private String name;
@JsonSchema(
description = "Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR)")
private String jurisdiction;
public String getJurisdiction() {
return jurisdiction;
}
public void setJurisdiction(String jurisdiction) {
this.jurisdiction = jurisdiction;
}
public String getShortName() {
return shortName;
}
public void setShortName(String shortName) {
this.shortName = shortName;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}

View File

@@ -1,53 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
/**
* Represents the geolocation information. It has three parameters: - point of type String to store the point
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation point - box of type String to store the box
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation box - place of type String to store the place
* information. It corresponds to eu.dnetlib.dhp.schema.oaf.GeoLocation place
*/
public class GeoLocation implements Serializable {
private String point;
private String box;
private String place;
public String getPoint() {
return point;
}
public void setPoint(String point) {
this.point = point;
}
public String getBox() {
return box;
}
public void setBox(String box) {
this.box = box;
}
public String getPlace() {
return place;
}
public void setPlace(String place) {
this.place = place;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
}
}

View File

@@ -1,56 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 07/11/22
*/
public class ImpactIndicators implements Serializable {
Score influence;
Score influence_alt;
Score popularity;
Score popularity_alt;
Score impulse;
public Score getInfluence() {
return influence;
}
public void setInfluence(Score influence) {
this.influence = influence;
}
public Score getInfluence_alt() {
return influence_alt;
}
public void setInfluence_alt(Score influence_alt) {
this.influence_alt = influence_alt;
}
public Score getPopularity() {
return popularity;
}
public void setPopularity(Score popularity) {
this.popularity = popularity;
}
public Score getPopularity_alt() {
return popularity_alt;
}
public void setPopularity_alt(Score popularity_alt) {
this.popularity_alt = popularity_alt;
}
public Score getImpulse() {
return impulse;
}
public void setImpulse(Score impulse) {
this.impulse = impulse;
}
}

View File

@@ -1,34 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class Indicator implements Serializable {
@JsonSchema(description = "The impact measures (i.e. popularity)")
List<Score> bipIndicators;
@JsonSchema(description = "The usage counts (i.e. downloads)")
UsageCounts usageCounts;
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Score> getBipIndicators() {
return bipIndicators;
}
public void setBipIndicators(List<Score> bipIndicators) {
this.bipIndicators = bipIndicators;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public UsageCounts getUsageCounts() {
return usageCounts;
}
public void setUsageCounts(UsageCounts usageCounts) {
this.usageCounts = usageCounts;
}
}
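A small, assumed usage sketch: because @JsonInclude(NON_NULL) sits on the getters, an Indicator carrying only usage counts serializes without a "bipIndicators" key (the exact JSON shown in the comment is illustrative):

import com.fasterxml.jackson.databind.ObjectMapper;

public class IndicatorJsonSketch {
    public static void main(String[] args) throws Exception {
        UsageCounts uc = new UsageCounts();
        uc.setDownloads("10");
        uc.setViews("42");

        Indicator indicator = new Indicator();
        indicator.setUsageCounts(uc);

        // Prints something like {"usageCounts":{"downloads":"10","views":"42"}} with no bipIndicators entry
        System.out.println(new ObjectMapper().writeValueAsString(indicator));
    }
}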

View File

@@ -1,152 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* Represents the manifestations (i.e. different versions) of the result. For example: the pre-print and the published
* versions are two manifestations of the same research result. It has the following parameters: - license of type
* String to store the license applied to the instance. It corresponds to the value of the licence in the instance to be
* dumped - accessright of type eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store the accessright of the instance. -
* type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
* (dnet:publication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
* List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
* publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type
* String to store information about the review status of the instance. Possible values are 'Unknown',
* 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
* - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance
* -pid of type List<ControlledField> that is the list of pids associated to the result coming from authoritative sources for that pid
* -alternateIdentifier of type List<ControlledField> that is the list of pids associated to the result coming from NON authoritative
* sources for that pid
* -measure list<KeyValue> to represent the measure computed for this instance (for example the Bip!Finder ones). It corresponds to measures in the model
*/
public class Instance implements Serializable {
// @JsonSchema(description = "Indicators computed for this instance, for example Bip!Finder ones")
// private Indicator indicators;
private List<ResultPid> pid;
@JsonSchema(
description = "All the identifiers other than pids forged by an authorithy for the pid type (i.e. Crossref for DOIs")
private List<AlternateIdentifier> alternateIdentifier;
private String license;
@JsonSchema(description = "The accessRights for this materialization of the result")
private AccessRight accessright;
@JsonSchema(
description = "The specific sub-type of this instance (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)")
private String type;
@JsonSchema(
description = "URLs to the instance. They may link to the actual full-text or to the landing page at the hosting source. ")
private List<String> url;
@JsonSchema(
description = "The money spent to make this book or article available in Open Access. Source for this information is the OpenAPC initiative.")
private APC articleprocessingcharge;
@JsonSchema(description = "Date of the research product")
private String publicationdate;// dateofacceptance;
@JsonSchema(
description = "If this instance has been peer-reviewed or not. Allowed values are peerReviewed, " +
"nonPeerReviewed, UNKNOWN (as defined in https://api.openaire.eu/vocabularies/dnet:review_levels)")
private String refereed; // peer-review status
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getLicense() {
return license;
}
public void setLicense(String license) {
this.license = license;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public AccessRight getAccessright() {
return accessright;
}
public void setAccessright(AccessRight accessright) {
this.accessright = accessright;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getUrl() {
return url;
}
public void setUrl(List<String> url) {
this.url = url;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublicationdate() {
return publicationdate;
}
public void setPublicationdate(String publicationdate) {
this.publicationdate = publicationdate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getRefereed() {
return refereed;
}
public void setRefereed(String refereed) {
this.refereed = refereed;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public APC getArticleprocessingcharge() {
return articleprocessingcharge;
}
public void setArticleprocessingcharge(APC articleprocessingcharge) {
this.articleprocessingcharge = articleprocessingcharge;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultPid> getPid() {
return pid;
}
public void setPid(List<ResultPid> pid) {
this.pid = pid;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<AlternateIdentifier> getAlternateIdentifier() {
return alternateIdentifier;
}
public void setAlternateIdentifier(List<AlternateIdentifier> alternateIdentifier) {
this.alternateIdentifier = alternateIdentifier;
}
// @JsonInclude(JsonInclude.Include.NON_NULL)
// public Indicator getIndicators() {
// return indicators;
// }
//
// public void setIndicators(Indicator indicators) {
// this.indicators = indicators;
// }
}
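A hypothetical example following the field descriptions above: a pid collected from an authoritative source (e.g. a doi from Crossref) goes into pid, while the same identifier coming from a non-authoritative source would go into alternateIdentifier. All values are illustrative:

import java.util.Arrays;

public class InstanceSketch {
    public static void main(String[] args) {
        Instance instance = new Instance();
        instance.setType("Article"); // illustrative sub-type from dnet:result_typologies
        instance.setLicense("CC-BY-4.0");
        instance.setAccessright(
            AccessRight.newInstance(
                "c_abf2", "OPEN", // illustrative COAR open access code and label
                "http://vocabularies.coar-repositories.org/documentation/access_rights/"));
        instance.setUrl(Arrays.asList("https://example.org/article/123"));
        instance.setPublicationdate("2024-01-15");
        instance.setRefereed("peerReviewed");
        // doi collected from an authoritative source (e.g. Crossref)
        instance.setPid(Arrays.asList(ResultPid.newInstance("doi", "10.1000/182")));
        // the same doi as provided by a non-authoritative source
        instance.setAlternateIdentifier(Arrays.asList(AlternateIdentifier.newInstance("doi", "10.1000/182")));
    }
}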

View File

@@ -1,38 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class Language implements Serializable {
@JsonSchema(description = "alpha-3/ISO 639-2 code of the language")
private String code; // the classid in the Qualifier
@JsonSchema(description = "Language label in English")
private String label; // the classname in the Qualifier
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public static Language newInstance(String code, String value) {
Language qualifier = new Language();
qualifier.setCode(code);
qualifier.setLabel(value);
return qualifier;
}
}

View File

@@ -1,49 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* @author miriam.baglioni
* @Date 03/08/22
*/
public class Measure implements Serializable {
@JsonSchema(description = "The measure (i.e. class)")
private String key;
@JsonSchema(description = "The value for that measure")
private String value;
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static Measure newInstance(String key, String value) {
Measure mes = new Measure();
mes.key = key;
mes.value = value;
return mes;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
}
}

View File

@@ -1,15 +0,0 @@
package eu.dnetlib.dhp.oa.model;
/**
* @author miriam.baglioni
* @Date 19/12/23
*/
/**
* The OpenAccess color meant to be used on the result level
*/
public enum OpenAccessColor {
gold, hybrid, bronze
}

View File

@@ -1,13 +0,0 @@
package eu.dnetlib.dhp.oa.model;
/**
* This Enum models the OpenAccess status, currently including only the values from Unpaywall
*
* https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-
*/
public enum OpenAccessRoute {
gold, green, hybrid, bronze
}

View File

@@ -1,57 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* This class stores the common information about the project that will be dumped for community and for the whole
* graph - private String id to store the id of the project (OpenAIRE id) - private String code to store the grant
* agreement of the project - private String acronym to store the acronym of the project - private String title to store
* the title of the project
*/
public class Project implements Serializable {
@JsonSchema(description = "The OpenAIRE id for the project")
protected String id;// OpenAIRE id
@JsonSchema(description = "The grant agreement number")
protected String code;
@JsonSchema(description = "The acronym of the project")
protected String acronym;
protected String title;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
}

View File

@@ -1,41 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
* Indicates the process that produced (or provided) the information, and the trust associated to the information. It
* has two parameters: - provenance of type String to store the provenance of the information, - trust of type String to
* store the trust associated to the information
*/
public class Provenance implements Serializable {
private String provenance;
private String trust;
public String getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
this.provenance = provenance;
}
public String getTrust() {
return trust;
}
public void setTrust(String trust) {
this.trust = trust;
}
public static Provenance newInstance(String provenance, String trust) {
Provenance p = new Provenance();
p.provenance = provenance;
p.trust = trust;
return p;
}
public String toString() {
return provenance + trust;
}
}

View File

@@ -1,511 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent the dumped result. It will be extended in the dump for Research Communities - Research
* Initiative/Infrastructures. It has the following parameters:
* - author of type
* List<eu.dnetlib.dhpschema.dump.oaf.Author> to describe the authors of a result. For each author in the result
* represented in the internal model one author in the esternal model is produced.
* - type of type String to represent
* the category of the result. Possible values are publication, dataset, software, other. It corresponds to
* resulttype.classname of the dumped result
* - language of type eu.dnetlib.dhp.schema.dump.oaf.Language to store
* information about the language of the result. It is dumped as - code corresponds to language.classid - value
* corresponds to language.classname
* - country of type List<eu.dnetlib.dhp.schema.dump.oaf.Country> to store the country
* list to which the result is associated. For each country in the result represented in the internal model one country
* in the external model is produced - subjects of type List<eu.dnetlib.dhp.schema.dump.oaf.Subject> to store the subjects for
* the result. For each subject in the result represented in the internal model one subject in the external model is
* produced - maintitle of type String to store the main title of the result. It corresponds to the value of the first
* title in the result to be dumped having classid equal to "main title" - subtitle of type String to store the subtitle
* of the result. It corresponds to the value of the first title in the result to be dumped having classid equal to
* "subtitle" - description of type List<String> to store the description of the result. It corresponds to the list of
* description.value in the result represented in the internal model - publicationdate of type String to store the
* publication date. It corresponds to dateofacceptance.value in the result represented in the internal model -
* publisher of type String to store information about the publisher. It corresponds to publisher.value of the result
* represented in the internal model - embargoenddate of type String to store the embargo end date. It corresponds to
* embargoenddate.value of the result represented in the internal model - source of type List<String> See definition of
* Dublin Core field dc:source. It corresponds to the list of source.value in the result represented in the internal
* model - format of type List<String> It corresponds to the list of format.value in the result represented in the
* internal model - contributor of type List<String> to represent contributors for this result. It corresponds to the
* list of contributor.value in the result represented in the internal model - coverage of type String. It corresponds
* to the list of coverage.value in the result represented in the internal model - bestaccessright of type
* eu.dnetlib.dhp.schema.dump.oaf.AccessRight to store information about the openest access right associated to the
* manifestations of this research result. It corresponds to the same parameter in the result represented in the
* internal model - container of type eu.dnetlib.dhp.schema.dump.oaf.Container (only for results of type publication). It
* corresponds to the parameter journal of the result represented in the internal model - documentationUrl of type
* List<String> (only for results of type software) to store the URLs to the software documentation. It corresponds to
* the list of documentationUrl.value of the result represented in the internal model - codeRepositoryUrl of type String
* (only for results of type software) to store the URL to the repository with the source code. It corresponds to
* codeRepositoryUrl.value of the result represented in the internal model - programmingLanguage of type String (only
* for results of type software) to store the programming language. It corresponds to programmingLanguage.classid of the
* result represented in the internal model - contactperson of type List<String> (only for results of type other) to
* store the contact person for this result. It corresponds to the list of contactperson.value of the result represented
* in the internal model - contactgroup of type List<String> (only for results of type other) to store the information
* for the contact group. It corresponds to the list of contactgroup.value of the result represented in the internal
* model - tool of type List<String> (only for results of type other) to store information about tools useful for the
* interpretation and/or re-use of the research product. It corresponds to the list of tool.value in the result
* represented in the internal model - size of type String (only for results of type dataset) to store the size of the
* dataset. It corresponds to size.value in the result represented in the internal model - version of type String (only
* for results of type dataset) to store the version. It corresponds to version.value of the result represented in the
* internal model - geolocation of type List<eu.dnetlib.dhp.schema.dump.oaf.GeoLocation> (only for results of type
* dataset) to store geolocation information. For each geolocation element in the result represented in the internal
* model a GeoLocation in the external model is produced - id of type String to store the OpenAIRE id of the result. It
* corresponds to the id of the result represented in the internal model - originalId of type List<String> to store the
* original ids of the result. It corresponds to the originalId of the result represented in the internal model - pid of
* type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the persistent identifiers for the result. For
* each pid in the results represented in the internal model one pid in the external model is produced. The value
* correspondence is: - scheme corresponds to pid.qualifier.classid of the result represented in the internal model -
* value corresponds to the pid.value of the result represented in the internal model - dateofcollection of type String
* to store information about the time OpenAIRE collected the record. It corresponds to dateofcollection of the result
* represented in the internal model - lastupdatetimestamp of type Long to store the timestamp of the last update of
* the record. It corresponds to lastupdatetimestamp of the record represented in the internal model
*
*/
public class Result implements Serializable {
private List<Author> author;
// resulttype allows subclassing results into publications | datasets | software
@JsonProperty("isGreen")
@JsonSchema(description = "True if the result is green Open Access")
private Boolean isGreen;
@JsonSchema(description = "The Open Access Color of the publication")
private OpenAccessColor openAccessColor;
@JsonProperty("isInDiamondJournal")
@JsonSchema(description = "True if the result is published in a Diamond Journal")
private Boolean isInDiamondJournal;
@JsonSchema(description = "True if the result is outcome of a project")
private Boolean publiclyFunded;
public Boolean getGreen() {
return isGreen;
}
public void setGreen(Boolean green) {
isGreen = green;
}
public OpenAccessColor getOpenAccessColor() {
return openAccessColor;
}
public void setOpenAccessColor(OpenAccessColor openAccessColor) {
this.openAccessColor = openAccessColor;
}
public Boolean getInDiamondJournal() {
return isInDiamondJournal;
}
public void setInDiamondJournal(Boolean inDiamondJournal) {
isInDiamondJournal = inDiamondJournal;
}
public Boolean getPubliclyFunded() {
return publiclyFunded;
}
public void setPubliclyFunded(Boolean publiclyFunded) {
this.publiclyFunded = publiclyFunded;
}
@JsonSchema(
description = "Type of the result: one of 'publication', 'dataset', 'software', 'other' (see also https://api.openaire.eu/vocabularies/dnet:result_typologies)")
private String type; // resulttype
// common fields
private Language language;
@JsonSchema(description = "The list of countries associated to this result")
private List<ResultCountry> country;
@JsonSchema(description = "Keywords associated to the result")
private List<Subject> subjects;
@JsonSchema(
description = "A name or title by which a scientific result is known. May be the title of a publication, of a dataset or the name of a piece of software.")
private String maintitle;
@JsonSchema(description = "Explanatory or alternative name by which a scientific result is known.")
private String subtitle;
private List<String> description;
@JsonSchema(
description = "Main date of the research product: typically the publication or issued date. In case of a research result with different versions with different dates, the date of the result is selected as the most frequent well-formatted date. If not available, then the most recent and complete date among those that are well-formatted. For statistics, the year is extracted and the result is counted only among the result of that year. Example: Pre-print date: 2019-02-03, Article date provided by repository: 2020-02, Article date provided by Crossref: 2020, OpenAIRE will set as date 2019-02-03, because its the most recent among the complete and well-formed dates. If then the repository updates the metadata and set a complete date (e.g. 2020-02-12), then this will be the new date for the result because it becomes the most recent most complete date. However, if OpenAIRE then collects the pre-print from another repository with date 2019-02-03, then this will be the “winning date” because it becomes the most frequent well-formatted date.")
private String publicationdate; // dateofacceptance;
@JsonSchema(
description = "The name of the entity that holds, archives, publishes prints, distributes, releases, issues, or produces the resource.")
private String publisher;
@JsonSchema(description = "Date when the embargo ends and this result turns Open Access")
private String embargoenddate;
@JsonSchema(description = "See definition of Dublin Core field dc:source")
private List<String> source;
private List<String> format;
@JsonSchema(description = "Contributors for the result")
private List<String> contributor;
private List<String> coverage;
@JsonSchema(description = "The openest of the access rights of this result.")
private BestAccessRight bestaccessright;
@JsonSchema(
description = "Container has information about the conference or journal where the result has been presented or published")
private Container container;// Journal
@JsonSchema(description = "Only for results with type 'software': URL to the software documentation")
private List<String> documentationUrl; // software
@JsonSchema(description = "Only for results with type 'software': the URL to the repository with the source code")
private String codeRepositoryUrl; // software
@JsonSchema(description = "Only for results with type 'software': the programming language")
private String programmingLanguage; // software
@JsonSchema(
description = "Only for results with type 'software': Information on the person responsible for providing further information regarding the resource")
private List<String> contactperson; // orp
@JsonSchema(
description = "Only for results with type 'software': Information on the group responsible for providing further information regarding the resource")
private List<String> contactgroup; // orp
@JsonSchema(
description = "Only for results with type 'other': tool useful for the interpretation and/or re-used of the research product")
private List<String> tool; // orp
@JsonSchema(description = "Only for results with type 'dataset': the declared size of the dataset")
private String size; // dataset
@JsonSchema(description = "Version of the result")
private String version; // dataset
@JsonSchema(description = "Geolocation information")
private List<GeoLocation> geolocation; // dataset
@JsonSchema(description = "The OpenAIRE identifiers for this result")
private String id;
@JsonSchema(description = "Identifiers of the record at the original sources")
private List<String> originalId;
@JsonSchema(description = "Persistent identifiers of the result")
private List<ResultPid> pid;
@JsonSchema(description = "When OpenAIRE collected the record the last time")
private String dateofcollection;
@JsonSchema(description = "Timestamp of last update of the record in OpenAIRE")
private Long lastupdatetimestamp;
@JsonSchema(description = "Indicators computed for this result, for example UsageCount ones")
private Indicator indicators;
@JsonInclude(JsonInclude.Include.NON_NULL)
public Indicator getIndicators() {
return indicators;
}
public void setIndicators(Indicator indicators) {
this.indicators = indicators;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
public void setLastupdatetimestamp(Long lastupdatetimestamp) {
this.lastupdatetimestamp = lastupdatetimestamp;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultPid> getPid() {
return pid;
}
public void setPid(List<ResultPid> pid) {
this.pid = pid;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getDateofcollection() {
return dateofcollection;
}
public void setDateofcollection(String dateofcollection) {
this.dateofcollection = dateofcollection;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Author> getAuthor() {
return author;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Container getContainer() {
return container;
}
public void setContainer(Container container) {
this.container = container;
}
public void setAuthor(List<Author> author) {
this.author = author;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public Language getLanguage() {
return language;
}
public void setLanguage(Language language) {
this.language = language;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<ResultCountry> getCountry() {
return country;
}
public void setCountry(List<ResultCountry> country) {
this.country = country;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Subject> getSubjects() {
return subjects;
}
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getMaintitle() {
return maintitle;
}
public void setMaintitle(String maintitle) {
this.maintitle = maintitle;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getSubtitle() {
return subtitle;
}
public void setSubtitle(String subtitle) {
this.subtitle = subtitle;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getDescription() {
return description;
}
public void setDescription(List<String> description) {
this.description = description;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublicationdate() {
return publicationdate;
}
public void setPublicationdate(String publicationdate) {
this.publicationdate = publicationdate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getEmbargoenddate() {
return embargoenddate;
}
public void setEmbargoenddate(String embargoenddate) {
this.embargoenddate = embargoenddate;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getSource() {
return source;
}
public void setSource(List<String> source) {
this.source = source;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getFormat() {
return format;
}
public void setFormat(List<String> format) {
this.format = format;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContributor() {
return contributor;
}
public void setContributor(List<String> contributor) {
this.contributor = contributor;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getCoverage() {
return coverage;
}
public void setCoverage(List<String> coverage) {
this.coverage = coverage;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public BestAccessRight getBestaccessright() {
return bestaccessright;
}
public void setBestaccessright(BestAccessRight bestaccessright) {
this.bestaccessright = bestaccessright;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getDocumentationUrl() {
return documentationUrl;
}
public void setDocumentationUrl(List<String> documentationUrl) {
this.documentationUrl = documentationUrl;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getCodeRepositoryUrl() {
return codeRepositoryUrl;
}
public void setCodeRepositoryUrl(String codeRepositoryUrl) {
this.codeRepositoryUrl = codeRepositoryUrl;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getProgrammingLanguage() {
return programmingLanguage;
}
public void setProgrammingLanguage(String programmingLanguage) {
this.programmingLanguage = programmingLanguage;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContactperson() {
return contactperson;
}
public void setContactperson(List<String> contactperson) {
this.contactperson = contactperson;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getContactgroup() {
return contactgroup;
}
public void setContactgroup(List<String> contactgroup) {
this.contactgroup = contactgroup;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getTool() {
return tool;
}
public void setTool(List<String> tool) {
this.tool = tool;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getSize() {
return size;
}
public void setSize(String size) {
this.size = size;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public String getVersion() {
return version;
}
public void setVersion(String version) {
this.version = version;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<GeoLocation> getGeolocation() {
return geolocation;
}
public void setGeolocation(List<GeoLocation> geolocation) {
this.geolocation = geolocation;
}
}
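For orientation, an assumed serialization sketch: the getters annotated with @JsonInclude(NON_NULL) drop their null fields from the output, so a minimally populated Result serializes to a compact JSON record (the id and all other values below are purely illustrative):

import java.util.Arrays;

import com.fasterxml.jackson.databind.ObjectMapper;

public class ResultJsonSketch {
    public static void main(String[] args) throws Exception {
        Result result = new Result();
        result.setId("50|doi_________::0123456789abcdef0123456789abcdef"); // illustrative OpenAIRE-style id
        result.setType("publication");
        result.setMaintitle("An illustrative title");
        result.setPublicationdate("2024-01-15");
        result.setPid(Arrays.asList(ResultPid.newInstance("doi", "10.1000/182")));

        // Fields left null on getters annotated with @JsonInclude(NON_NULL) (author, subjects, container, ...)
        // are omitted from the serialized JSON.
        System.out.println(new ObjectMapper().writeValueAsString(result));
    }
}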

View File

@@ -1,41 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* Represents the country associated to the generic result. It extends eu.dnetlib.dhp.schema.dump.oaf.Country with a
* provenance parameter of type eu.dnetlib.dhp.schema.dump.oaf.Provenance. The country is not mapped if its value in the
* result represented in the internal format is Unknown. The values for this element correspond to:
* - code corresponds to the classid of eu.dnetlib.dhp.schema.oaf.Country
* - label corresponds to the classname of eu.dnetlib.dhp.schema.oaf.Country
* - provenance set only if the dataInfo associated to the Country of the result to be dumped is not null. In this case:
* - provenance corresponds to dataInfo.provenanceaction.classid (to be modified with datainfo.provenanceaction.classname)
* - trust corresponds to dataInfo.trust
*/
public class ResultCountry extends Country {
@JsonSchema(description = "Why this result is associated to the country.")
private Provenance provenance;
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static ResultCountry newInstance(String code, String label, Provenance provenance) {
ResultCountry c = new ResultCountry();
c.setProvenance(provenance);
c.setCode(code);
c.setLabel(label);
return c;
}
public static ResultCountry newInstance(String code, String label, String provenance, String trust) {
return newInstance(code, label, Provenance.newInstance(provenance, trust));
}
}

View File

@@ -1,44 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class ResultPid implements Serializable {
@JsonSchema(
description = "The scheme of the persistent identifier for the result (i.e. doi). " +
"If the pid is here it means the information for the pid has been collected from an authority for " +
"that pid type (i.e. Crossref/Datacite for doi). The set of authoritative pid is: doi when collected from Crossref or Datacite "
+
"pmid when collected from EuroPubmed, arxiv when collected from arXiv, handle from the repositories")
private String scheme;
@JsonSchema(description = "The value expressed in the scheme (i.e. 10.1000/182)")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static ResultPid newInstance(String scheme, String value) {
ResultPid cf = new ResultPid();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@@ -1,46 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
/**
* @author miriam.baglioni
* @Date 07/11/22
*/
public class Score implements Serializable {
private String indicator;
private String score;
@JsonProperty("class")
private String clazz;
public String getScore() {
return score;
}
public void setScore(String score) {
this.score = score;
}
@JsonGetter("class")
public String getClazz() {
return clazz;
}
@JsonSetter("class")
public void setClazz(String clazz) {
this.clazz = clazz;
}
public String getIndicator() {
return indicator;
}
public void setIndicator(String indicator) {
this.indicator = indicator;
}
}
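A short, assumed round-trip sketch: the Java field is named clazz because "class" is a reserved word, and the @JsonProperty/@JsonGetter/@JsonSetter annotations expose it as "class" in the JSON (the indicator and score values are illustrative):

import com.fasterxml.jackson.databind.ObjectMapper;

public class ScoreJsonSketch {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();

        Score score = new Score();
        score.setIndicator("influence");
        score.setScore("1.23e-08");
        score.setClazz("C5");

        String json = mapper.writeValueAsString(score); // contains "class":"C5"
        Score parsed = mapper.readValue(json, Score.class);
        System.out.println(parsed.getClazz()); // C5
    }
}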

View File

@@ -1,40 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent keywords associated to the result. It has two parameters:
* - subject of type eu.dnetlib.dhp.schema.dump.oaf.SubjectSchemeValue to describe the subject. It mapped as:
* - scheme it corresponds to qualifier.classid of the dumped subject
* - value it corresponds to the subject value
* - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to represent the provenance of the subject. It is dumped only if dataInfo
* is not null. In this case:
* - provenance corresponds to dataInfo.provenanceaction.classname
* - trust corresponds to dataInfo.trust
*/
public class Subject implements Serializable {
private SubjectSchemeValue subject;
@JsonSchema(description = "Why this subject is associated to the result")
private Provenance provenance;
public SubjectSchemeValue getSubject() {
return subject;
}
public void setSubject(SubjectSchemeValue subject) {
this.subject = subject;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
}

View File

@@ -1,42 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class SubjectSchemeValue implements Serializable {
@JsonSchema(
description = "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies).")
private String scheme;
@JsonSchema(
description = "The value for the subject in the selected scheme. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary).")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static SubjectSchemeValue newInstance(String scheme, String value) {
SubjectSchemeValue cf = new SubjectSchemeValue();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@@ -1,29 +0,0 @@
package eu.dnetlib.dhp.oa.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 07/11/22
*/
public class UsageCounts implements Serializable {
private String downloads;
private String views;
public String getDownloads() {
return downloads;
}
public void setDownloads(String downloads) {
this.downloads = downloads;
}
public String getViews() {
return views;
}
public void setViews(String views) {
this.views = views;
}
}

View File

@@ -1,47 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class CfHbKeyValue implements Serializable {
@JsonSchema(description = "the OpenAIRE identifier of the data source")
private String key;
@JsonSchema(description = "the name of the data source")
private String value;
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static CfHbKeyValue newInstance(String key, String value) {
CfHbKeyValue inst = new CfHbKeyValue();
inst.key = key;
inst.value = value;
return inst;
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(key) && StringUtils.isBlank(value);
}
}

View File

@@ -1,43 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Instance;
/**
* It extends eu.dnetlib.dhp.dump.oaf.Instance with values related to the community dump. In the Result dump this
* information is not present because it is dumped as a set of relations between the result and the datasource. -
* hostedby of type eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the
* instance can be viewed or downloaded. It is mapped against the hostedby parameter of the instance to be dumped and -
* key corresponds to hostedby.key - value corresponds to hostedby.value - collectedfrom of type
* eu.dnetlib.dhp.schema.dump.oaf.KeyValue to store the information about the source from which the instance has been
* collected. It is mapped against the collectedfrom parameter of the instance to be dumped and - key corresponds to
* collectedfrom.key - value corresponds to collectedfrom.value
*/
public class CommunityInstance extends Instance {
@JsonSchema(description = "Information about the source from which the instance can be viewed or downloaded.")
private CfHbKeyValue hostedby;
@JsonSchema(description = "Information about the source from which the record has been collected")
private CfHbKeyValue collectedfrom;
@JsonInclude(JsonInclude.Include.NON_NULL)
public CfHbKeyValue getHostedby() {
return hostedby;
}
public void setHostedby(CfHbKeyValue hostedby) {
this.hostedby = hostedby;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public CfHbKeyValue getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(CfHbKeyValue collectedfrom) {
this.collectedfrom = collectedfrom;
}
}

View File

@@ -1,75 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Result;
/**
* extends eu.dnetlib.dhp.schema.dump.oaf.Result with the following parameters: - projects of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Project> to store the list of projects related to the result. The
* information is added after the result is mapped to the external model - context of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.Context> to store information about the RC RI related to the result.
* For each context in the result represented in the internal model one context in the external model is produced -
* collectedfrom of type List<eu.dnetlib.dhp.schema.dump.oaf.KeyValue> to store information about the sources from which
* the record has been collected. For each collectedfrom in the result represented in the internal model one
* collectedfrom in the external model is produced - instance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance> to store all the instances associated to the result.
* It corresponds to the same parameter in the result represented in the internal model
*/
public class CommunityResult extends Result {
@JsonSchema(description = "List of projects (i.e. grants) that (co-)funded the production ofn the research results")
private List<Project> projects;
@JsonSchema(
description = "Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu")
private List<Context> context;
@JsonSchema(description = "Information about the sources from which the record has been collected")
protected List<CfHbKeyValue> collectedfrom;
@JsonSchema(
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
private List<CommunityInstance> instance;
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<CommunityInstance> getInstance() {
return instance;
}
public void setInstance(List<CommunityInstance> instance) {
this.instance = instance;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<CfHbKeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<CfHbKeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Project> getProjects() {
return projects;
}
public void setProjects(List<Project> projects) {
this.projects = projects;
}
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<Context> getContext() {
return context;
}
public void setContext(List<Context> context) {
this.context = context;
}
}

View File

@ -1,73 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* Reference to a relevant research infrastructure, initiative or community (RI/RC) among those collaborating with
* OpenAIRE. It extends eu.dnetlib.dhp.schema.dump.oaf.Qualifier with a parameter provenance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Provenance> to store the provenances of the association between the result and
* the RC/RI. The values for this element correspond to: - code: it corresponds to the id of the context in the result
* to be mapped. If the context id refers to a RC/RI and contains '::' only the part of the id before the first "::"
* will be used as value for code - label it corresponds to the label associated to the id. The information is taken
* from the profile of the RC/RI - provenance it is set only if the dataInfo associated to the context element of the
* result to be dumped is not null. For each dataInfo one instance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance is
* instantiated if the element datainfo.provenanceaction is not null. In this case - provenance corresponds to
* dataInfo.provenanceaction.classname - trust corresponds to dataInfo.trust
*/
public class Context {
@JsonSchema(description = "Code identifying the RI/RC")
private String code;
@JsonSchema(description = "Label of the RI/RC")
private String label;
@JsonSchema(description = "Why this result is associated to the RI/RC.")
private List<Provenance> provenance;
public List<Provenance> getProvenance() {
return provenance;
}
public void setProvenance(List<Provenance> provenance) {
this.provenance = provenance;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
@Override
public int hashCode() {
final String p = Optional
.ofNullable(getProvenance())
.map(
prov -> prov
.stream()
.map(Provenance::toString)
.collect(Collectors.joining()))
.orElse("");
return Objects.hash(getCode(), getLabel(), p);
}
}
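A small sketch (not part of this changeset) of how code and provenance are derived per the javadoc above; the context id value and the Provenance setters are assumptions used only for illustration:
// Hypothetical derivation: code is the part of the context id before the first "::",
// provenance/trust come from dataInfo.provenanceaction.classname and dataInfo.trust.
public static Context fromContextId(String contextId, String provenanceClassname, String trust) {
	Context c = new Context();
	c.setCode(contextId.contains("::") ? contextId.substring(0, contextId.indexOf("::")) : contextId);
	Provenance p = new Provenance();
	p.setProvenance(provenanceClassname); // assumed setter on eu.dnetlib.dhp.oa.model.Provenance
	p.setTrust(trust);                    // assumed setter
	c.setProvenance(java.util.Collections.singletonList(p));
	return c;
}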

View File

@ -1,24 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To store information about the funder funding the project related to the result. It has the following parameters: -
* shortName of type String to store the funder short name (e.g. AKA). - name of type String to store the funder name
* (e.g. Academy of Finland) - fundingStream of type String to store the funding stream - jurisdiction of type String to
* store the jurisdiction of the funder
*/
public class Funder extends eu.dnetlib.dhp.oa.model.Funder {
@JsonSchema(description = "Stream of funding (e.g. for European Commission can be H2020 or FP7)")
private String fundingStream;
public String getFundingStream() {
return fundingStream;
}
public void setFundingStream(String fundingStream) {
this.fundingStream = fundingStream;
}
}

View File

@ -1,58 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* To store information about the project related to the result. This information is not directly mapped from the result
* represented in the internal model because it is not there. The mapped result will be enriched with project
* information derived by relation between results and projects. Project extends eu.dnetlib.dhp.schema.dump.oaf.Project
* with the following parameters: - funder of type eu.dnetlib.dhp.schema.dump.oaf.community.Funder to store information
* about the funder funding the project - provenance of type eu.dnetlib.dhp.schema.dump.oaf.Provenance to store
* information about the provenance of the association between the result and the project
*/
public class Project extends eu.dnetlib.dhp.oa.model.Project {
@JsonSchema(description = "Information about the funder funding the project")
private Funder funder;
private Provenance provenance;
private Validated validated;
public void setValidated(Validated validated) {
this.validated = validated;
}
public Validated getValidated() {
return validated;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public Funder getFunder() {
return funder;
}
public void setFunder(Funder funders) {
this.funder = funders;
}
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
Project project = new Project();
project.setAcronym(acronym);
project.setCode(code);
project.setFunder(funder);
project.setId(id);
project.setTitle(title);
return project;
}
}

View File

@ -1,39 +0,0 @@
package eu.dnetlib.dhp.oa.model.community;
import java.io.Serializable;
/**
* To store information about the validation of the association between the result and the project, as provided by the
* funder. It has the following parameters: - validationDate of type String to store the date of validation -
* validatedByFunder of type Boolean to store whether the association has been validated by the funder
*/
public class Validated implements Serializable {
private String validationDate;
private Boolean validatedByFunder;
public void setValidationDate(String validationDate) {
this.validationDate = validationDate;
}
public String getValidationDate() {
return validationDate;
}
public void setValidatedByFunder(Boolean validated) {
this.validatedByFunder = validated;
}
public Boolean getValidatedByFunder() {
return validatedByFunder;
}
public static Validated newInstance(Boolean validated, String validationDate) {
Validated v = new Validated();
v.validatedByFunder = validated;
v.validationDate = validationDate;
return v;
}
}

View File

@ -1,21 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
public class Constants implements Serializable {
// collectedFrom goes with isProvidedBy -> taken from ModelSupport
public static final String HOSTED_BY = "isHostedBy";
public static final String HOSTS = "hosts";
// for the community result use isRelatedTo
public static final String RESULT_ENTITY = "result";
public static final String DATASOURCE_ENTITY = "datasource";
public static final String CONTEXT_ENTITY = "context";
public static final String CONTEXT_ID = "60";
public static final String CONTEXT_NS_PREFIX = "context____";
}

View File

@ -1,358 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Indicator;
/**
* To store information about the datasource OpenAIRE collects information from. It contains the following parameters: -
* id of type String to store the OpenAIRE id for the datasource. It corresponds to the parameter id of the datasource
* represented in the internal model - originalId of type List<String> to store the list of original ids associated to
* the datasource. It corresponds to the parameter originalId of the datasource represented in the internal model. The
* null values are filtered out - pid of type List<eu.dnetlib.dhp.schema.dump.oaf.ControlledField> to store the
* persistent identifiers for the datasource. For each pid in the datasource represented in the internal model one pid
* in the external model is produced as : - schema corresponds to pid.qualifier.classid of the datasource represented in
* the internal model - value corresponds to pid.value of the datasource represented in the internal model -
* datasourceType of type eu.dnetlib.dhp.schema.dump.oaf.ControlledField to store the datasource type (e.g.
* pubsrepository::institutional, Institutional Repository) as in the dnet vocabulary dnet:datasource_typologies. It
* corresponds to datasourcetype of the datasource represented in the internal model and : - code corresponds to
* datasourcetype.classid - value corresponds to datasourcetype.classname - openairecompatibility of type String to
* store information about the OpenAIRE compatibility of the ingested results (which guidelines they are compliant to).
* It corresponds to openairecompatibility.classname of the datasource represented in the internal model - officialname
* of type String to store the official name of the datasource. It corresponds to officialname.value of the datasource
* represented in the internal model - englishname of type String to store the English name of the datasource. It
* corresponds to englishname.value of the datasource represented in the internal model - websiteurl of type String to
* store the URL of the website of the datasource. It corresponds to websiteurl.value of the datasource represented in
* the internal model - logourl of type String to store the URL of the logo for the datasource. It corresponds to
* logourl.value of the datasource represented in the internal model - dateofvalidation of type String to store the date
* of validation against the guidelines for the datasource records. It corresponds to dateofvalidation.value of the
* datasource represented in the internal model - description of type String to store the description for the
* datasource. It corresponds to description.value of the datasource represented in the internal model
*/
public class Datasource implements Serializable {
@JsonSchema(description = "The OpenAIRE id of the data source")
private String id; // string
@JsonSchema(description = "Original identifiers for the datasource")
private List<String> originalId; // list string
@JsonSchema(description = "Persistent identifiers of the datasource")
private List<DatasourcePid> pid; // list<String>
@JsonSchema(
description = "The type of the datasource. See https://api.openaire.eu/vocabularies/dnet:datasource_typologies")
private DatasourceSchemeValue datasourcetype; // value
@JsonSchema(
description = "OpenAIRE guidelines the data source comply with. See also https://guidelines.openaire.eu.")
private String openairecompatibility; // value
@JsonSchema(description = "The official name of the datasource")
private String officialname; // string
@JsonSchema(description = "The English name of the datasource")
private String englishname; // string
private String websiteurl; // string
private String logourl; // string
@JsonSchema(description = "The date of last validation against the OpenAIRE guidelines for the datasource records")
private String dateofvalidation; // string
private String description; // description
@JsonSchema(description = "List of subjects associated to the datasource")
private List<String> subjects; // List<String>
// opendoar specific fields (od*)
@JsonSchema(description = "The languages present in the data source's content, as defined by OpenDOAR.")
private List<String> languages; // odlanguages List<String>
@JsonSchema(description = "Types of content in the data source, as defined by OpenDOAR")
private List<String> contenttypes; // odcontent types List<String>
// re3data fields
@JsonSchema(description = "Releasing date of the data source, as defined by re3data.org")
private String releasestartdate; // string
@JsonSchema(
description = "Date when the data source went offline or stopped ingesting new research data. As defined by re3data.org")
private String releaseenddate; // string
@JsonSchema(
description = "The URL of a mission statement describing the designated community of the data source. As defined by re3data.org")
private String missionstatementurl; // string
@JsonSchema(
description = "Type of access to the data source, as defined by re3data.org. Possible values: " +
"{open, restricted, closed}")
private String accessrights; // databaseaccesstype string
// {open, restricted or closed}
@JsonSchema(description = "Type of data upload. As defined by re3data.org: one of {open, restricted,closed}")
private String uploadrights; // datauploadtype string
@JsonSchema(
description = "Access restrinctions to the data source, as defined by re3data.org. One of {feeRequired, registration, other}")
private String databaseaccessrestriction; // string
@JsonSchema(
description = "Upload restrictions applied by the datasource, as defined by re3data.org. One of {feeRequired, registration, other}")
private String datauploadrestriction; // string
@JsonSchema(description = "As defined by redata.org: 'yes' if the data source supports versioning, 'no' otherwise.")
private Boolean versioning; // boolean
@JsonSchema(
description = "The URL of the data source providing information on how to cite its items. As defined by re3data.org.")
private String citationguidelineurl; // string
// {yes, no, unknown}
@JsonSchema(
description = "The persistent identifier system that is used by the data source. As defined by re3data.org")
private String pidsystems; // string
@JsonSchema(
description = "The certificate, seal or standard the data source complies with. As defined by re3data.org.")
private String certificates; // string
@JsonSchema(description = "Policies of the data source, as defined in OpenDOAR.")
private List<String> policies; //
@JsonSchema(description = "Information about the journal, if this data source is of type Journal.")
private Container journal; // issn etc del Journal
// @JsonSchema(description = "Indicators computed for this Datasource, for example UsageCount ones")
// private Indicator indicators;
//
// public Indicator getIndicators() {
// return indicators;
// }
//
// public void setIndicators(Indicator indicators) {
// this.indicators = indicators;
// }
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<String> getOriginalId() {
return originalId;
}
public void setOriginalId(List<String> originalId) {
this.originalId = originalId;
}
public List<DatasourcePid> getPid() {
return pid;
}
public void setPid(List<DatasourcePid> pid) {
this.pid = pid;
}
public DatasourceSchemeValue getDatasourcetype() {
return datasourcetype;
}
public void setDatasourcetype(DatasourceSchemeValue datasourcetype) {
this.datasourcetype = datasourcetype;
}
public String getOpenairecompatibility() {
return openairecompatibility;
}
public void setOpenairecompatibility(String openairecompatibility) {
this.openairecompatibility = openairecompatibility;
}
public String getOfficialname() {
return officialname;
}
public void setOfficialname(String officialname) {
this.officialname = officialname;
}
public String getEnglishname() {
return englishname;
}
public void setEnglishname(String englishname) {
this.englishname = englishname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getLogourl() {
return logourl;
}
public void setLogourl(String logourl) {
this.logourl = logourl;
}
public String getDateofvalidation() {
return dateofvalidation;
}
public void setDateofvalidation(String dateofvalidation) {
this.dateofvalidation = dateofvalidation;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public List<String> getSubjects() {
return subjects;
}
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public List<String> getLanguages() {
return languages;
}
public void setLanguages(List<String> languages) {
this.languages = languages;
}
public List<String> getContenttypes() {
return contenttypes;
}
public void setContenttypes(List<String> contenttypes) {
this.contenttypes = contenttypes;
}
public String getReleasestartdate() {
return releasestartdate;
}
public void setReleasestartdate(String releasestartdate) {
this.releasestartdate = releasestartdate;
}
public String getReleaseenddate() {
return releaseenddate;
}
public void setReleaseenddate(String releaseenddate) {
this.releaseenddate = releaseenddate;
}
public String getMissionstatementurl() {
return missionstatementurl;
}
public void setMissionstatementurl(String missionstatementurl) {
this.missionstatementurl = missionstatementurl;
}
public String getAccessrights() {
return accessrights;
}
public void setAccessrights(String accessrights) {
this.accessrights = accessrights;
}
public String getUploadrights() {
return uploadrights;
}
public void setUploadrights(String uploadrights) {
this.uploadrights = uploadrights;
}
public String getDatabaseaccessrestriction() {
return databaseaccessrestriction;
}
public void setDatabaseaccessrestriction(String databaseaccessrestriction) {
this.databaseaccessrestriction = databaseaccessrestriction;
}
public String getDatauploadrestriction() {
return datauploadrestriction;
}
public void setDatauploadrestriction(String datauploadrestriction) {
this.datauploadrestriction = datauploadrestriction;
}
public Boolean getVersioning() {
return versioning;
}
public void setVersioning(Boolean versioning) {
this.versioning = versioning;
}
public String getCitationguidelineurl() {
return citationguidelineurl;
}
public void setCitationguidelineurl(String citationguidelineurl) {
this.citationguidelineurl = citationguidelineurl;
}
public String getPidsystems() {
return pidsystems;
}
public void setPidsystems(String pidsystems) {
this.pidsystems = pidsystems;
}
public String getCertificates() {
return certificates;
}
public void setCertificates(String certificates) {
this.certificates = certificates;
}
public List<String> getPolicies() {
return policies;
}
public void setPolicies(List<String> policiesr3) {
this.policies = policiesr3;
}
public Container getJournal() {
return journal;
}
public void setJournal(Container journal) {
this.journal = journal;
}
}
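Per the javadoc of this Datasource class, the pid and datasourcetype mappings can be sketched as follows (not part of this changeset; StructuredProperty and Qualifier are assumed to be the internal-model types with the listed accessors):
// Hypothetical mapping helpers following the javadoc above.
static DatasourcePid toDumpPid(eu.dnetlib.dhp.schema.oaf.StructuredProperty sp) {
	// scheme <- pid.qualifier.classid, value <- pid.value
	return DatasourcePid.newInstance(sp.getQualifier().getClassid(), sp.getValue());
}
static DatasourceSchemeValue toDumpType(eu.dnetlib.dhp.schema.oaf.Qualifier q) {
	// e.g. scheme = "pubsrepository::institutional", value = "Institutional Repository"
	return DatasourceSchemeValue.newInstance(q.getClassid(), q.getClassname());
}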

View File

@ -1,41 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public class DatasourcePid implements Serializable {
@JsonSchema(description = "The scheme used to express the value ")
private String scheme;
@JsonSchema(description = "The value expressed in the scheme ")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static DatasourcePid newInstance(String scheme, String value) {
DatasourcePid cf = new DatasourcePid();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@ -1,41 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public // TODO change the DatasourceSchemeValue to DatasourceKeyValue. The scheme is always the dnet one. What we show
// here is the entry in the scheme (the key) and its understandable value
class DatasourceSchemeValue implements Serializable {
@JsonSchema(description = "The scheme used to express the value (i.e. pubsrepository::journal)")
private String scheme;
@JsonSchema(description = "The value expressed in the scheme (Journal)")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static DatasourceSchemeValue newInstance(String scheme, String value) {
DatasourceSchemeValue cf = new DatasourceSchemeValue();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@ -1,23 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To store information about the funder funding the project related to the result. It extends
* eu.dnetlib.dhp.schema.dump.oaf.Funder with the following parameter: - private
* eu.dnetlib.dhp.schema.dump.oaf.graph.Fundings funding_stream to store the funding stream
*/
public class Funder extends eu.dnetlib.dhp.oa.model.Funder {
@JsonSchema(description = "Description of the funding stream")
private Fundings funding_stream;
public Fundings getFunding_stream() {
return funding_stream;
}
public void setFunding_stream(Fundings funding_stream) {
this.funding_stream = funding_stream;
}
}

View File

@ -1,38 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To store information about the funding stream. It has two parameters: - private String id to store the id of the
* funding stream. The id is created by appending the shortname of the funder to the name of each level in the xml
* representing the funding stream. For example: if the funder is the European Commission, the funding level 0 name is
* FP7, the funding level 1 name is SP3 and the funding level 2 name is PEOPLE then the id will be: EC::FP7::SP3::PEOPLE
* - private String description to describe the funding stream. It is created by concatenating the description of each
* funding level so for the example above the description would be: SEVENTH FRAMEWORK PROGRAMME - SP3-People -
* Marie-Curie Actions
*/
public class Fundings implements Serializable {
@JsonSchema(description = "Id of the funding stream")
private String id;
private String description;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}
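A minimal sketch (not part of this changeset) of the id/description construction described in the javadoc above; the helper name and its inputs are illustrative:
// Builds the funding stream id by joining the funder shortname and the level names,
// and the description by concatenating the level descriptions.
static Fundings build(String funderShortName, java.util.List<String> levelNames,
		java.util.List<String> levelDescriptions) {
	Fundings f = new Fundings();
	f.setId(funderShortName + "::" + String.join("::", levelNames)); // e.g. EC::FP7::SP3::PEOPLE
	f.setDescription(String.join(" - ", levelDescriptions));
	// e.g. SEVENTH FRAMEWORK PROGRAMME - SP3-People - Marie-Curie Actions
	return f;
}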

View File

@ -1,61 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To describe the funded amount. It has the following parameters: - private String currency to store the currency of
* the fund - private float totalcost to store the total cost of the project - private float fundedamount to store the
* funded amount by the funder
*/
public class Granted implements Serializable {
@JsonSchema(description = "The currency of the granted amount (e.g. EUR)")
private String currency;
@JsonSchema(description = "The total cost of the project")
private float totalcost;
@JsonSchema(description = "The funded amount")
private float fundedamount;
public String getCurrency() {
return currency;
}
public void setCurrency(String currency) {
this.currency = currency;
}
public float getTotalcost() {
return totalcost;
}
public void setTotalcost(float totalcost) {
this.totalcost = totalcost;
}
public float getFundedamount() {
return fundedamount;
}
public void setFundedamount(float fundedamount) {
this.fundedamount = fundedamount;
}
public static Granted newInstance(String currency, float totalcost, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.totalcost = totalcost;
granted.fundedamount = fundedamount;
return granted;
}
public static Granted newInstance(String currency, float fundedamount) {
Granted granted = new Granted();
granted.currency = currency;
granted.fundedamount = fundedamount;
return granted;
}
}

View File

@ -1,28 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.Result;
/**
* It extends the eu.dnetlib.dhp.schema.dump.oaf.Result with - instance of type
* List<eu.dnetlib.dhp.schema.dump.oaf.Instance> to store all the instances associated to the result. It corresponds to
* the same parameter in the result represented in the internal model
*/
public class GraphResult extends Result {
@JsonSchema(
description = "Each instance is one specific materialisation or version of the result. For example, you can have one result with three instance: one is the pre-print, one is the post-print, one is te published version")
private List<Instance> instance;
public List<Instance> getInstance() {
return instance;
}
public void setInstance(List<Instance> instance) {
this.instance = instance;
}
}

View File

@ -1,82 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
/**
* To store information about the classification for the project. The classification depends on the programme. For example
* H2020-EU.3.4.5.3 can be classified as
* H2020-EU.3. => Societal Challenges (level1)
* H2020-EU.3.4. => Transport (level2)
* H2020-EU.3.4.5. => CLEANSKY2 (level3)
* H2020-EU.3.4.5.3. => IADP Fast Rotorcraft (level4)
*
* We decided to explicitly represent up to three levels in the classification.
*
* H2020Classification has the following parameters:
* - private Programme programme to store the information about the programme related to this classification
* - private String level1 to store the information about the level 1 of the classification (Priority or Pillar of the EC)
* - private String level2 to store the information about the level2 of the classification (Objectives (?))
* - private String level3 to store the information about the level3 of the classification
* - private String classification to store the entire classification related to the programme
*/
public class H2020Classification implements Serializable {
private Programme programme;
private String level1;
private String level2;
private String level3;
private String classification;
public Programme getProgramme() {
return programme;
}
public void setProgramme(Programme programme) {
this.programme = programme;
}
public String getLevel1() {
return level1;
}
public void setLevel1(String level1) {
this.level1 = level1;
}
public String getLevel2() {
return level2;
}
public void setLevel2(String level2) {
this.level2 = level2;
}
public String getLevel3() {
return level3;
}
public void setLevel3(String level3) {
this.level3 = level3;
}
public String getClassification() {
return classification;
}
public void setClassification(String classification) {
this.classification = classification;
}
public static H2020Classification newInstance(String programme_code, String programme_description, String level1,
String level2, String level3, String classification) {
H2020Classification h2020classification = new H2020Classification();
h2020classification.programme = Programme.newInstance(programme_code, programme_description);
h2020classification.level1 = level1;
h2020classification.level2 = level2;
h2020classification.level3 = level3;
h2020classification.classification = classification;
return h2020classification;
}
}
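An illustrative use of the factory above (not part of this changeset), reusing the H2020-EU.3.4.5.3 example from the javadoc; the programme code/description values and the classification string format are assumptions:
H2020Classification c = H2020Classification.newInstance(
	"H2020-EU.3.4.5.3.",              // programme code (hypothetical)
	"IADP Fast Rotorcraft",           // programme description (hypothetical)
	"Societal Challenges",            // level1
	"Transport",                      // level2
	"CLEANSKY2",                      // level3
	"H2020-EU.3.4.5.3. - IADP Fast Rotorcraft"); // full classification (hypothetical format)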

View File

@ -1,94 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Country;
/**
* To represent the generic organization. It has the following parameters:
* - private String legalshortname to store the legalshortname of the organization
* - private String legalname to store the legal name of the organization
* - private String websiteurl to store the websiteurl of the organization
* - private List<String> alternativenames to store the alternative names of the organization
* - private Country country to store the country of the organization
* - private String id to store the openaire id of the organization
* - private List<OrganizationPid> pid to store the list of pids for the organization
*/
public class Organization implements Serializable {
private String legalshortname;
private String legalname;
private String websiteurl;
@JsonSchema(description = "Alternative names that identify the organisation")
private List<String> alternativenames;
@JsonSchema(description = "The organisation country")
private Country country;
@JsonSchema(description = "The OpenAIRE id for the organisation")
private String id;
@JsonSchema(description = "Persistent identifiers for the organisation i.e. isni 0000000090326370")
private List<OrganizationPid> pid;
public String getLegalshortname() {
return legalshortname;
}
public void setLegalshortname(String legalshortname) {
this.legalshortname = legalshortname;
}
public String getLegalname() {
return legalname;
}
public void setLegalname(String legalname) {
this.legalname = legalname;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public List<String> getAlternativenames() {
return alternativenames;
}
public void setAlternativenames(List<String> alternativenames) {
this.alternativenames = alternativenames;
}
public Country getCountry() {
return country;
}
public void setCountry(Country country) {
this.country = country;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<OrganizationPid> getPid() {
return pid;
}
public void setPid(List<OrganizationPid> pid) {
this.pid = pid;
}
}

View File

@ -1,42 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
public
class OrganizationPid implements Serializable {
@JsonSchema(description = "The scheme of the identifier (i.e. isni)")
private String scheme;
@JsonSchema(description = "The value in the schema (i.e. 0000000090326370)")
private String value;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public static OrganizationPid newInstance(String scheme, String value) {
OrganizationPid cf = new OrganizationPid();
cf.setScheme(scheme);
cf.setValue(value);
return cf;
}
}

View File

@ -1,41 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To store information about the ec programme for the project. It has the following parameters: - private String code
* to store the code of the programme - private String description to store the description of the programme
*/
public class Programme implements Serializable {
@JsonSchema(description = "The code of the programme")
private String code;
@JsonSchema(description = "The description of the programme")
private String description;
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
public static Programme newInstance(String code, String description) {
Programme p = new Programme();
p.code = code;
p.description = description;
return p;
}
}

View File

@ -1,211 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import java.util.List;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Indicator;
/**
* This is the class representing the Project in the model used for the dumps of the whole graph. At the moment the dump
* of the Projects differs from the other dumps because we do not create relations between Funders (Organization) and
* Projects but we put the information about the Funder within the Project representation. We also removed the
* collected from element from the Project. No relation between the Project and the Datasource entity from which it is
* collected will be created. We will never create relations between Project and Datasource. In case some relation will
* be extracted from the Project they will refer the Funder and will be of type ( organization -> funds -> project,
* project -> isFundedBy -> organization) We also removed the duration parameter because the most of times it is set to
* 0. It has the following parameters:
* - private String id to store the id of the project (OpenAIRE id)
* - private String websiteurl to store the websiteurl of the project
* - private String code to store the grant agreement of the project
* - private String acronym to store the acronym of the project
* - private String title to store the tile of the project
* - private String startdate to store the start date
* - private String enddate to store the end date
* - private String callidentifier to store the call identifier
* - private String keywords to store the keywords
* - private boolean openaccessmandateforpublications to store whether the project must comply with the open access mandate
* for publications. This value will be set to true if one of the fields in the project represented in the internal model
* is set to true
* - private boolean openaccessmandatefordataset to store whether the project must comply with the open access mandate for
* dataset. It is set to the value in the corresponding field of the project represented in the internal model
* - private List<String> subject to store the list of subjects of the project
* - private List<Funder> funding to store the list of funders of the project
* - private String summary to store the summary of the project
* - private Granted granted to store the granted amount
* - private List<Programme> h2020programme to store the list of programmes the project is related to
*/
public class Project implements Serializable {
private String id;
private String websiteurl;
private String code;
private String acronym;
private String title;
private String startdate;
private String enddate;
private String callidentifier;
private String keywords;
private boolean openaccessmandateforpublications;
private boolean openaccessmandatefordataset;
private List<String> subject;
@JsonSchema(description = "Funding information for the project")
private List<Funder> funding;
private String summary;
@JsonSchema(description = "The money granted to the project")
private Granted granted;
@JsonSchema(description = "The h2020 programme funding the project")
private List<Programme> h2020programme;
// @JsonSchema(description = "Indicators computed for this project, for example UsageCount ones")
// private Indicator indicators;
//
// public Indicator getIndicators() {
// return indicators;
// }
//
// public void setIndicators(Indicator indicators) {
// this.indicators = indicators;
// }
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getWebsiteurl() {
return websiteurl;
}
public void setWebsiteurl(String websiteurl) {
this.websiteurl = websiteurl;
}
public String getCode() {
return code;
}
public void setCode(String code) {
this.code = code;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getStartdate() {
return startdate;
}
public void setStartdate(String startdate) {
this.startdate = startdate;
}
public String getEnddate() {
return enddate;
}
public void setEnddate(String enddate) {
this.enddate = enddate;
}
public String getCallidentifier() {
return callidentifier;
}
public void setCallidentifier(String callidentifier) {
this.callidentifier = callidentifier;
}
public String getKeywords() {
return keywords;
}
public void setKeywords(String keywords) {
this.keywords = keywords;
}
public boolean isOpenaccessmandateforpublications() {
return openaccessmandateforpublications;
}
public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
this.openaccessmandateforpublications = openaccessmandateforpublications;
}
public boolean isOpenaccessmandatefordataset() {
return openaccessmandatefordataset;
}
public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
this.openaccessmandatefordataset = openaccessmandatefordataset;
}
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
public List<Funder> getFunding() {
return funding;
}
public void setFunding(List<Funder> funding) {
this.funding = funding;
}
public String getSummary() {
return summary;
}
public void setSummary(String summary) {
this.summary = summary;
}
public Granted getGranted() {
return granted;
}
public void setGranted(Granted granted) {
this.granted = granted;
}
public List<Programme> getH2020programme() {
return h2020programme;
}
public void setH2020programme(List<Programme> h2020programme) {
this.h2020programme = h2020programme;
}
}

View File

@ -1,39 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
/**
* To represent the semantics of the generic relation between two entities. It has the following parameters: - private
* String name to store the semantics of the relation (i.e. isAuthorInstitutionOf). It corresponds to the relclass
* parameter of the relation represented in the internal model - private String type
* to store the type of the relation (i.e. affiliation). It corresponds to the subreltype parameter of the relation
* represented in the internal model
*/
public class RelType implements Serializable {
private String name; // relclass
private String type; // subreltype
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public static RelType newInstance(String name, String type) {
RelType rel = new RelType();
rel.name = name;
rel.type = type;
return rel;
}
}
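A one-line usage sketch (not part of this changeset) matching the relclass/subreltype example in the javadoc above:
RelType affiliation = RelType.newInstance("isAuthorInstitutionOf", "affiliation");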

View File

@ -1,124 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import java.util.Objects;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
import eu.dnetlib.dhp.oa.model.Provenance;
/**
* To represent the generic relation between two entities. It has the following parameters: - private Node source to
* represent the entity source of the relation - private Node target to represent the entity target of the relation -
* private RelType reltype to represent the semantics of the relation - private Provenance provenance to represent the
* provenance of the relation
*/
public class Relation implements Serializable {
@JsonSchema(description = "The identifier of the source in the relation")
private String source;
@JsonSchema(description = "The entity type of the source in the relation")
private String sourceType;
@JsonSchema(description = "The identifier of the target in the relation")
private String target;
@JsonSchema(description = "The entity type of the target in the relation")
private String targetType;
@JsonSchema(description = "To represent the semantics of a relation between two entities")
private RelType reltype;
@JsonSchema(description = "The reason why OpenAIRE holds the relation ")
private Provenance provenance;
@JsonSchema(
description = "True if the relation is related to a project and it has been collected from an authoritative source (i.e. the funder)")
private boolean validated;
@JsonSchema(description = "The date when the relation was collected from OpenAIRE")
private String validationDate;
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
public String getSourceType() {
return sourceType;
}
public void setSourceType(String sourceType) {
this.sourceType = sourceType;
}
public String getTarget() {
return target;
}
public void setTarget(String target) {
this.target = target;
}
public String getTargetType() {
return targetType;
}
public void setTargetType(String targetType) {
this.targetType = targetType;
}
public RelType getReltype() {
return reltype;
}
public void setReltype(RelType reltype) {
this.reltype = reltype;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public void setValidated(boolean validate) {
this.validated = validate;
}
public boolean getValidated() {
return validated;
}
public void setValidationDate(String validationDate) {
this.validationDate = validationDate;
}
public String getValidationDate() {
return validationDate;
}
@Override
public int hashCode() {
return Objects.hash(source, target, reltype.getType() + ":" + reltype.getName());
}
public static Relation newInstance(String source, String sourceType, String target, String targetType,
RelType reltype, Provenance provenance) {
Relation relation = new Relation();
relation.source = source;
relation.sourceType = sourceType;
relation.target = target;
relation.targetType = targetType;
relation.reltype = reltype;
relation.provenance = provenance;
return relation;
}
}

View File

@ -1,27 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent RC entities. It extends eu.dnetlib.dhp.dump.oaf.graph.ResearchInitiative by adding the parameter subject
* to store the list of subjects related to the community
*/
public class ResearchCommunity extends ResearchInitiative {
@JsonSchema(
description = "Only for research communities: the list of the subjects associated to the research community")
@JsonInclude(JsonInclude.Include.NON_NULL)
private List<String> subject;
public List<String> getSubject() {
return subject;
}
public void setSubject(List<String> subject) {
this.subject = subject;
}
}

View File

@ -1,89 +0,0 @@
package eu.dnetlib.dhp.oa.model.graph;
import java.io.Serializable;
import com.github.imifou.jsonschema.module.addon.annotation.JsonSchema;
/**
* To represent an entity of type RC/RI. It has the following parameters, which are mostly derived from the profile:
* - private String id to store the openaire id for the entity. It has code 00 and will be created as
* 00|context_____::md5(originalId)
* - private String originalId to store the id of the context as provided in the profile (i.e. mes)
* - private String name to store the name of the context (got from the label attribute in the context definition)
* - private String type to store the type of the context (i.e. research initiative or research community)
* - private String description to store the description of the context as given in the profile
* - private String zenodo_community to store the zenodo community associated to the context (main zenodo community)
*/
public class ResearchInitiative implements Serializable {
@JsonSchema(description = "The OpenAIRE id for the community/research infrastructure")
private String id; // openaireId
@JsonSchema(description = "The acronym of the community")
private String acronym; // context id
@JsonSchema(description = "The long name of the community")
private String name; // context name
@JsonSchema(description = "One of {Research Community, Research infrastructure}")
private String type; // context type: research initiative or research community
@JsonSchema(description = "Description of the research community/research infrastructure")
private String description;
@JsonSchema(
description = "The URL of the Zenodo community associated to the Research community/Research infrastructure")
private String zenodo_community;
public String getZenodo_community() {
return zenodo_community;
}
public void setZenodo_community(String zenodo_community) {
this.zenodo_community = zenodo_community;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String label) {
this.name = label;
}
public String getAcronym() {
return acronym;
}
public void setAcronym(String acronym) {
this.acronym = acronym;
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
this.description = description;
}
}
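A sketch (not part of this changeset) of the id construction 00|context_____::md5(originalId) mentioned in the javadoc above, using only the JDK:
// Builds the OpenAIRE id for a context from its original id (e.g. "mes").
static String openaireContextId(String originalId) throws java.security.NoSuchAlgorithmException {
	byte[] digest = java.security.MessageDigest.getInstance("MD5")
		.digest(originalId.getBytes(java.nio.charset.StandardCharsets.UTF_8));
	StringBuilder md5 = new StringBuilder();
	for (byte b : digest) {
		md5.append(String.format("%02x", b));
	}
	return "00|context_____::" + md5;
}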

View File

@ -0,0 +1,47 @@
package eu.dnetlib.dhp.skgif.model;
/**
* @author miriam.baglioni
* @Date 04/04/24
*/
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class DownloadsAndViews implements Serializable {
private String views;
private String downloads;
public String getViews() {
return views;
}
public void setViews(String views) {
this.views = views;
}
public String getDownloads() {
return downloads;
}
public void setDownloads(String downloads) {
this.downloads = downloads;
}
public static DownloadsAndViews newInstance(String views, String downloads) {
DownloadsAndViews uc = new DownloadsAndViews();
uc.setViews(views);
uc.setDownloads(downloads);
return uc;
}
public boolean isEmpty() {
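// considered empty unless both downloads and views are set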
return StringUtils.isEmpty(this.downloads) || StringUtils.isEmpty(this.views);
}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 04/04/24
*/
public class Indicator implements Serializable {
private DownloadsAndViews downloadsAndViews;
public DownloadsAndViews getDownloadsAndViews() {
return downloadsAndViews;
}
public void setDownloadsAndViews(DownloadsAndViews downloadsAndViews) {
this.downloadsAndViews = downloadsAndViews;
}
public static Indicator newInstance(DownloadsAndViews uc) {
Indicator i = new Indicator();
i.setDownloadsAndViews(uc);
return i;
}
public static Indicator newInstance(String downloads, String views) {
Indicator i = new Indicator();
i.setDownloadsAndViews(DownloadsAndViews.newInstance(views, downloads));
return i;
}
}
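The two factories above are equivalent ways to attach usage counts; note the argument order (Indicator.newInstance takes downloads then views, DownloadsAndViews.newInstance takes views then downloads). The count values below are illustrative only:
Indicator fromCounts = Indicator.newInstance("42", "128");                                 // downloads=42, views=128
Indicator fromObject = Indicator.newInstance(DownloadsAndViews.newInstance("128", "42")); // views=128, downloads=42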

View File

@ -10,6 +10,15 @@ import java.io.Serializable;
public class MinTopic implements Serializable {
private String local_identifier;
private String value;
private String scheme;
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getLocal_identifier() {
return local_identifier;

View File

@ -28,6 +28,16 @@ public class ResearchProduct implements Serializable {
@JsonProperty("related_products")
private List<Relations> related_products;
private Indicator indicator;
public Indicator getIndicator() {
return indicator;
}
public void setIndicator(Indicator indicator) {
this.indicator = indicator;
}
public String getLocal_identifier() {
return local_identifier;
}

View File

@ -9,9 +9,7 @@ import com.github.imifou.jsonschema.module.addon.AddonModule;
import com.github.victools.jsonschema.generator.*;
import eu.dnetlib.dhp.ExecCreateSchemas;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.*;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
//@Disabled
class GenerateJsonSchema {
@ -25,7 +23,7 @@ class GenerateJsonSchema {
configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(CommunityResult.class);
JsonNode jsonSchema = generator.generateSchema(ResearchProduct.class);
System.out.println(jsonSchema.toString());
}
@ -42,7 +40,7 @@ class GenerateJsonSchema {
.without(Option.NONPUBLIC_NONSTATIC_FIELDS_WITHOUT_GETTERS);
SchemaGeneratorConfig config = configBuilder.build();
SchemaGenerator generator = new SchemaGenerator(config);
JsonNode jsonSchema = generator.generateSchema(Result.class);
JsonNode jsonSchema = generator.generateSchema(ResearchProduct.class);
System.out.println(jsonSchema.toString());
}

View File

@ -36,7 +36,7 @@ public class FilterEntities implements Serializable {
.toString(
FilterEntities.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/filter_entities_parameters.json"));
"/eu/dnetlib/dhp/oa/graph/dump/skgif/filter_entities_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);

View File

@ -46,7 +46,7 @@ public class SelectConnectedEntities implements Serializable {
.toString(
SelectConnectedEntities.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/select_connected_entities_parameters.json"));
"/eu/dnetlib/dhp/oa/graph/dump/skgif/select_connected_entities_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);

View File

@ -87,20 +87,17 @@ public class DumpResearchProduct implements Serializable {
}
private static void moveDumpedProducts(SparkSession spark, String workingDir, String outputPath) {
Dataset<ResearchProduct> researchProducts = spark.emptyDataset(Encoders.bean(ResearchProduct.class));
for (EntityType e : ModelSupport.entityTypes.keySet()) {
if (ModelSupport.isResult(e))
researchProducts = researchProducts
.union(
Utils
.readPath(
spark, workingDir + "products" + e.name() + "/researchproduct", ResearchProduct.class));
Utils
.readPath(
spark, workingDir + "products" + e.name() + "/researchproduct", ResearchProduct.class)
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath + "products");
}
researchProducts
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "products");
}
private static <R extends Result> void dumpResearchProduct(SparkSession spark, String inputPath, String workingDir,

View File

@ -70,6 +70,7 @@ public class EmitFromEntities implements Serializable {
});
}
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
@ -78,7 +79,7 @@ public class EmitFromEntities implements Serializable {
emitDatasourcePublisher(spark, inputPath, workingDir);
}
private static void emitDatasourcePublisher(SparkSession spark, String inputPath, String workingDir) {
Dataset<Row> journalIds = spark
.read()
@ -130,7 +131,8 @@ public class EmitFromEntities implements Serializable {
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|| s.getQualifier().getClassid().equalsIgnoreCase("sdg")
|| s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
Topic t = new Topic();
t

View File

@ -9,7 +9,6 @@ import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoAllowedTypeException;
import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoTitleFoundException;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
@ -35,6 +34,7 @@ public class ResultMapper implements Serializable {
mapType(out, input);
mapTopic(out, input);
mapContribution(out, input);
mapMeasure(out, input);
//The manifestation will be included extending the result as well as the relations to funder, organization and other results
return out;
@ -47,6 +47,25 @@ public class ResultMapper implements Serializable {
}
private static void mapMeasure(ResearchProduct out, eu.dnetlib.dhp.schema.oaf.Result input) {
if (Optional.ofNullable(input.getMeasures()).isPresent()) {
Indicator i = new Indicator();
DownloadsAndViews uc = new DownloadsAndViews();
input.getMeasures().forEach(m -> {
if (m.getId().equals("downloads")) {
uc.setDownloads(m.getUnit().get(0).getValue());
}
if (m.getId().equals("views")) {
uc.setViews(m.getUnit().get(0).getValue());
}
});
if (!uc.isEmpty()) {
i.setDownloadsAndViews(uc);
out.setIndicator(i);
}
}
}
private static <E extends Result> void mapContribution(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getAuthor()).isPresent()) {
int count = 0;
@ -93,9 +112,10 @@ public class ResultMapper implements Serializable {
.getSubject()
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos"))
// ||
// s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
||
s.getQualifier().getClassid().equalsIgnoreCase("sdg")
|| s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
ResultTopic topic = new ResultTopic();
MinTopic minTopic = new MinTopic();
@ -103,6 +123,7 @@ public class ResultMapper implements Serializable {
.setLocal_identifier(
Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
minTopic.setValue(s.getValue());
minTopic.setScheme(s.getQualifier().getClassid());
topic
.setTopic(minTopic);
if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
@ -112,7 +133,7 @@ public class ResultMapper implements Serializable {
} catch (NumberFormatException nfe) {
}
provenance.setType(s.getDataInfo().getInferenceprovenance());
provenance.setType(s.getDataInfo().getProvenanceaction().getClassname());
topic.setProvenance(provenance);
}

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,110 +0,0 @@
<workflow-app name="copy_graph_from_hive" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>hivePath</name>
<description>the path of the graph stored on hive to be copied</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="copy_graph" />
<!-- <start to="make_archive" />-->
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="copy_graph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Copy graph</name>
<class>eu.dnetlib.dhp.oa.graph.dump.SparkCopyGraph</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--hivePath</arg><arg>${hivePath}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End" />
</workflow-app>

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,511 +0,0 @@
<workflow-app name="dump_country" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>country</name>
<description>the country for which to produce the dump</description>
</property>
<property>
<name>hiveDbName</name>
<description>the target hive database name</description>
</property>
<property>
<name>hiveJdbcUrl</name>
<description>hive server jdbc url</description>
</property>
<property>
<name>hiveMetastoreUris</name>
<description>hive server metastore URIs</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="save_community_map" />
<!-- <start to="make_archive" />-->
<action name="save_community_map">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SaveCommunityMap</main-class>
<arg>--outputPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
</java>
<ok to="find_results_for_country"/>
<error to="Kill"/>
</action>
<action name="find_results_for_country">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Find results related to country</name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultsRelatedToCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/resultsInCountry</arg>
<arg>--country</arg><arg>${country}</arg>
</spark>
<ok to="fork_select"/>
<error to="Kill"/>
</action>
<fork name="fork_select">
<path start="select_publication"/>
<path start="select_dataset"/>
<path start="select_orp"/>
<path start="select_software"/>
</fork>
<action name="select_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=7G
--executor-cores=2
--driver-memory=7G
--conf spark.sql.shuffle.partitions=3840
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>dataset</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="select_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software </name>
<class>eu.dnetlib.dhp.oa.graph.dump.country.SparkFindResultWithCountry</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--resultType</arg><arg>software</arg>
<arg>--resultWithCountry</arg><arg>${workingDir}/resultsInCountry</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="fork_dump_community"/>
<fork name="fork_dump_community">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_orp"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table publication for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/publication</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--dumpType</arg><arg>country</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table dataset for community/funder related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--dumpType</arg><arg>country</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table ORP for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--dumpType</arg><arg>country</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table software for community related products</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/original/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${workingDir}/dump/software</arg>
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
<arg>--dumpType</arg><arg>country</arg>
</spark>
<ok to="join_dump_comm"/>
<error to="Kill"/>
</action>
<join name="join_dump_comm" to="prepareResultProject"/>
<action name="prepareResultProject">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Prepare association result subset of project info</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkPrepareResultProject</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--outputPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="fork_extendWithProject"/>
<error to="Kill"/>
</action>
<fork name="fork_extendWithProject">
<path start="extend_publication"/>
<path start="extend_dataset"/>
<path start="extend_orp"/>
<path start="extend_software"/>
</fork>
<action name="extend_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped publications with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/publication</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/publication</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped dataset with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/dataset</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_orp">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped ORP with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/otherresearchproduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/otherresearchproduct</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<action name="extend_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Extend dumped software with information about project</name>
<class>eu.dnetlib.dhp.oa.graph.dump.community.SparkUpdateProjectInfo</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/dump/software</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/software</arg>
<arg>--preparedInfoPath</arg><arg>${workingDir}/preparedInfo</arg>
</spark>
<ok to="join_extend"/>
<error to="Kill"/>
</action>
<join name="join_extend" to="make_archive"/>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}/tar</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<kill name="Kill">
<message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
</message>
</kill>
<end name="End" />
</workflow-app>

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,282 +0,0 @@
<workflow-app name="dump_graph_csv" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communities</name>
<description>the communities whose products should be dumped</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="reset_outputpath" />
<action name="reset_outputpath">
<fs>
<delete path="${outputPath}"/>
<mkdir path="${outputPath}"/>
</fs>
<ok to="dump_communities"/>
<error to="Kill"/>
</action>
<action name="dump_communities">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.csv.DumpCommunities</main-class>
<arg>--outputPath</arg><arg>${outputPath}/community</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</java>
<ok to="select_result_dump_relation"/>
<error to="Kill"/>
</action>
<action name="select_result_dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results ids connected to communities and dump relation </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkSelectResultsAndDumpRelations</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</spark>
<ok to="fork_dump_result_author_pid"/>
<error to="Kill"/>
</action>
<fork name="fork_dump_result_author_pid">
<path start="dump_publication"/>
<path start="dump_dataset"/>
<path start="dump_other"/>
<path start="dump_software"/>
</fork>
<action name="dump_publication">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from publication </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=9G
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>publication</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_dataset">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from dataset </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>dataset</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_other">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from other </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>otherresearchproduct</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<action name="dump_software">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select results from software</name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkDumpResults</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--resultType</arg><arg>software</arg>
</spark>
<ok to="join_dump"/>
<error to="Kill"/>
</action>
<join name="join_dump" to="dump_single_results"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_single_results">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump single results </name>
<class>eu.dnetlib.dhp.oa.graph.dump.csv.SparkMoveOnSigleDir</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--workingPath</arg><arg>${outputPath}/workingDir</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,31 +0,0 @@
[
{
"paramName":"nn",
"paramLongName":"nameNode",
"paramDescription": "the name node",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "sd",
"paramLongName": "singleDeposition",
"paramDescription": "true if the dump should be created for a single community",
"paramRequired": false
},
{
"paramName": "ci",
"paramLongName": "communityId",
"paramDescription": "the id of the community for which to create the dump",
"paramRequired": false
}
]

View File

@ -1,30 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "ra",
"paramLongName": "resultAggregation",
"paramDescription": "true if all the result type should be saved under the generic result name. false to get a different dump for each result type",
"paramRequired": true
}
]

View File

@ -1,30 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,23 +0,0 @@
[
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "nn",
"paramLongName": "nameNode",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": true
},
{
"paramName":"c",
"paramLongName":"communities",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,38 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"c",
"paramLongName":"communities",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,36 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rtn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,25 +0,0 @@
[
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"wp",
"paramLongName":"workingPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"o",
"paramLongName":"outputPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,20 +0,0 @@
[
{
"paramName": "hdfs",
"paramLongName": "hdfsPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "nn",
"paramLongName": "nameNode",
"paramDescription": "the name node",
"paramRequired": true
}
]

View File

@ -1,31 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": true
}
]

View File

@ -1,20 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -1,75 +0,0 @@
[
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"dt",
"paramLongName":"dumpType",
"paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
"paramRequired": false
},
{
"paramName":"cid",
"paramLongName":"communityId",
"paramDescription": "the id of the community to be dumped",
"paramRequired": false
},
{
"paramName":"sc",
"paramLongName":"selectionCriteria",
"paramDescription": "the selection criteria to choose the results",
"paramRequired": false
},
{
"paramName":"pm",
"paramLongName":"pathMap",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
},
{
"paramName":"md",
"paramLongName":"masterDuplicatePath",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
}
]

View File

@ -1,41 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"gp",
"paramLongName":"graphPath",
"paramDescription": "the path to the relations",
"paramRequired": true
},
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the relations",
"paramRequired": true
}
]

View File

@ -1,44 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "rs",
"paramLongName": "removeSet",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
},
{
"paramName": "wd",
"paramLongName": "workingDir",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
},
{
"paramName": "mdp",
"paramLongName": "masterDuplicatePath",
"paramDescription": "the list of classname relations, split by ';', not to be dumped",
"paramRequired": false
}
]

View File

@ -1,37 +0,0 @@
[
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"cp",
"paramLongName":"contextPath",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
}
]

View File

@ -1,27 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"crp",
"paramLongName":"contextRelationPath",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
}
]

View File

@ -1,27 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"rp",
"paramLongName":"relationPath",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
}
]

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,88 +0,0 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="dump_organization"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_organization">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table organization and related relations </name>
<class>eu.dnetlib.dhp.oa.graph.dump.organizationonly.SparkDumpOrganizationJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
<arg>--communityMapPath</arg><arg>noneed</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,29 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "pip",
"paramLongName": "preparedInfoPath",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]

View File

@ -1,26 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "sb",
"paramLongName": "substring",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
}
]

View File

@ -1,27 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "pl",
"paramLongName": "projectListPath",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]

View File

@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -1,171 +0,0 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>projectListPath</name>
<description>the path to the project list</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>accessToken</name>
<description>the access token used for the deposition in Zenodo</description>
</property>
<property>
<name>connectionUrl</name>
<description>the connection url for Zenodo</description>
</property>
<property>
<name>metadata</name>
<description> the metadata associated to the deposition</description>
</property>
<property>
<name>depositionType</name>
<description>the type of deposition we want to perform. "new" for brand new deposition, "version" for a new version of a published deposition (in this case the concept record id must be provided), "upload" to upload content to an open deposition for which we already have the deposition id (in this case the deposition id should be provided)</description>
</property>
<property>
<name>conceptRecordId</name>
<description>for new version, the id of the record for the old deposition</description>
</property>
<property>
<name>depositionId</name>
<description>the id of an open deposition to which content has to be added</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="dump_project"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="dump_project">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Dump table project </name>
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}/project</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--outputPath</arg><arg>${workingDir}/project</arg>
<arg>--communityMapPath</arg><arg>noneed</arg>
</spark>
<ok to="get_new_projects"/>
<error to="Kill"/>
</action>
<action name="get_new_projects">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Select the subset of new projects</name>
<class>eu.dnetlib.dhp.oa.graph.dump.projectssubset.ProjectsSubsetSparkJob</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/project</arg>
<arg>--outputPath</arg><arg>${workingDir}/tar/project</arg>
<arg>--projectListPath</arg><arg>${projectListPath}</arg>
</spark>
<ok to="make_archive"/>
<error to="Kill"/>
</action>
<action name="make_archive">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.MakeTar</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--sourcePath</arg><arg>${workingDir}/tar</arg>
</java>
<ok to="send_zenodo"/>
<error to="Kill"/>
</action>
<action name="send_zenodo">
<java>
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
<arg>--hdfsPath</arg><arg>${outputPath}</arg>
<arg>--nameNode</arg><arg>${nameNode}</arg>
<arg>--accessToken</arg><arg>${accessToken}</arg>
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
<arg>--metadata</arg><arg>${metadata}</arg>
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
<arg>--depositionType</arg><arg>${depositionType}</arg>
<arg>--depositionId</arg><arg>${depositionId}</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -1,42 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "c",
"paramLongName": "resultWithCountry",
"paramDescription": "the path of the id of results associated to a given country",
"paramRequired": true
},
{
"paramName":"tn",
"paramLongName":"resultTableName",
"paramDescription": "the name of the result table we are currently working on",
"paramRequired": true
},
{
"paramName":"rt",
"paramLongName":"resultType",
"paramDescription": "",
"paramRequired": true
}
]

View File

@ -1,29 +0,0 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName": "c",
"paramLongName": "country",
"paramDescription": "the path of the association result projectlist",
"paramRequired": true
}
]


@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>


@ -1,102 +0,0 @@
<workflow-app name="dump_graph_csv" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the source path</description>
</property>
<property>
<name>outputPath</name>
<description>the output path</description>
</property>
<property>
<name>communities</name>
<description>the communities whose products should be dumped</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="select_result_dump_relation" />
<action name="select_result_dump_relation">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>select result ids connected to communities and dump relations</name>
<class>eu.dnetlib.dhp.oa.graph.dump.serafeim.SparkSelectResultsAndDumpRelations</class>
<jar>dump-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=10G
--executor-cores=3
--driver-memory=10G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
<arg>--workingPath</arg><arg>${workingDir}</arg>
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--communities</arg><arg>${communities}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<end name="End"/>
</workflow-app>


@ -68,6 +68,8 @@
<decision name="resumeFrom">
<switch>
<case to="get_ds_master_duplicate">${wf:conf('resumeFrom') eq "MapEoscDsIds"}</case>
<case to="dump_research_product">${wf:conf('resumeFrom') eq "DumpResearchProduct"}</case>
<case to="emit_from_result">${wf:conf('resumeFrom') eq "EmitFromResult"}</case>
<default to="select_subset"/>
</switch>
</decision>


@ -1,37 +0,0 @@
[
{
"paramName":"cmp",
"paramLongName":"communityMapPath",
"paramDescription": "the path to the serialization of the community map",
"paramRequired": false
},
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "out",
"paramLongName": "outputPath",
"paramDescription": "the path used to store temporary output files",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"cid",
"paramLongName":"communityId",
"paramDescription": "the id of the community to be dumped",
"paramRequired": false
}
]


@ -1,30 +0,0 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>hiveMetastoreUris</name>
<value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
</property>
<property>
<name>hiveJdbcUrl</name>
<value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
</property>
<property>
<name>hiveDbName</name>
<value>openaire</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

Some files were not shown because too many files have changed in this diff.