[SKG-IF] refactoring and fixing issues

parent 0c887ca015
commit 752fd896e4

Contributor.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
@@ -7,8 +8,9 @@ import java.io.Serializable;
  * @Date 22/02/24
  */
 public class Contributor implements Serializable {
-	private String person; //I would not map it because we have only information regarding the person (if any) associated to the leading organization
-	private String organization ; //contributors.person
+	private String person; // I would not map it because we have only information regarding the person (if any)
+	// associated to the leading organization
+	private String organization; // contributors.person
 
-	private String role ;//private
+	private String role;// private
 }

Datasource.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
@@ -8,143 +9,146 @@ import java.util.List;
  * @Date 21/02/24
  */
 public class Datasource implements Serializable {
-	private String local_identifier ;//id
-	private List<Identifier> identifiers; //.schema pid.qualifier.classid;identifiers.value pid.value
-	private String name; //officialname.value
+	private String local_identifier;// id
+	private List<Identifier> identifiers; // .schema pid.qualifier.classid;identifiers.value pid.value
+	private String name; // officialname.value
 	private String submission_policy_url;// submissionpolicyurl
 	private String preservation_policy_url;// preservationpolicyurl
 	private Boolean version_control;// versioncontrol bool
-	private List<PersistentIdentitySystems> persistent_identity_systems;//. product_type researchentitytype list type to be remapped to the eosc types
-	//persistent_identity_systems. pid_scheme pidsystems.value when not null. It can be a string with multiple values
+	private List<PersistentIdentitySystems> persistent_identity_systems;// . product_type researchentitytype list type
+	// to be remapped to the eosc types
+	// persistent_identity_systems. pid_scheme pidsystems.value when not null. It can be a string with multiple values
 	private String jurisdiction;// jurisdiction.classname
 	private String data_source_classification;// eoscdatasourcetype.classname
 	private List<String> research_product_type;// researchentitytype list type to be remapped to the eosc types
-	private Boolean thematic ;//thematic bool
-	private List<Licence> research_product_license; //.name not mappable listresearch_product_license.url not mappable
-	private List<String> research_product_access_policy;// "databaseaccesstype if open => open access (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
-	//if restricted => restricted access (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
-	//if closed => metadata only access (https://vocabularies.coar-repositories.org/access_rights/c_14cb/) " list
-	private List<Licence> research_product_metadata_license; //.name not mappable list
-	//research_product_metadata_license.url not mappable
-	private List<String>research_product_metadata_access_policy ;//researchproductmetadataccesspolicies list with the same mapping of research_product_access_policy
+	private Boolean thematic;// thematic bool
+	private List<Licence> research_product_license; // .name not mappable listresearch_product_license.url not mappable
+	private List<String> research_product_access_policy;// "databaseaccesstype if open => open access
+	// (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
+	// if restricted => restricted access (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
+	// if closed => metadata only access (https://vocabularies.coar-repositories.org/access_rights/c_14cb/) " list
+	private List<Licence> research_product_metadata_license; // .name not mappable list
+	// research_product_metadata_license.url not mappable
+	private List<String> research_product_metadata_access_policy;// researchproductmetadataccesspolicies list with the
+	// same mapping of research_product_access_policy
 
 	public String getLocal_identifier() {
 		return local_identifier;
 	}
 
 	public void setLocal_identifier(String local_identifier) {
 		this.local_identifier = local_identifier;
 	}
 
 	public List<Identifier> getIdentifiers() {
 		return identifiers;
 	}
 
 	public void setIdentifiers(List<Identifier> identifiers) {
 		this.identifiers = identifiers;
 	}
 
 	public String getName() {
 		return name;
 	}
 
 	public void setName(String name) {
 		this.name = name;
 	}
 
 	public String getSubmission_policy_url() {
 		return submission_policy_url;
 	}
 
 	public void setSubmission_policy_url(String submission_policy_url) {
 		this.submission_policy_url = submission_policy_url;
 	}
 
 	public String getPreservation_policy_url() {
 		return preservation_policy_url;
 	}
 
 	public void setPreservation_policy_url(String preservation_policy_url) {
 		this.preservation_policy_url = preservation_policy_url;
 	}
 
 	public Boolean getVersion_control() {
 		return version_control;
 	}
 
 	public void setVersion_control(Boolean version_control) {
 		this.version_control = version_control;
 	}
 
 	public List<PersistentIdentitySystems> getPersistent_identity_systems() {
 		return persistent_identity_systems;
 	}
 
 	public void setPersistent_identity_systems(List<PersistentIdentitySystems> persistent_identity_systems) {
 		this.persistent_identity_systems = persistent_identity_systems;
 	}
 
 	public String getJurisdiction() {
 		return jurisdiction;
 	}
 
 	public void setJurisdiction(String jurisdiction) {
 		this.jurisdiction = jurisdiction;
 	}
 
 	public String getData_source_classification() {
 		return data_source_classification;
 	}
 
 	public void setData_source_classification(String data_source_classification) {
 		this.data_source_classification = data_source_classification;
 	}
 
 	public List<String> getResearch_product_type() {
 		return research_product_type;
 	}
 
 	public void setResearch_product_type(List<String> research_product_type) {
 		this.research_product_type = research_product_type;
 	}
 
 	public Boolean getThematic() {
 		return thematic;
 	}
 
 	public void setThematic(Boolean thematic) {
 		this.thematic = thematic;
 	}
 
 	public List<Licence> getResearch_product_license() {
 		return research_product_license;
 	}
 
 	public void setResearch_product_license(List<Licence> research_product_license) {
 		this.research_product_license = research_product_license;
 	}
 
 	public List<String> getResearch_product_access_policy() {
 		return research_product_access_policy;
 	}
 
 	public void setResearch_product_access_policy(List<String> research_product_access_policy) {
 		this.research_product_access_policy = research_product_access_policy;
 	}
 
 	public List<Licence> getResearch_product_metadata_license() {
 		return research_product_metadata_license;
 	}
 
 	public void setResearch_product_metadata_license(List<Licence> research_product_metadata_license) {
 		this.research_product_metadata_license = research_product_metadata_license;
 	}
 
 	public List<String> getResearch_product_metadata_access_policy() {
 		return research_product_metadata_access_policy;
 	}
 
 	public void setResearch_product_metadata_access_policy(List<String> research_product_metadata_access_policy) {
 		this.research_product_metadata_access_policy = research_product_metadata_access_policy;
 	}
 }

Grant.java
@@ -1,153 +1,154 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
+import java.io.Serializable;
 import java.util.List;
 
 import org.codehaus.jackson.annotate.JsonProperty;
 
-import java.io.Serializable;
-
 /**
  * @author miriam.baglioni
  * @Date 22/02/24
  */
 public class Grant implements Serializable {
 	private String local_identifier;// id
-	private List<Identifier> identifiers;//.schema pid.qualifier.classid identifiers.value pid.value
-	//identifiers.schema funder acronym to be used the xpath //fundingtree/funder/shortname
-	//identifiers.value project.code
+	private List<Identifier> identifiers;// .schema pid.qualifier.classid identifiers.value pid.value
+	// identifiers.schema funder acronym to be used the xpath //fundingtree/funder/shortname
+	// identifiers.value project.code
 
 	private String title;// title.value
-	@JsonProperty(value="abstract")
-	private String summary ;//summary.value
-	private String acronym; //acronym.value
-	private String funder ;//fundingtree to be used the xpath //funder/name
+	@JsonProperty(value = "abstract")
+	private String summary;// summary.value
+	private String acronym; // acronym.value
+	private String funder;// fundingtree to be used the xpath //funder/name
 	private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
 	private String currency;// currency.value
-	private Float funded_amount;//' fundedamount.value
+	private Float funded_amount;// ' fundedamount.value
 	private List<String> keywords;// subject.value
 	private String start_date;// startdate.value
 	private String end_date;// enddate.value
 	private String website;// websiteurl.value
-	private List<String> beneficiaries;// organization.id for the organizations in the relation with semantic class isParticipant produces the list of organization internal identifiers
+	private List<String> beneficiaries;// organization.id for the organizations in the relation with semantic class
+	// isParticipant produces the list of organization internal identifiers
 	private List<Contributor> contributors;//
 
 	public String getLocal_identifier() {
 		return local_identifier;
 	}
 
 	public void setLocal_identifier(String local_identifier) {
 		this.local_identifier = local_identifier;
 	}
 
 	public List<Identifier> getIdentifiers() {
 		return identifiers;
 	}
 
 	public void setIdentifiers(List<Identifier> identifiers) {
 		this.identifiers = identifiers;
 	}
 
 	public String getTitle() {
 		return title;
 	}
 
 	public void setTitle(String title) {
 		this.title = title;
 	}
 
 	public String getSummary() {
 		return summary;
 	}
 
 	public void setSummary(String summary) {
 		this.summary = summary;
 	}
 
 	public String getAcronym() {
 		return acronym;
 	}
 
 	public void setAcronym(String acronym) {
 		this.acronym = acronym;
 	}
 
 	public String getFunder() {
 		return funder;
 	}
 
 	public void setFunder(String funder) {
 		this.funder = funder;
 	}
 
 	public String getFunding_stream() {
 		return funding_stream;
 	}
 
 	public void setFunding_stream(String funding_stream) {
 		this.funding_stream = funding_stream;
 	}
 
 	public String getCurrency() {
 		return currency;
 	}
 
 	public void setCurrency(String currency) {
 		this.currency = currency;
 	}
 
 	public Float getFunded_amount() {
 		return funded_amount;
 	}
 
 	public void setFunded_amount(Float funded_amount) {
 		this.funded_amount = funded_amount;
 	}
 
 	public List<String> getKeywords() {
 		return keywords;
 	}
 
 	public void setKeywords(List<String> keywords) {
 		this.keywords = keywords;
 	}
 
 	public String getStart_date() {
 		return start_date;
 	}
 
 	public void setStart_date(String start_date) {
 		this.start_date = start_date;
 	}
 
 	public String getEnd_date() {
 		return end_date;
 	}
 
 	public void setEnd_date(String end_date) {
 		this.end_date = end_date;
 	}
 
 	public String getWebsite() {
 		return website;
 	}
 
 	public void setWebsite(String website) {
 		this.website = website;
 	}
 
 	public List<String> getBeneficiaries() {
 		return beneficiaries;
 	}
 
 	public void setBeneficiaries(List<String> beneficiaries) {
 		this.beneficiaries = beneficiaries;
 	}
 
 	public List<Contributor> getContributors() {
 		return contributors;
 	}
 
 	public void setContributors(List<Contributor> contributors) {
 		this.contributors = contributors;
 	}
 }

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;

Organization.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
@@ -8,77 +9,77 @@ import java.util.List;
  * @Date 21/02/24
  */
 public class Organization implements Serializable {
 	private String local_identifier; // id
 	private List<Identifier> identifiers; // pid.qualifier.classid; pid.value list
-	private String name ; //legalname.value
+	private String name; // legalname.value
 
 	private String short_name; // legalshortname.value
 	private List<String> other_names;// alternative_names.value list
-	private String website ;//websiteurl.value
+	private String website;// websiteurl.value
 	private String country; // country.classid
 	private String type; // map relevant types from the ec* fields of organisations. If no match, default to "other"
 
 	public String getLocal_identifier() {
 		return local_identifier;
 	}
 
 	public void setLocal_identifier(String local_identifier) {
 		this.local_identifier = local_identifier;
 	}
 
 	public List<Identifier> getIdentifiers() {
 		return identifiers;
 	}
 
 	public void setIdentifiers(List<Identifier> identifiers) {
 		this.identifiers = identifiers;
 	}
 
 	public String getName() {
 		return name;
 	}
 
 	public void setName(String name) {
 		this.name = name;
 	}
 
 	public String getShort_name() {
 		return short_name;
 	}
 
 	public void setShort_name(String short_name) {
 		this.short_name = short_name;
 	}
 
 	public List<String> getOther_names() {
 		return other_names;
 	}
 
 	public void setOther_names(List<String> other_names) {
 		this.other_names = other_names;
 	}
 
 	public String getWebsite() {
 		return website;
 	}
 
 	public void setWebsite(String website) {
 		this.website = website;
 	}
 
 	public String getCountry() {
 		return country;
 	}
 
 	public void setCountry(String country) {
 		this.country = country;
 	}
 
 	public String getType() {
 		return type;
 	}
 
 	public void setType(String type) {
 		this.type = type;
 	}
 }

OrganizationTypes.java
@@ -1,20 +1,17 @@
 
 package eu.dnetlib.dhp.skgif.model;
 
 public enum OrganizationTypes {
-	ARCHIVE ("archive"),
+	ARCHIVE("archive"),
 
 	COMPANY("company"),
 
-	EDUCATION("education"),
-	FACILITY("facility"),
-	GOVERNMENT("government"),
-	HEALTHCARE("healthcare"),
-	NONPROFIT("nonprofit"),
-	FUNDER("funder"),
-	OTHER("other");
+	EDUCATION("education"), FACILITY("facility"), GOVERNMENT("government"), HEALTHCARE("healthcare"), NONPROFIT(
+		"nonprofit"), FUNDER("funder"), OTHER("other");
 
 	public final String label;
 
 	private OrganizationTypes(String label) {
 		this.label = label;
 	}
 }

@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;

Prefixes.java
@@ -1,5 +1,5 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
-
@@ -8,24 +8,22 @@ import java.io.Serializable;
  * @Date 21/02/24
  */
 public enum Prefixes implements Serializable {
 	RESEARCH_PRODUCT("product_____::"),
 
 	ORGANIZATION("organization::"),
 
 	GRANT("grant_______::"),
 
 	PERSON(
 		"person______::"),
 
 	TEMPORARY_PERSON("temp_person_::"),
 
-	DATASOURCE("datasource__::"),
-	TOPIC("topic_______::"),
-	VENUE("venue_______::");
+	DATASOURCE("datasource__::"), TOPIC("topic_______::"), VENUE("venue_______::");
 
 	public final String label;
 
 	private Prefixes(String label) {
 		this.label = label;
 	}
 }

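Note: the dump classes later in this commit derive SKG-IF local identifiers from these prefixes via Utils.getIdentifier(Prefixes.X, id). Utils itself is not part of this diff, so the following is only a minimal sketch of what such a helper could look like, under the assumption that the prefix label is concatenated with a hash of the OpenAIRE identifier (the md5 step is an assumption, not the actual implementation):

// Hypothetical sketch only: Utils.getIdentifier is not shown in this commit.
// Assumption: the SKG-IF local identifier is the prefix label followed by a
// hash of the OpenAIRE id (commons-codec md5 used here for illustration).
import org.apache.commons.codec.digest.DigestUtils;

public class IdentifierSketch {
	public static String getIdentifier(Prefixes prefix, String id) {
		return prefix.label + DigestUtils.md5Hex(id); // e.g. "datasource__::" + md5(id)
	}

	public static void main(String[] args) {
		// prints something like "datasource__::900150983cd2..."
		System.out.println(getIdentifier(Prefixes.DATASOURCE, "10|doajarticles::abc"));
	}
}
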
RelationType.java
@@ -8,15 +8,10 @@ import java.io.Serializable;
  * @Date 05/09/23
  */
 public enum RelationType implements Serializable {
-	RESULT_OUTCOME_FUNDING("isProducedBy"),
-	RESULT_AFFILIATIED_TO_ORGANIZATION("hasAuthorInstitution"),
-	ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"),
-	SUPPLEMENT("IsSupplementedBy"),
-	DOCUMENTS(
-		"IsDocumentedBy"),
-	PART("IsPartOf"),
-	VERSION("IsNewVersionOf"),
-	CITATION("Cites");
+	RESULT_OUTCOME_FUNDING("isProducedBy"), RESULT_AFFILIATIED_TO_ORGANIZATION(
+		"hasAuthorInstitution"), ORGANIZATION_PARTICIPANT_IN_PROJECT("isParticipant"), SUPPLEMENT(
+			"IsSupplementedBy"), DOCUMENTS(
+				"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersionOf"), CITATION("Cites");
 
 	public final String label;
 

ResearchProduct.java
@@ -16,7 +16,7 @@ public class ResearchProduct implements Serializable {
 	private String local_identifier;
 	private List<Identifier> identifiers;
 	private Map<String, List<String>> titles;
-	private Map<String,List<String>> abstracts;
+	private Map<String, List<String>> abstracts;
 	@JsonProperty("product_type")
 	private String product_type;
 	private List<ResultTopic> topics;

Venue.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
@@ -8,95 +9,95 @@ import java.util.List;
  * @Date 27/02/24
  */
 public class Venue implements Serializable {
 	private String local_identifier;
 	private List<Identifier> identifiers;
 	private String name;
 	private String acronym;
 	private String type;
 	private String publisher;
 	private String series;
 	private Boolean is_currently_full_oa;
 
 	private String creation_date;
 	private List<VenueContribution> contributions;
 
 	public String getLocal_identifier() {
 		return local_identifier;
 	}
 
 	public void setLocal_identifier(String local_identifier) {
 		this.local_identifier = local_identifier;
 	}
 
 	public List<Identifier> getIdentifiers() {
 		return identifiers;
 	}
 
 	public void setIdentifiers(List<Identifier> identifiers) {
 		this.identifiers = identifiers;
 	}
 
 	public String getName() {
 		return name;
 	}
 
 	public void setName(String name) {
 		this.name = name;
 	}
 
 	public String getAcronym() {
 		return acronym;
 	}
 
 	public void setAcronym(String acronym) {
 		this.acronym = acronym;
 	}
 
 	public String getType() {
 		return type;
 	}
 
 	public void setType(String type) {
 		this.type = type;
 	}
 
 	public String getPublisher() {
 		return publisher;
 	}
 
 	public void setPublisher(String publisher) {
 		this.publisher = publisher;
 	}
 
 	public String getSeries() {
 		return series;
 	}
 
 	public void setSeries(String series) {
 		this.series = series;
 	}
 
 	public Boolean getIs_currently_full_oa() {
 		return is_currently_full_oa;
 	}
 
 	public void setIs_currently_full_oa(Boolean is_currently_full_oa) {
 		this.is_currently_full_oa = is_currently_full_oa;
 	}
 
 	public String getCreation_date() {
 		return creation_date;
 	}
 
 	public void setCreation_date(String creation_date) {
 		this.creation_date = creation_date;
 	}
 
 	public List<VenueContribution> getContributions() {
 		return contributions;
 	}
 
 	public void setContributions(List<VenueContribution> contributions) {
 		this.contributions = contributions;
 	}
 }

VenueContribution.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
@@ -8,23 +9,23 @@ import java.util.List;
  * @Date 27/02/24
  */
 public class VenueContribution implements Serializable {
 	private String person;
 	private List<String> roles;
 
 	public String getPerson() {
 
 		return person;
 	}
 
 	public void setPerson(String person) {
 		this.person = person;
 	}
 
 	public List<String> getRoles() {
 		return roles;
 	}
 
 	public void setRoles(List<String> roles) {
 		this.roles = roles;
 	}
 }

VenueIdentifierType.java
@@ -1,22 +1,16 @@
 
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
 
 public enum VenueIdentifierType implements Serializable {
 
-	EISSN("eissn"),
-	ISSN("issn"),
-	LISSN("lissn"),
-	ISBN("isbn"),
-	OPENDOAR(
-		"opendoar"),
-	R3DATA("re3data.org"),
-	FAIRSHARING("fairsharing");
-
+	EISSN("eissn"), ISSN("issn"), LISSN("lissn"), ISBN("isbn"), OPENDOAR(
+		"opendoar"), R3DATA("re3data.org"), FAIRSHARING("fairsharing");
 
 	public final String label;
 
 	private VenueIdentifierType(String label) {
 		this.label = label;
 	}
 }

VenueType.java
@@ -1,21 +1,16 @@
 
 package eu.dnetlib.dhp.skgif.model;
 
 import java.io.Serializable;
 
 public enum VenueType implements Serializable {
 
-	REPOSITORY("repository"),
-	JOURNAL("journal"),
-	CONFERENCE("conference"),
-	BOOK("book"),
-	OTHER(
-		"other"),
-	UNKNOWN("unknown");
-
+	REPOSITORY("repository"), JOURNAL("journal"), CONFERENCE("conference"), BOOK("book"), OTHER(
+		"other"), UNKNOWN("unknown");
 
 	public final String label;
 
 	private VenueType(String label) {
 		this.label = label;
 	}
 }

CardinalityTooHighException.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.oa.graph.dump.exceptions;
 
 public class CardinalityTooHighException extends Exception {

NoAvailableEntityTypeException.java
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.oa.graph.dump.exceptions;
 
 public class NoAvailableEntityTypeException extends Exception {

DumpDatasource.java
@@ -1,9 +1,12 @@
 
 package eu.dnetlib.dhp.oa.graph.dump.skgif;
 
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.skgif.model.Identifier;
-import eu.dnetlib.dhp.skgif.model.Prefixes;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.util.*;
+import java.util.stream.Collectors;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.FilterFunction;
@@ -15,133 +18,156 @@ import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.Serializable;
-import java.util.*;
-import java.util.stream.Collectors;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.skgif.model.Identifier;
+import eu.dnetlib.dhp.skgif.model.Prefixes;
 
 /**
  * @author miriam.baglioni
  * @Date 21/02/24
  */
 public class DumpDatasource implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(DumpDatasource.class);
 
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				DumpDatasource.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/oa/graph/dump/dump_datasource_parameters.json"));
 
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 
 		final String workingDir = parser.get("workingDir");
 		log.info("workingDir: {}", workingDir);
 
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 
 		SparkConf conf = new SparkConf();
 
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath + "Datasources");
 
 				mapDatasource(spark, inputPath, outputPath);
 			});
 	}
 
 	private static void mapDatasource(SparkSession spark, String inputPath, String outputPath) {
-		Utils.readPath(spark, inputPath + "datasource", Datasource.class)
-			.filter((FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible() && ! d.getDataInfo().getDeletedbyinference())
+		Utils
+			.readPath(spark, inputPath + "datasource", Datasource.class)
+			.filter(
+				(FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
+					&& !d.getDataInfo().getDeletedbyinference())
 			.map((MapFunction<Datasource, eu.dnetlib.dhp.skgif.model.Datasource>) d -> {
 				eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
 				datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
-				datasource.setIdentifiers(d.getPid()
-					.stream()
-					.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
-					.collect(Collectors.toList()));
+				datasource
+					.setIdentifiers(
						d
							.getPid()
							.stream()
							.map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
							.collect(Collectors.toList()));
 
 				datasource.setName(d.getOfficialname().getValue());
 				datasource.setSubmission_policy_url(d.getSubmissionpolicyurl());
-				datasource.setJurisdiction(Optional.ofNullable(d.getJurisdiction())
-					.map(v -> v.getClassid()).
-					orElse(new String()));
+				datasource
+					.setJurisdiction(
						Optional
							.ofNullable(d.getJurisdiction())
							.map(v -> v.getClassid())
							.orElse(new String()));
 				datasource.setPreservation_policy_url(d.getPreservationpolicyurl());
 				datasource.setVersion_control(d.getVersioncontrol());
 
-				datasource.setData_source_classification(Optional.ofNullable(d.getEoscdatasourcetype())
-					.map(v -> v.getClassname()).
-					orElse(new String()));
+				datasource
+					.setData_source_classification(
						Optional
							.ofNullable(d.getEoscdatasourcetype())
							.map(v -> v.getClassname())
							.orElse(new String()));
 				datasource.setResearch_product_type(getEoscProductType(d.getResearchentitytypes()));
 				datasource.setThematic(d.getThematic());
-				datasource.setResearch_product_access_policy(Optional.ofNullable(d.getDatabaseaccesstype())
-					.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
-					.orElse(new ArrayList<>()));
-				datasource.setResearch_product_metadata_access_policy(Optional.ofNullable(d.getResearchproductmetadataaccesspolicies())
-					.map(v->getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
-					.orElse(new ArrayList<>()));
+				datasource
+					.setResearch_product_access_policy(
						Optional
							.ofNullable(d.getDatabaseaccesstype())
							.map(v -> getResearchProductAccessPolicy(d.getDatabaseaccesstype().getValue()))
							.orElse(new ArrayList<>()));
+				datasource
+					.setResearch_product_metadata_access_policy(
						Optional
							.ofNullable(d.getResearchproductmetadataaccesspolicies())
							.map(v -> getResearchProductAccessPolicy(d.getResearchproductmetadataaccesspolicies()))
							.orElse(new ArrayList<>()));
 				return datasource;
-			}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class) )
+			}, Encoders.bean(eu.dnetlib.dhp.skgif.model.Datasource.class))
 			.write()
 			.mode(SaveMode.Overwrite)
-			.option("compression","gzip")
+			.option("compression", "gzip")
 			.json(outputPath + "Datasource");
 	}
 
 	private static List<String> getResearchProductAccessPolicy(List<String> value) {
 
-		return value.stream().map(v -> getResearchProductAccessPolicy(v)).filter(Objects::nonNull)
-			.map(v -> v.get(0)).distinct().collect(Collectors.toList());
+		return value
+			.stream()
+			.map(v -> getResearchProductAccessPolicy(v))
+			.filter(Objects::nonNull)
+			.map(v -> v.get(0))
+			.distinct()
+			.collect(Collectors.toList());
 	}
 
 	private static List<String> getResearchProductAccessPolicy(String value) {
 		// "databaseaccesstype if open => open access (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
-		//if restricted => restricted access (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
-		//if closed => metadata only access (https://vocabularies.coar-repositories.org/access_rights/c_14cb/) "
-		switch(value){
-			case "open"://(https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
+		// if restricted => restricted access (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
+		// if closed => metadata only access (https://vocabularies.coar-repositories.org/access_rights/c_14cb/) "
+		switch (value) {
+			case "open":// (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
 				return Arrays.asList("open access");
-			case "restricted"://(https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
+			case "restricted":// (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
 				return Arrays.asList("restricted access");
-			case "closed"://(https://vocabularies.coar-repositories.org/access_rights/c_14cb/)
+			case "closed":// (https://vocabularies.coar-repositories.org/access_rights/c_14cb/)
 				return Arrays.asList("metadata only access");
 			default:
 				return null;
 		}
 	}
 
 	private static List<String> getEoscProductType(List<String> researchentitytypes) {
 
 		List<String> eoscProductType = new ArrayList<>();
-		if(researchentitytypes != null) {
+		if (researchentitytypes != null) {
 
 			if (researchentitytypes.contains("Software"))
 				eoscProductType.add("Research Software");
 			if (researchentitytypes.contains("Research Publications") || researchentitytypes.contains("Literature"))
 				eoscProductType.add("Research Literature");
 			if (researchentitytypes.contains("Research Data"))
 				eoscProductType.add("Research Data");
 			if (researchentitytypes.contains("Organization") ||
 				researchentitytypes.contains("Organizations") ||
 				researchentitytypes.contains("Services") ||
 				researchentitytypes.contains("Projects"))
 				eoscProductType.add("Other research product");
 		}
 		return eoscProductType;
 	}
 }

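For reference, the access-policy refactoring above is behaviour-preserving: databaseaccesstype values are mapped onto the three COAR access-rights labels, and anything unmapped returns null so the list-valued overload drops it via Objects::nonNull. A self-contained check of that mapping (the harness class and main method are illustrative and not part of the commit; the switch body mirrors the diff):

import java.util.Arrays;
import java.util.List;

public class AccessPolicyMappingCheck {
	// Same mapping as DumpDatasource.getResearchProductAccessPolicy(String)
	static List<String> map(String value) {
		switch (value) {
			case "open": // https://vocabularies.coar-repositories.org/access_rights/c_abf2/
				return Arrays.asList("open access");
			case "restricted": // https://vocabularies.coar-repositories.org/access_rights/c_16ec/
				return Arrays.asList("restricted access");
			case "closed": // https://vocabularies.coar-repositories.org/access_rights/c_14cb/
				return Arrays.asList("metadata only access");
			default:
				return null; // unmapped values are filtered out by the caller
		}
	}

	public static void main(String[] args) {
		System.out.println(map("open")); // [open access]
		System.out.println(map("closed")); // [metadata only access]
		System.out.println(map("embargoed")); // null -> dropped upstream
	}
}
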
DumpGrant.java
@@ -1,13 +1,16 @@
 
 package eu.dnetlib.dhp.oa.graph.dump.skgif;
 
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.Relation;
-import eu.dnetlib.dhp.skgif.model.Grant;
-import eu.dnetlib.dhp.skgif.model.Identifier;
-import eu.dnetlib.dhp.skgif.model.Prefixes;
-import eu.dnetlib.dhp.skgif.model.RelationType;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.io.Serializable;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.avro.generic.GenericData;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.FilterFunction;
@@ -22,141 +25,185 @@ import org.dom4j.DocumentException;
 import org.dom4j.io.SAXReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.skgif.model.Grant;
+import eu.dnetlib.dhp.skgif.model.Identifier;
+import eu.dnetlib.dhp.skgif.model.Prefixes;
+import eu.dnetlib.dhp.skgif.model.RelationType;
 import scala.Tuple2;
 
-import java.io.Serializable;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-
 /**
  * @author miriam.baglioni
  * @Date 22/02/24
  */
 public class DumpGrant implements Serializable {
 	private static final Logger log = LoggerFactory.getLogger(DumpGrant.class);
 
 	public static void main(String[] args) throws Exception {
 		String jsonConfiguration = IOUtils
 			.toString(
 				DumpGrant.class
 					.getResourceAsStream(
 						"/eu/dnetlib/dhp/oa/graph/dump/dump_grant_parameters.json"));
 
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);
 
 		Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
 
 		final String inputPath = parser.get("sourcePath");
 		log.info("inputPath: {}", inputPath);
 
 		final String workingDir = parser.get("workingDir");
 		log.info("workingDir: {}", workingDir);
 
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);
 
 		SparkConf conf = new SparkConf();
 
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
 			spark -> {
 				Utils.removeOutputDir(spark, outputPath + "Grant");
 
 				mapGrants(spark, inputPath, outputPath);
 			});
 	}
 
 	private static void mapGrants(SparkSession spark, String inputPath, String outputPath) {
-		Dataset<Project> projects = Utils.readPath(spark, inputPath + "project", Project.class)
-			.filter((FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference() &&
-				!p.getDataInfo().getInvisible());
-		Dataset<Relation> relations = Utils.readPath(spark, inputPath + "relation", Relation.class)
-			.filter((FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
-				!r.getDataInfo().getInvisible() &&
-				r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
-		projects.joinWith(relations, projects.col("id").equalTo(relations.col("target")), "left")
-			.groupByKey((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._1().getId(), Encoders.STRING() )
-			.mapGroups((MapGroupsFunction<String, Tuple2<Project, Relation>, Grant>) (k,v) ->{
-				Grant g = new Grant();
-				Tuple2<Project, Relation> first = v.next();
-				g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
-				g.setIdentifiers(getProjectIdentifier(first._1()));
-				g.setTitle(first._1().getTitle().getValue());
-				g.setSummary(Optional.ofNullable(first._1().getSummary())
-					.map(value->value.getValue()).orElse(new String()));
-				g.setAcronym(Optional.ofNullable(first._1().getAcronym())
-					.map(value->value.getValue()).orElse(new String()));
-				g.setFunder(getFunderName(first._1().getFundingtree().get(0).getValue()));
-				// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
-				g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
-				g.setCurrency(Optional.ofNullable(first._1().getCurrency())
-					.map(value -> value.getValue()).orElse(new String()));
-				g.setFunded_amount(Optional.ofNullable(first._1().getFundedamount())
-					.orElse(null));
-				g.setKeywords(first._1().getSubjects()
-					.stream().map(s -> s.getValue()).collect(Collectors.toList()));
-				g.setStart_date(Optional.ofNullable(first._1().getStartdate())
-					.map(value -> value.getValue()).orElse(new String()));
-				g.setEnd_date(Optional.ofNullable(first._1().getEnddate())
-					.map(value -> value.getValue()).orElse(new String()));
-				g.setWebsite(Optional.ofNullable(first._1().getWebsiteurl())
-					.map(value -> value.getValue()).orElse(new String()));
+		Dataset<Project> projects = Utils
+			.readPath(spark, inputPath + "project", Project.class)
+			.filter(
+				(FilterFunction<Project>) p -> !p.getDataInfo().getDeletedbyinference() &&
+					!p.getDataInfo().getInvisible());
+		Dataset<Relation> relations = Utils
+			.readPath(spark, inputPath + "relation", Relation.class)
+			.filter(
+				(FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
+					!r.getDataInfo().getInvisible() &&
+					r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
+		projects
+			.joinWith(relations, projects.col("id").equalTo(relations.col("target")), "left")
+			.groupByKey((MapFunction<Tuple2<Project, Relation>, String>) t2 -> t2._1().getId(), Encoders.STRING())
+			.mapGroups((MapGroupsFunction<String, Tuple2<Project, Relation>, Grant>) (k, v) -> {
+				Grant g = new Grant();
+				Tuple2<Project, Relation> first = v.next();
+				g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
+				g.setIdentifiers(getProjectIdentifier(first._1()));
+				g.setTitle(first._1().getTitle().getValue());
+				g
+					.setSummary(
						Optional
							.ofNullable(first._1().getSummary())
							.map(value -> value.getValue())
							.orElse(new String()));
+				g
+					.setAcronym(
						Optional
							.ofNullable(first._1().getAcronym())
							.map(value -> value.getValue())
							.orElse(new String()));
+				g.setFunder(getFunderName(first._1().getFundingtree().get(0).getValue()));
// * private String funding_stream;// fundingtree to be used the xpath //funding_level_[n]
|
||||||
if(Optional.ofNullable(first._2()).isPresent()) {
|
g.setFunding_stream(getFundingStream(first._1().getFundingtree().get(0).getValue()));
|
||||||
List<String> relevantOrganizatios = new ArrayList<>();
|
g
|
||||||
relevantOrganizatios.add(Utils.getIdentifier(Prefixes.ORGANIZATION, first._2().getSource()));
|
.setCurrency(
|
||||||
v.forEachRemaining(t2 -> relevantOrganizatios.add(Utils.getIdentifier(Prefixes.ORGANIZATION, t2._2().getSource())));
|
Optional
|
||||||
g.setBeneficiaries(relevantOrganizatios);
|
.ofNullable(first._1().getCurrency())
|
||||||
}
|
.map(value -> value.getValue())
|
||||||
return g;
|
.orElse(new String()));
|
||||||
} , Encoders.bean(Grant.class))
|
g
|
||||||
.write()
|
.setFunded_amount(
|
||||||
.mode(SaveMode.Overwrite)
|
Optional
|
||||||
.option("compression","gzip")
|
.ofNullable(first._1().getFundedamount())
|
||||||
.json(outputPath + "Grant");
|
.orElse(null));
|
||||||
}
|
g
|
||||||
|
.setKeywords(
|
||||||
|
first
|
||||||
|
._1()
|
||||||
|
.getSubjects()
|
||||||
|
.stream()
|
||||||
|
.map(s -> s.getValue())
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
g
|
||||||
|
.setStart_date(
|
||||||
|
Optional
|
||||||
|
.ofNullable(first._1().getStartdate())
|
||||||
|
.map(value -> value.getValue())
|
||||||
|
.orElse(new String()));
|
||||||
|
g
|
||||||
|
.setEnd_date(
|
||||||
|
Optional
|
||||||
|
.ofNullable(first._1().getEnddate())
|
||||||
|
.map(value -> value.getValue())
|
||||||
|
.orElse(new String()));
|
||||||
|
g
|
||||||
|
.setWebsite(
|
||||||
|
Optional
|
||||||
|
.ofNullable(first._1().getWebsiteurl())
|
||||||
|
.map(value -> value.getValue())
|
||||||
|
.orElse(new String()));
|
||||||
|
if (Optional.ofNullable(first._2()).isPresent()) {
|
||||||
|
List<String> relevantOrganizatios = new ArrayList<>();
|
||||||
|
relevantOrganizatios.add(Utils.getIdentifier(Prefixes.ORGANIZATION, first._2().getSource()));
|
||||||
|
v
|
||||||
|
.forEachRemaining(
|
||||||
|
t2 -> relevantOrganizatios
|
||||||
|
.add(Utils.getIdentifier(Prefixes.ORGANIZATION, t2._2().getSource())));
|
||||||
|
g.setBeneficiaries(relevantOrganizatios);
|
||||||
|
}
|
||||||
|
return g;
|
||||||
|
}, Encoders.bean(Grant.class))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.option("compression", "gzip")
|
||||||
|
.json(outputPath + "Grant");
|
||||||
|
}
|
||||||
|
|
||||||
private static String getFundingStream(String fundingtree) throws DocumentException {
|
private static String getFundingStream(String fundingtree) throws DocumentException {
|
||||||
final Document doc;
|
final Document doc;
|
||||||
|
|
||||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
if(Optional.ofNullable(doc.selectNodes("//funding_level_0")).isPresent() &&
|
if (Optional.ofNullable(doc.selectNodes("//funding_level_0")).isPresent() &&
|
||||||
doc.selectNodes("//funding_level_0").size() > 0)
|
doc.selectNodes("//funding_level_0").size() > 0)
|
||||||
return ((org.dom4j.Node) (doc.selectNodes("//funding_level_0").get(0))).getText();
|
return ((org.dom4j.Node) (doc.selectNodes("//funding_level_0").get(0))).getText();
|
||||||
return new String();
|
return new String();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getFunderName(String fundingtree) throws DocumentException {
|
private static String getFunderName(String fundingtree) throws DocumentException {
|
||||||
final Document doc;
|
final Document doc;
|
||||||
|
|
||||||
doc = new SAXReader().read(new StringReader(fundingtree));
|
doc = new SAXReader().read(new StringReader(fundingtree));
|
||||||
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
// f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
|
||||||
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
return ((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText();
|
||||||
//f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
// f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Identifier> getProjectIdentifier(Project project) {
|
private static List<Identifier> getProjectIdentifier(Project project) throws DocumentException {
|
||||||
if (project.getPid().size() > 0 )
|
List<Identifier> identifiers = new ArrayList<>();
|
||||||
return project.getPid().stream().map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList());
|
if (project.getPid().size() > 0)
|
||||||
return new ArrayList<>();
|
project
|
||||||
// private List<Identifier> identifiers;//.schema pid.qualifier.classid identifiers.value pid.value
|
.getPid()
|
||||||
//identifiers.schema funder acronym to be used the xpath //fundingtree/funder/shortname
|
.stream()
|
||||||
//identifiers.value project.code
|
.forEach(p -> identifiers.add(Identifier.newInstance(p.getQualifier().getClassid(), p.getValue())));
|
||||||
|
identifiers
|
||||||
|
.add(
|
||||||
|
Identifier
|
||||||
|
.newInstance(
|
||||||
|
getFunderName(project.getFundingtree().get(0).getValue()), project.getCode().getValue()));
|
||||||
|
return identifiers;
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
|
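For context, the two dom4j lookups above (`//funder/name` and `//funding_level_0`) can be exercised in isolation. The following sketch is not part of the commit, and the fundingtree payload is a hypothetical example assuming the layout implied by those xpaths.

// Standalone sketch, not in the commit: exercises the same xpaths used by
// getFunderName and getFundingStream on a hypothetical fundingtree payload.
import java.io.StringReader;
import java.util.Optional;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

public class FundingTreeSketch {
    public static void main(String[] args) throws DocumentException {
        // hypothetical payload; the real one comes from project.getFundingtree()
        String fundingtree = "<fundingtree>"
            + "<funder><name>European Commission</name></funder>"
            + "<funding_level_0>H2020</funding_level_0>"
            + "</fundingtree>";
        Document doc = new SAXReader().read(new StringReader(fundingtree));
        // funder name: first //funder/name node, as in getFunderName
        String funder = ((Node) doc.selectNodes("//funder/name").get(0)).getText();
        // funding stream: first //funding_level_0 node if any, as in getFundingStream
        String stream = Optional
            .ofNullable(doc.selectNodes("//funding_level_0"))
            .filter(nodes -> nodes.size() > 0)
            .map(nodes -> ((Node) nodes.get(0)).getText())
            .orElse("");
        System.out.println(funder + " / " + stream); // European Commission / H2020
    }
}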
@@ -1,10 +1,12 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;

@@ -16,95 +18,117 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.skgif.model.Identifier;
import eu.dnetlib.dhp.skgif.model.OrganizationTypes;
import eu.dnetlib.dhp.skgif.model.Prefixes;

/**
 * @author miriam.baglioni
 * @Date 21/02/24
 */
public class DumpOrganization implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(DumpOrganization.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                DumpOrganization.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/dump_organization_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);

        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath + "Organization");
                mapOrganization(spark, inputPath, outputPath);
            });
    }

    private static void mapOrganization(SparkSession spark, String inputPath, String outputPath) {
        Dataset<Organization> organizations = Utils.readPath(spark, inputPath + "organization", Organization.class);
        organizations
            .filter(
                (FilterFunction<Organization>) o -> !o.getDataInfo().getDeletedbyinference()
                    && !o.getDataInfo().getInvisible())
            .map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> {
                eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization();
                organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
                organization
                    .setCountry(
                        Optional
                            .ofNullable(o.getCountry().getClassid())
                            .orElse(new String()));
                organization
                    .setName(
                        Optional
                            .ofNullable(o.getLegalname().getValue())
                            .orElse(new String()));
                organization
                    .setShort_name(
                        Optional
                            .ofNullable(o.getLegalshortname())
                            .map(v -> v.getValue())
                            .orElse(new String()));
                organization
                    .setIdentifiers(
                        o
                            .getPid()
                            .stream()
                            .map(p -> Identifier.newInstance(p.getQualifier().getClassid(), p.getValue()))
                            .collect(Collectors.toList()));
                organization
                    .setOther_names(
                        o
                            .getAlternativeNames()
                            .stream()
                            .map(a -> a.getValue())
                            .collect(Collectors.toList()));
                organization.setType(getOrganizationType(o));
                return organization;
            }, Encoders.bean(eu.dnetlib.dhp.skgif.model.Organization.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath + "Organization");
    }

    private static String getOrganizationType(Organization o) {
        if (Optional.ofNullable(o.getEcenterprise()).isPresent()
            && o.getEcenterprise().getValue().equalsIgnoreCase("true"))
            return OrganizationTypes.COMPANY.label;
        if (Optional.ofNullable(o.getEchighereducation()).isPresent()
            && o.getEchighereducation().getValue().equalsIgnoreCase("true"))
            return OrganizationTypes.EDUCATION.label;
        if (Optional.ofNullable(o.getEcresearchorganization()).isPresent()
            && o.getEcresearchorganization().getValue().equalsIgnoreCase("true"))
            return OrganizationTypes.EDUCATION.label;
        if (Optional.ofNullable(o.getEcnonprofit()).isPresent()
            && o.getEcnonprofit().getValue().equalsIgnoreCase("true"))
            return OrganizationTypes.NONPROFIT.label;

        return OrganizationTypes.OTHER.label;
    }
}
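The precedence in getOrganizationType is first-match-wins over the EC flags. A minimal sketch of that decision table follows; it is not part of the commit, and the lowercase label strings are assumptions standing in for the OrganizationTypes enum values.

// Sketch only, not in the commit: the first EC flag set to "true" wins.
public class OrganizationTypeSketch {
    static String organizationType(boolean enterprise, boolean higherEducation,
        boolean researchOrg, boolean nonProfit) {
        if (enterprise)
            return "company";
        if (higherEducation)
            return "education";
        if (researchOrg)
            return "education"; // the commit maps EC research organizations to the same label
        if (nonProfit)
            return "nonprofit";
        return "other";
    }

    public static void main(String[] args) {
        // an organization flagged both enterprise and non-profit resolves to the first match
        System.out.println(organizationType(true, false, false, true)); // company
        System.out.println(organizationType(false, false, false, false)); // other
    }
}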
@@ -6,11 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.*;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;

@@ -22,9 +17,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.PartialResearchProduct;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;

@@ -106,15 +105,20 @@ public class DumpResult implements Serializable {
        Dataset<Datasource> datasource = Utils
            .readPath(spark, inputPath + "/datasource", Datasource.class)
            .filter(
                (FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEoscdatasourcetype()).isPresent() &&
                    d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"));

        Dataset<EmitPerManifestation> man = Utils
            .readPath(spark, workingDir + e.name() + "/manifestation", EmitPerManifestation.class);

        Dataset<PartialResearchProduct> partialResearchProduct = man
            .joinWith(datasource, man.col("instance.hostedby.key").equalTo(datasource.col("id")), "left")
            .groupByKey(
                (MapFunction<Tuple2<EmitPerManifestation, Datasource>, String>) t2 -> t2._1().getResultId(),
                Encoders.STRING())
            .mapGroups(
                (MapGroupsFunction<String, Tuple2<EmitPerManifestation, Datasource>, PartialResearchProduct>) (
                    k, v) -> {
                    PartialResearchProduct prp = new PartialResearchProduct();
                    prp.setResultId(k);
                    List<Manifestation> manifestationList = new ArrayList<>();

@@ -124,10 +128,13 @@ public class DumpResult implements Serializable {
                    return prp;
                }, Encoders.bean(PartialResearchProduct.class));
        partialResearchProduct
            .joinWith(
                aggRelations, partialResearchProduct.col("resultId").equalTo(aggRelations.col("resultId")),
                "left")
            .map(
                (MapFunction<Tuple2<PartialResearchProduct, RelationPerProduct>, PartialResearchProduct>) t2 -> {
                    PartialResearchProduct prp = t2._1();
                    if (Optional.ofNullable(t2._2()).isPresent()) {
                        prp.setRelated_products(t2._2().getRelatedProduct());
                        prp.setRelevant_organizations(t2._2().getOrganizations());
                        prp.setFunding(t2._2().getFunding());

@@ -144,148 +151,83 @@ public class DumpResult implements Serializable {

    private static Manifestation getManifestation(Tuple2<EmitPerManifestation, Datasource> t2) {
        // if the left side is present, we have the biblio and the venue;
        // if it is not, we only have the other values
        EmitPerManifestation epm = t2._1();
        Manifestation manifestation = new Manifestation();
        manifestation.setProduct_local_type(epm.getInstance().getInstancetype().getClassname());
        manifestation.setProduct_local_type_schema(epm.getInstance().getInstancetype().getSchemename());
        if (Optional.ofNullable(epm.getInstance().getDateofacceptance()).isPresent())
            manifestation
                .setDates(
                    Arrays
                        .asList(
                            Dates.newInstance(epm.getInstance().getDateofacceptance().getValue(), "publishing")));
        if (Optional.ofNullable(epm.getInstance().getRefereed()).isPresent())
            switch (epm.getInstance().getRefereed().getClassid()) {
                case "0000":
                    manifestation.setPeer_review(PeerReview.UNAVAILABLE.label);
                    break;
                case "0001":
                    manifestation.setPeer_review(PeerReview.PEER_REVIEWED.label);
                    break;
                case "0002":
                    manifestation.setPeer_review(PeerReview.NON_PEER_REVIEWED.label);
                    break;
            }

        manifestation.setMetadata_curation("unavailable");
        if (Optional.ofNullable(epm.getInstance().getAccessright()).isPresent())
            switch (epm.getInstance().getAccessright().getClassid()) {
                case "OPEN":
                case "OPEN DATA":
                case "OPEN SOURCE":
                    manifestation.setAccess_right(AccessRight.OPEN.label);
                    break;
                case "CLOSED":
                    manifestation.setAccess_right(AccessRight.CLOSED.label);
                    break;
                case "RESTRICTED":
                    manifestation.setAccess_right(AccessRight.RESTRICTED.label);
                    break;
                case "EMBARGO":
                case "12MONTHS":
                case "6MONTHS":
                    manifestation.setAccess_right(AccessRight.EMBARGO.label);
                    break;
                default:
                    manifestation.setAccess_right(AccessRight.UNAVAILABLE.label);

            }
        manifestation
            .setLicence(
                Optional
                    .ofNullable(epm.getInstance().getLicense())
                    .map(value -> value.getValue())
                    .orElse(null));
        manifestation
            .setUrl(
                Optional
                    .ofNullable(epm.getInstance().getUrl())
                    .map(value -> value.get(0))
                    .orElse(null));

        if (Optional.ofNullable(epm.getInstance().getPid()).isPresent() && epm.getInstance().getPid().size() > 0) {
            manifestation.setPid(epm.getInstance().getPid().get(0).getValue());
        }
        if (Optional.ofNullable(t2._2()).isPresent()) {
            manifestation.setBiblio(getBiblio(epm));
            if (Optional.ofNullable(t2._2().getJournal().getIssnPrinted()).isPresent())
                manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnPrinted()));
            else if (Optional.ofNullable(t2._2().getJournal().getIssnOnline()).isPresent())
                manifestation.setVenue(Utils.getIdentifier(Prefixes.VENUE, t2._1().getJournal().getIssnOnline()));
        }
        manifestation
            .setHosting_datasource(Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()));

        return manifestation;
    }

    private static Biblio getBiblio(EmitPerManifestation epm) {
        Biblio biblio = new Biblio();
        biblio.setEdition(epm.getJournal().getEdition());

@@ -298,7 +240,7 @@ public class DumpResult implements Serializable {
    }

    private static <R extends Result> void dumpResult(SparkSession spark, String inputPath, String workingDir,
        String outputPath) {
        ModelSupport.entityTypes
            .keySet()
            .parallelStream()

@@ -314,14 +256,14 @@ public class DumpResult implements Serializable {
                .joinWith(prr, results.col("id").equalTo(prr.col("resultId")), "left")
                .map((MapFunction<Tuple2<R, PartialResearchProduct>, ResearchProduct>) t2 -> {
                    ResearchProduct rp = ResultMapper.map(t2._1());
                    if (Optional.ofNullable(t2._2()).isPresent()) {
                        if (Optional.ofNullable(t2._2().getRelated_products()).isPresent())
                            rp.setRelated_products(t2._2().getRelated_products());
                        if (Optional.ofNullable(t2._2().getFunding()).isPresent())
                            rp.setFunding(t2._2().getFunding());
                        if (Optional.ofNullable(t2._2().getRelevant_organizations()).isPresent())
                            rp.setRelevant_organizations(t2._2().getRelevant_organizations());
                        if (Optional.ofNullable(t2._2().getManifestations()).isPresent())
                            rp.setManifestations(t2._2().getManifestations());
                    }
                    return rp;

@@ -333,30 +275,37 @@ public class DumpResult implements Serializable {

            });
        Dataset<ResearchProduct> researchProducts = spark.emptyDataset(Encoders.bean(ResearchProduct.class));
        for (EntityType e : ModelSupport.entityTypes.keySet()) {
            if (ModelSupport.isResult(e))
                researchProducts = researchProducts
                    .union(Utils.readPath(spark, workingDir + e.name() + "/researchproduct", ResearchProduct.class));
        }
        researchProducts
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath + "ResearchProduct");

    }

    private static void selectRelations(SparkSession spark, String inputPath, String workingDir) {
        Dataset<Relation> relation = Utils
            .readPath(
                spark,
                inputPath + "relation", Relation.class)
            .filter(
                (FilterFunction<Relation>) r -> !r.getDataInfo().getDeletedbyinference() &&
                    !r.getDataInfo().getInvisible())
            .filter(
                (FilterFunction<Relation>) r -> r
                    .getRelClass()
                    .equalsIgnoreCase(RelationType.RESULT_AFFILIATIED_TO_ORGANIZATION.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.RESULT_OUTCOME_FUNDING.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.SUPPLEMENT.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.DOCUMENTS.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.PART.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.VERSION.label) ||
                    r.getRelClass().equalsIgnoreCase(RelationType.CITATION.label));

        relation
            .groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())

@@ -373,12 +322,14 @@ public class DumpResult implements Serializable {
                            rpp.getOrganizations().add(Utils.getIdentifier(Prefixes.ORGANIZATION, target));
                            break;
                        case "isproducedby":
                            rpp.getFunding().add(Utils.getIdentifier(Prefixes.GRANT, target));
                            break;
                        default:
                            if (!remainingRelations.keySet().contains(relClass))
                                remainingRelations.put(relClass, new ArrayList<>());
                            remainingRelations
                                .get(relClass)
                                .add(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, target));
                    }
                }
                for (String key : remainingRelations.keySet())
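The access-right switch in getManifestation collapses several instance classids into the coarser SKG-IF vocabulary. A standalone mirror of that mapping follows; it is not part of the commit, and the lowercase labels are assumptions standing in for the AccessRight enum values.

// Sketch only, not in the commit: classid -> access-right label, mirroring the
// switch in getManifestation.
public class AccessRightSketch {
    static String accessRightLabel(String classid) {
        switch (classid) {
            case "OPEN":
            case "OPEN DATA":
            case "OPEN SOURCE":
                return "open";
            case "CLOSED":
                return "closed";
            case "RESTRICTED":
                return "restricted";
            case "EMBARGO":
            case "12MONTHS":
            case "6MONTHS":
                return "embargo";
            default:
                return "unavailable";
        }
    }

    public static void main(String[] args) {
        System.out.println(accessRightLabel("OPEN SOURCE")); // open
        System.out.println(accessRightLabel("UNKNOWN")); // unavailable
    }
}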
@@ -1,156 +1,179 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.skgif.model.*;
import scala.Tuple2;

/**
 * @author miriam.baglioni
 * @Date 21/02/24
 */
public class DumpVenue implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(DumpVenue.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                DumpVenue.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/dump_datasource_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);

        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String workingDir = parser.get("workingDir");
        log.info("workingDir: {}", workingDir);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath + "Venue");
                mapVenue(spark, inputPath, outputPath, workingDir);
            });
    }

    private static void mapVenue(SparkSession spark, String inputPath, String outputPath, String workingDir) {
        Dataset<EmitPerManifestation> manifestationDataset = Utils
            .readPath(spark, workingDir + "datasourcePublisher", EmitPerManifestation.class);
        Dataset<Datasource> datasourceDataset = Utils
            .readPath(spark, inputPath + "datasource", Datasource.class)
            .filter(
                (FilterFunction<Datasource>) d -> !d.getDataInfo().getInvisible()
                    && !d.getDataInfo().getDeletedbyinference()
                    && d.getEoscdatasourcetype().getClassid().equalsIgnoreCase("Journal archive"));
        datasourceDataset
            .joinWith(
                manifestationDataset, datasourceDataset.col("id").equalTo(manifestationDataset.col("hostedby.key")),
                "left")
            .map((MapFunction<Tuple2<Datasource, EmitPerManifestation>, Venue>) t2 -> {
                Venue venue = new Venue();
                Datasource d = t2._1();
                if (Optional.ofNullable(d.getJournal().getIssnPrinted()).isPresent())
                    venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnPrinted()));
                else if (Optional.ofNullable(d.getJournal().getIssnOnline()).isPresent())
                    venue.setLocal_identifier(Utils.getIdentifier(Prefixes.VENUE, d.getJournal().getIssnOnline()));
                venue.setIdentifiers(getVenueIdentifier(d.getJournal()));
                venue.setName(d.getOfficialname().getValue());
                venue.setType(VenueType.JOURNAL.label);
                if (Optional.ofNullable(t2._2()).isPresent())
                    venue.setPublisher(t2._2().getPublisher());
                venue.setAcronym(null);
                venue.setSeries(null);
                venue.setIs_currently_full_oa(null);
                venue.setCreation_date(null);
                venue.setContributions(null);
                return venue;
            }, Encoders.bean(Venue.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(workingDir + "Venues");

        Utils
            .readPath(spark, workingDir + "Venues", Venue.class)
            .groupByKey((MapFunction<Venue, String>) v -> v.getLocal_identifier(), Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Venue, Venue>) (k, v) -> v.next(), Encoders.bean(Venue.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath + "Venues");
    }

    private static List<Identifier> getVenueIdentifier(Journal journal) {
        List<Identifier> identifiers = new ArrayList<>();
        if (Optional.ofNullable((journal.getIssnOnline())).isPresent())
            identifiers.add(Identifier.newInstance(VenueIdentifierType.EISSN.label, journal.getIssnOnline()));
        if (Optional.ofNullable(journal.getIssnPrinted()).isPresent())
            identifiers.add(Identifier.newInstance(VenueIdentifierType.ISSN.label, journal.getIssnPrinted()));
        if (Optional.ofNullable(journal.getIssnLinking()).isPresent())
            identifiers.add(Identifier.newInstance(VenueIdentifierType.LISSN.label, journal.getIssnLinking()));
        return identifiers;
    }

    private static List<String> getResearchProductAccessPolicy(List<String> value) {

        return value
            .stream()
            .map(v -> getResearchProductAccessPolicy(v))
            .filter(Objects::nonNull)
            .map(v -> v.get(0))
            .distinct()
            .collect(Collectors.toList());
    }

    private static List<String> getResearchProductAccessPolicy(String value) {
        // "databaseaccesstype if open => open access (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
        // if restricted => restricted access (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
        // if closed => metadata only access (https://vocabularies.coar-repositories.org/access_rights/c_14cb/) "
        switch (value) {
            case "open":// (https://vocabularies.coar-repositories.org/access_rights/c_abf2/)
                return Arrays.asList("open access");
            case "restricted":// (https://vocabularies.coar-repositories.org/access_rights/c_16ec/)
                return Arrays.asList("restricted access");
            case "closed":// (https://vocabularies.coar-repositories.org/access_rights/c_14cb/)
                return Arrays.asList("metadata only access");
            default:
                return null;
        }
    }

    private static List<String> getEoscProductType(List<String> researchentitytypes) {

        List<String> eoscProductType = new ArrayList<>();
        if (researchentitytypes != null) {

            if (researchentitytypes.contains("Software"))
                eoscProductType.add("Research Software");
            if (researchentitytypes.contains("Research Publications") || researchentitytypes.contains("Literature"))
                eoscProductType.add("Research Literature");
            if (researchentitytypes.contains("Research Data"))
                eoscProductType.add("Research Data");
            if (researchentitytypes.contains("Organization") ||
                researchentitytypes.contains("Organizations") ||
                researchentitytypes.contains("Services") ||
                researchentitytypes.contains("Projects"))
                eoscProductType.add("Other research product");
        }
        return eoscProductType;
    }
}
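Two details of mapVenue are worth spelling out. The double write is a materialize-then-deduplicate step: the left join can emit one Venue per matching manifestation, so the second pass groups by local_identifier and keeps one representative per key. And when a journal carries both ISSNs, the printed one wins the local_identifier if/else, while getVenueIdentifier emits every ISSN it finds. A worked example of the latter, not part of the commit; the ISSN values are hypothetical and the "eissn"/"issn" labels are assumptions for the VenueIdentifierType enum:

// Sketch only, not in the commit; assumes the standard bean setters on the OAF Journal.
Journal journal = new Journal();
journal.setIssnPrinted("0028-0836"); // hypothetical values
journal.setIssnOnline("1476-4687");
// getVenueIdentifier(journal) would then contain two entries:
//   Identifier(eissn, 1476-4687)
//   Identifier(issn, 0028-0836)
// while the venue local_identifier is minted from the printed ISSN, which is checked first.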
@@ -7,8 +7,6 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

-import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@@ -23,10 +21,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@@ -42,7 +41,7 @@ public class EmitFromResults implements Serializable {
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				EmitFromResults.class
					.getResourceAsStream(
						"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
@@ -91,17 +90,21 @@ public class EmitFromResults implements Serializable {
			Class<R> resultClazz = ModelSupport.entityTypes.get(e);
			Utils
				.readPath(spark, inputPath + e.name(), resultClazz)
-				.filter((FilterFunction<R>) r -> Optional.of(r.getSubject()).isPresent())
+				.filter((FilterFunction<R>) r -> Optional.ofNullable(r.getSubject()).isPresent())
				.flatMap(
					(FlatMapFunction<R, Topic>) r -> r
						.getSubject()
						.stream()
-						.filter(s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") || s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
+						.filter(
+							s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
+								|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
						.map(s -> {
							Topic t = new Topic();
							t
								.setLocal_identifier(
-									Utils.getIdentifier(Prefixes.TOPIC ,s.getQualifier().getClassid() + s.getValue()));
+									Utils
+										.getIdentifier(
+											Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
							t
								.setIdentifiers(
									Arrays
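Note: the substantive fix in this hunk is Optional.of -> Optional.ofNullable; the rest is formatting. A standalone illustration of why the original filter could fail (not part of the commit):

import java.util.Optional;

public class OptionalGuardSketch {
	public static void main(String[] args) {
		String subject = null;
		// Optional.of(subject) throws NullPointerException for null input;
		// Optional.ofNullable(subject) returns an empty Optional instead,
		// so records without subjects are filtered out rather than crashing the job.
		System.out.println(Optional.ofNullable(subject).isPresent()); // false
	}
}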
@@ -154,7 +157,8 @@ public class EmitFromResults implements Serializable {
						p.setGiven_name(a.getName());
						String identifier = new String();
						if (Optional.ofNullable(a.getPid()).isPresent()) {
-							Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils.getOrcid(a.getPid());
+							Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils
+								.getOrcid(a.getPid());
							if (orcid != null) {
								identifier = Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2());
								if (orcid._2())
@@ -164,12 +168,15 @@ public class EmitFromResults implements Serializable {
							else
								p
									.setIdentifiers(
-										Arrays.asList(Identifier.newInstance("inferred_orcid", orcid._1())));
+										Arrays
+											.asList(Identifier.newInstance("inferred_orcid", orcid._1())));
						} else {
							if (Optional.ofNullable(a.getRank()).isPresent()) {
-								identifier = Utils.getIdentifier(Prefixes.TEMPORARY_PERSON,r.getId() + a.getRank());
+								identifier = Utils
+									.getIdentifier(Prefixes.TEMPORARY_PERSON, r.getId() + a.getRank());
							} else {
-								identifier = Utils.getIdentifier(Prefixes.TEMPORARY_PERSON,r.getId() + count);
+								identifier = Utils
+									.getIdentifier(Prefixes.TEMPORARY_PERSON, r.getId() + count);
							}

						}
@@ -243,6 +250,32 @@ public class EmitFromResults implements Serializable {
					}
				});

+		Dataset<EmitPerManifestation> emitPerManifestationDataset = Utils
+			.readPath(
+				spark, workingDir + "software/manifestation", EmitPerManifestation.class)
+			.union(
+				Utils
+					.readPath(
+						spark, workingDir + "dataset/manifestation", EmitPerManifestation.class))
+			.union(
+				Utils
+					.readPath(
+						spark, workingDir + "publication/manifestation", EmitPerManifestation.class))
+			.union(
+				Utils
+					.readPath(
+						spark, workingDir + "otherresearchproduct/manifestation", EmitPerManifestation.class));
+
+		emitPerManifestationDataset
+			.groupByKey((MapFunction<EmitPerManifestation, String>) p -> p.getHostedBy(), Encoders.STRING())
+			.mapGroups(
+				(MapGroupsFunction<String, EmitPerManifestation, EmitPerManifestation>) (k, v) -> v.next(),
+				Encoders.bean(EmitPerManifestation.class))
+			.write()
+			.mode(SaveMode.Overwrite)
+			.option("compression", "gzip")
+			.json(workingDir + "/datasourcePublisher");
	}

}
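Note: the new block above collects the manifestations emitted per result type and keeps one representative per hosting datasource. A minimal standalone sketch of the same groupByKey/mapGroups deduplication pattern (the bean below is a stand-in for EmitPerManifestation, not the real class):

import java.io.Serializable;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;

public class DedupByKeySketch implements Serializable {

	// Hypothetical bean with just the grouping key.
	public static class Manifestation implements Serializable {
		private String hostedBy;

		public String getHostedBy() {
			return hostedBy;
		}

		public void setHostedBy(String hostedBy) {
			this.hostedBy = hostedBy;
		}
	}

	// Keeps an arbitrary representative per hostedBy key, mirroring the
	// mapGroups((k, v) -> v.next()) call in the hunk above.
	public static Dataset<Manifestation> onePerDatasource(Dataset<Manifestation> ds) {
		return ds
			.groupByKey((MapFunction<Manifestation, String>) Manifestation::getHostedBy, Encoders.STRING())
			.mapGroups(
				(MapGroupsFunction<String, Manifestation, Manifestation>) (k, v) -> v.next(),
				Encoders.bean(Manifestation.class));
	}
}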
@@ -5,10 +5,9 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoAllowedTypeException;
import eu.dnetlib.dhp.oa.graph.dump.skgif.exception.NoTitleFoundException;
+import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
@@ -54,20 +53,21 @@ public class ResultMapper implements Serializable {
			for (Author a : input.getAuthor()) {
				count += 1;
				Contribution contribution = new Contribution();
				Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
				if (orcid != null) {
					contribution.setPerson(Utils.getIdentifier(Prefixes.PERSON, orcid._1() + orcid._2()));
				} else {
					if (Optional.ofNullable(a.getRank()).isPresent()) {
-						contribution.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON,input.getId() + a.getRank()));
+						contribution
+							.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + a.getRank()));
					} else {
-						contribution.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON,input.getId() + count));
+						contribution.setPerson(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
					}

				}
-				if(Optional.ofNullable(a.getRank()).isPresent()){
+				if (Optional.ofNullable(a.getRank()).isPresent()) {
					contribution.setRank(a.getRank());
				}

				contributionList.add(contribution);
			}
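Note: the contribution block above resolves the person reference with a three-step fallback: ORCID when available, otherwise the author's rank, otherwise the running author count. A standalone sketch of that precedence (names and prefix strings are illustrative; the real code uses Utils.getIdentifier with Prefixes.PERSON/Prefixes.TEMPORARY_PERSON over an md5 of the concatenation):

public class PersonIdFallbackSketch {

	static String personId(String orcid, String rank, String resultId, int count) {
		if (orcid != null)
			return "person::" + orcid; // ORCID branch
		if (rank != null)
			return "temporary_person::" + resultId + rank; // rank fallback
		return "temporary_person::" + resultId + count; // positional fallback
	}

	public static void main(String[] args) {
		System.out.println(personId("0000-0002-1825-0097", null, "50|id", 1));
		System.out.println(personId(null, "2", "50|id", 1));
		System.out.println(personId(null, null, "50|id", 3));
	}
}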
@@ -83,12 +83,15 @@ public class ResultMapper implements Serializable {
				input
					.getSubject()
					.stream()
-					.filter(s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") ||
+					.filter(
+						s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") ||
							s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
					.map(s -> {
						ResultTopic topic = new ResultTopic();
-						topic.setTopic(Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
-						if(Optional.ofNullable(s.getDataInfo()).isPresent()){
+						topic
+							.setTopic(
+								Utils.getIdentifier(Prefixes.TOPIC, s.getQualifier().getClassid() + s.getValue()));
+						if (Optional.ofNullable(s.getDataInfo()).isPresent()) {
							Provenance provenance = new Provenance();
							provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
							provenance.setType(s.getDataInfo().getInferenceprovenance());
@@ -101,7 +104,6 @@ public class ResultMapper implements Serializable {
						}
					}

	private static <E extends Result> void mapType(ResearchProduct out, E input) throws NoAllowedTypeException {
		switch (input.getResulttype().getClassid()) {
			case "publication":
@@ -148,7 +150,7 @@ public class ResultMapper implements Serializable {
			.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
			.collect(Collectors.toList());
		if (!iTitle.isEmpty()) {
-			out.setTitles(Collections.singletonMap("none",Arrays.asList(iTitle.get(0).getValue())));
+			out.setTitles(Collections.singletonMap("none", Arrays.asList(iTitle.get(0).getValue())));
			return;
		}

@@ -158,7 +160,7 @@ public class ResultMapper implements Serializable {
			.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
			.collect(Collectors.toList());
		if (!iTitle.isEmpty()) {
-			out.setTitles(Collections.singletonMap("none",Arrays.asList(iTitle.get(0).getValue())));
+			out.setTitles(Collections.singletonMap("none", Arrays.asList(iTitle.get(0).getValue())));
		}

	}

@@ -169,6 +171,6 @@ public class ResultMapper implements Serializable {
		Optional
			.ofNullable(input.getDescription())
			.ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
-		out.setAbstracts(Collections.singletonMap("none",descriptionList));
+		out.setAbstracts(Collections.singletonMap("none", descriptionList));
	}
}
@@ -5,16 +5,18 @@ import java.io.Serializable;
import java.util.List;
import java.util.Optional;

+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.skgif.model.Prefixes;
import eu.dnetlib.dhp.utils.DHPUtils;
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Encoders;
-import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

/**
@@ -32,11 +34,11 @@ public class Utils implements Serializable {
	}

	public static <R> Dataset<R> readPath(
		SparkSession spark, String inputPath, Class<R> clazz) {
		return spark
			.read()
			.textFile(inputPath)
			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
	}

	public static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
@@ -57,7 +59,7 @@ public class Utils implements Serializable {
		return null;
	}

-	public static String getIdentifier(Prefixes entity, String id){
+	public static String getIdentifier(Prefixes entity, String id) {
		return entity.label + DHPUtils.md5(id);

	}
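Note: getIdentifier is the single place where SKG-IF local identifiers are minted: the entity's prefix label concatenated with the md5 of the input id. A self-contained equivalent, for illustration (the "topic_" label value is an assumption; the real labels live in the Prefixes enum):

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class IdentifierSketch {

	// Standalone equivalent of entity.label + DHPUtils.md5(id).
	static String getIdentifier(String prefixLabel, String id) throws Exception {
		MessageDigest md = MessageDigest.getInstance("MD5");
		byte[] digest = md.digest(id.getBytes(StandardCharsets.UTF_8));
		return prefixLabel + String.format("%032x", new BigInteger(1, digest));
	}

	public static void main(String[] args) throws Exception {
		// Stable: the same classid+value always mints the same local identifier.
		System.out.println(getIdentifier("topic_", "fos" + "0101 mathematics"));
	}
}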
@@ -0,0 +1,30 @@
<configuration>
    <property>
        <name>jobTracker</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>nameNode</name>
        <value>hdfs://nameservice1</value>
    </property>
    <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
    </property>
    <property>
        <name>hiveMetastoreUris</name>
        <value>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</value>
    </property>
    <property>
        <name>hiveJdbcUrl</name>
        <value>jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000</value>
    </property>
    <property>
        <name>hiveDbName</name>
        <value>openaire</value>
    </property>
    <property>
        <name>oozie.launcher.mapreduce.user.classpath.first</name>
        <value>true</value>
    </property>
</configuration>
@@ -0,0 +1,216 @@
<workflow-app name="dump_graph" xmlns="uri:oozie:workflow:0.5">
    <parameters>
        <property>
            <name>sourcePath</name>
            <description>the source path</description>
        </property>

        <property>
            <name>outputPath</name>
            <description>the output path</description>
        </property>
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
        </property>
        <property>
            <name>sparkExecutorMemory</name>
            <description>memory for individual executor</description>
        </property>
        <property>
            <name>sparkExecutorCores</name>
            <description>number of cores used by single executor</description>
        </property>
        <property>
            <name>oozieActionShareLibForSpark2</name>
            <description>oozie action sharelib for spark 2.*</description>
        </property>
        <property>
            <name>spark2ExtraListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorAppListener</value>
            <description>spark 2.* extra listeners classname</description>
        </property>
        <property>
            <name>spark2SqlQueryExecutionListeners</name>
            <value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
            <description>spark 2.* sql query execution listeners classname</description>
        </property>
        <property>
            <name>spark2YarnHistoryServerAddress</name>
            <description>spark 2.* yarn history server address</description>
        </property>
        <property>
            <name>spark2EventLogDir</name>
            <description>spark 2.* event log dir location</description>
        </property>
    </parameters>
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
            <property>
                <name>oozie.action.sharelib.for.spark</name>
                <value>${oozieActionShareLibForSpark2}</value>
            </property>
        </configuration>
    </global>
    <start to="emit_from_result"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="emit_from_result">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Extraction</name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResults</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
        </spark>
        <ok to="dump_result"/>
        <error to="Kill"/>
    </action>
    <action name="dump_result">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.DumpResult</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
        </spark>
        <ok to="dump_datasource"/>
        <error to="Kill"/>
    </action>
    <action name="dump_datasource">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.DumpDatasource</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
        </spark>
        <ok to="dump_venue"/>
        <error to="Kill"/>
    </action>
    <action name="dump_venue">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.DumpVenue</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
        </spark>
        <ok to="dump_organization"/>
        <error to="Kill"/>
    </action>

    <action name="dump_organization">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.DumpOrganization</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
        </spark>
        <ok to="dump_grant"/>
        <error to="Kill"/>
    </action>
    <action name="dump_grant">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Dump table project </name>
            <class>eu.dnetlib.dhp.oa.graph.dump.skgif.DumpGrant</class>
            <jar>dump-${projectVersion}.jar</jar>
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
            </spark-opts>
            <arg>--sourcePath</arg><arg>${sourcePath}</arg>
            <arg>--outputPath</arg><arg>${outputPath}</arg>
            <arg>--workingDir</arg><arg>${workingDir}/</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
    </action>

    <end name="End"/>
</workflow-app>
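Note: each spark action above passes its parameters as alternating <arg> flag/value pairs (--sourcePath, --workingDir, --outputPath), which is also how the unit tests below invoke the same main methods. A simplified, hypothetical illustration of that convention (the real classes parse these through ArgumentApplicationParser; this stand-in only shows the flag/value pairing):

import java.util.HashMap;
import java.util.Map;

public class ArgPairsSketch {

	// Collects "-flag value" / "--flag value" pairs into a map.
	static Map<String, String> parse(String[] args) {
		Map<String, String> parsed = new HashMap<>();
		for (int i = 0; i + 1 < args.length; i += 2) {
			parsed.put(args[i].replaceFirst("^-+", ""), args[i + 1]);
		}
		return parsed;
	}

	public static void main(String[] args) {
		Map<String, String> p = parse(
			new String[] {
				"-isSparkSessionManaged", "false",
				"-sourcePath", "/tmp/graph",
				"-outputPath", "/tmp/out/"
			});
		System.out.println(p.get("sourcePath")); // /tmp/graph
	}
}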
@@ -7,7 +7,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;

-import eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResultJobTest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.jupiter.api.Assertions;
@@ -17,10 +16,10 @@ import org.junit.jupiter.api.Test;

import com.google.gson.Gson;

+import eu.dnetlib.dhp.oa.graph.dump.skgif.EmitFromResultJobTest;
import eu.dnetlib.dhp.oa.zenodoapi.MissingConceptDoiException;
import eu.dnetlib.dhp.oa.zenodoapi.ZenodoAPIClient;

@Disabled
public class ZenodoUploadTest {

@@ -162,8 +161,6 @@ public class ZenodoUploadTest {

	}

	@Test
	void depositBigFile() throws MissingConceptDoiException, IOException {
		ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
@@ -1,8 +1,11 @@

package eu.dnetlib.dhp.oa.graph.dump.skgif;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.skgif.model.Datasource;
-import eu.dnetlib.dhp.skgif.model.Organization;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@@ -18,76 +21,76 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.file.Files;
-import java.nio.file.Path;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.skgif.model.Datasource;
+import eu.dnetlib.dhp.skgif.model.Organization;

/**
 * @author miriam.baglioni
 * @Date 22/02/24
 */
public class DumpDatasourceTest implements Serializable {
	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(DumpDatasourceTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(DumpDatasourceTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpDatasourceTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpDatasourceTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void testDumpDatasource() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
			.getPath();

-		DumpDatasource.main(
-			new String[] {
-				"-isSparkSessionManaged", Boolean.FALSE.toString(),
-				"-sourcePath", sourcePath,
-				"-outputPath", workingDir.toString() + "/"
-
-			});
+		DumpDatasource
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-outputPath", workingDir.toString() + "/"
+
+				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Datasource> datasource = sc
			.textFile(workingDir.toString() + "/Datasource")
			.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));

		Dataset<Datasource> datasourceDataset = spark.createDataset(datasource.rdd(), Encoders.bean(Datasource.class));
-		Assertions.assertEquals(5,datasourceDataset.count());
+		Assertions.assertEquals(5, datasourceDataset.count());
		datasourceDataset.show(false);

// Assertions.assertEquals(7, relationDataset.count());
// RelationPerProduct temp = relationDataset.filter((FilterFunction<RelationPerProduct>) r -> r.getResultId().equalsIgnoreCase("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).first();
// Assertions.assertEquals(3, temp.getFunding().size()+temp.getRelatedProduct().size()+temp.getOrganizations().size());
@@ -115,32 +118,33 @@ Assertions.assertEquals(5,datasourceDataset.count());
//
//

	}

	@Test
	public void testDumpDatasourceComplete() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
			.getPath();

-		DumpDatasource.main(
-			new String[] {
-				"-isSparkSessionManaged", Boolean.FALSE.toString(),
-				"-sourcePath", sourcePath,
-				"-outputPath", workingDir.toString() + "/"
-
-			});
+		DumpDatasource
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-outputPath", workingDir.toString() + "/"
+
+				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Datasource> datasource = sc
			.textFile(workingDir.toString() + "/Datasource")
			.map(item -> OBJECT_MAPPER.readValue(item, Datasource.class));

		Dataset<Datasource> datasourceDataset = spark.createDataset(datasource.rdd(), Encoders.bean(Datasource.class));

-		datasourceDataset.foreach((ForeachFunction<Datasource>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));
+		datasourceDataset
+			.foreach((ForeachFunction<Datasource>) d -> System.out.println(OBJECT_MAPPER.writeValueAsString(d)));

// Assertions.assertEquals(7, relationDataset.count());
// RelationPerProduct temp = relationDataset.filter((FilterFunction<RelationPerProduct>) r -> r.getResultId().equalsIgnoreCase("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).first();
// Assertions.assertEquals(3, temp.getFunding().size()+temp.getRelatedProduct().size()+temp.getOrganizations().size());
@@ -168,5 +172,5 @@ Assertions.assertEquals(5,datasourceDataset.count());
//
//

	}
}
@@ -1,8 +1,11 @@

package eu.dnetlib.dhp.oa.graph.dump.skgif;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.skgif.model.Datasource;
-import eu.dnetlib.dhp.skgif.model.Grant;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@@ -17,76 +20,76 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.file.Files;
-import java.nio.file.Path;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.skgif.model.Datasource;
+import eu.dnetlib.dhp.skgif.model.Grant;

/**
 * @author miriam.baglioni
 * @Date 22/02/24
 */
public class DumpGrantTest implements Serializable {
	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(DumpGrantTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(DumpGrantTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpGrantTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpGrantTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void testDumpGrant() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
			.getPath();

-		DumpGrant.main(
-			new String[] {
-				"-isSparkSessionManaged", Boolean.FALSE.toString(),
-				"-sourcePath", sourcePath,
-				"-outputPath", workingDir.toString() + "/"
-
-			});
+		DumpGrant
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-outputPath", workingDir.toString() + "/"
+
+				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Grant> grant = sc
			.textFile(workingDir.toString() + "/Grant")
			.map(item -> OBJECT_MAPPER.readValue(item, Grant.class));

		Dataset<Grant> grantDataset = spark.createDataset(grant.rdd(), Encoders.bean(Grant.class));
-		Assertions.assertEquals(12,grantDataset.count());
+		Assertions.assertEquals(12, grantDataset.count());
		grantDataset.show(false);

// Assertions.assertEquals(7, relationDataset.count());
// RelationPerProduct temp = relationDataset.filter((FilterFunction<RelationPerProduct>) r -> r.getResultId().equalsIgnoreCase("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).first();
// Assertions.assertEquals(3, temp.getFunding().size()+temp.getRelatedProduct().size()+temp.getOrganizations().size());
@@ -114,5 +117,5 @@ Assertions.assertEquals(12,grantDataset.count());
//
//

	}
}
@@ -1,10 +1,11 @@

package eu.dnetlib.dhp.oa.graph.dump.skgif;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
-import eu.dnetlib.dhp.skgif.model.Organization;
-import eu.dnetlib.dhp.skgif.model.Prefixes;
-import eu.dnetlib.dhp.skgif.model.ResearchProduct;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
@@ -20,77 +21,79 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.file.Files;
-import java.nio.file.Path;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
+import eu.dnetlib.dhp.skgif.model.Organization;
+import eu.dnetlib.dhp.skgif.model.Prefixes;
+import eu.dnetlib.dhp.skgif.model.ResearchProduct;

/**
 * @author miriam.baglioni
 * @Date 22/02/24
 */
public class DumpOrganizationTest implements Serializable {
	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory.getLogger(DumpOrganizationTest.class);

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files.createTempDirectory(DumpOrganizationTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(DumpOrganizationTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(DumpOrganizationTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void testDumpOrganization() throws Exception {
		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
			.getPath();

		DumpOrganization
			.main(
				new String[] {
					"-isSparkSessionManaged", Boolean.FALSE.toString(),
					"-sourcePath", sourcePath,
					"-outputPath", workingDir.toString() + "/"

				});

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<Organization> organization = sc
			.textFile(workingDir.toString() + "/Organization")
			.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));

-		Dataset<Organization> organizationDataset = spark.createDataset(organization.rdd(), Encoders.bean(Organization.class));
-		Assertions.assertEquals(34-19,organizationDataset.count());
+		Dataset<Organization> organizationDataset = spark
+			.createDataset(organization.rdd(), Encoders.bean(Organization.class));
+		Assertions.assertEquals(34 - 19, organizationDataset.count());
		organizationDataset.show(false);

// Assertions.assertEquals(7, relationDataset.count());
// RelationPerProduct temp = relationDataset.filter((FilterFunction<RelationPerProduct>) r -> r.getResultId().equalsIgnoreCase("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).first();
// Assertions.assertEquals(3, temp.getFunding().size()+temp.getRelatedProduct().size()+temp.getOrganizations().size());
@@ -118,5 +121,5 @@ Assertions.assertEquals(34-19,organizationDataset.count());
//
//

	}
}
@ -1,10 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
package eu.dnetlib.dhp.oa.graph.dump.skgif;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.io.IOException;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
|
import java.nio.file.Files;
|
||||||
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
|
import java.nio.file.Path;
|
||||||
import eu.dnetlib.dhp.skgif.model.*;
|
import java.util.stream.Collectors;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
|
import javax.validation.constraints.AssertTrue;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
@ -21,261 +24,456 @@ import org.junit.jupiter.api.Test;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.validation.constraints.AssertTrue;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
|
||||||
import java.nio.file.Path;
|
import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.RelationPerProduct;
|
||||||
import java.util.stream.Collectors;
|
import eu.dnetlib.dhp.skgif.model.*;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author miriam.baglioni
|
* @author miriam.baglioni
|
||||||
* @Date 20/02/24
|
* @Date 20/02/24
|
||||||
*/
|
*/
|
||||||
public class DumpResultTest {
|
public class DumpResultTest {
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static Path workingDir;
|
private static Path workingDir;
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DumpResultTest.class);
|
private static final Logger log = LoggerFactory.getLogger(DumpResultTest.class);
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
workingDir = Files.createTempDirectory(DumpResultTest.class.getSimpleName());
|
workingDir = Files.createTempDirectory(DumpResultTest.class.getSimpleName());
|
||||||
log.info("using work dir {}", workingDir);
|
log.info("using work dir {}", workingDir);
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(DumpResultTest.class.getSimpleName());
|
conf.setAppName(DumpResultTest.class.getSimpleName());
|
||||||
|
|
||||||
conf.setMaster("local[*]");
|
conf.setMaster("local[*]");
|
||||||
conf.set("spark.driver.host", "localhost");
|
conf.set("spark.driver.host", "localhost");
|
||||||
conf.set("hive.metastore.local", "true");
|
conf.set("hive.metastore.local", "true");
|
||||||
conf.set("spark.ui.enabled", "false");
|
conf.set("spark.ui.enabled", "false");
|
||||||
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
conf.set("spark.sql.warehouse.dir", workingDir.toString());
|
||||||
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
|
||||||
|
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(DumpResultTest.class.getSimpleName())
|
.appName(DumpResultTest.class.getSimpleName())
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterAll
|
@AfterAll
|
||||||
public static void afterAll() throws IOException {
|
public static void afterAll() throws IOException {
|
||||||
FileUtils.deleteDirectory(workingDir.toFile());
|
FileUtils.deleteDirectory(workingDir.toFile());
|
||||||
spark.stop();
|
spark.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmitFromResult() throws Exception {
|
public void testEmitFromResult() throws Exception {
|
||||||
final String sourcePath = getClass()
|
final String sourcePath = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
final String workingDir = getClass()
|
final String workingDir = getClass()
|
||||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDir/")
|
.getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDir/")
|
||||||
.getPath();
|
.getPath();
|
||||||
|
|
||||||
DumpResult
|
DumpResult
|
||||||
.main(
|
.main(
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-workingDir", workingDir
|
"-workingDir", workingDir
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<RelationPerProduct> relation = sc
|
JavaRDD<RelationPerProduct> relation = sc
|
||||||
.textFile(workingDir + "/aggrelation")
|
.textFile(workingDir + "/aggrelation")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, RelationPerProduct.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, RelationPerProduct.class));
|
||||||
|
|
||||||
Dataset<RelationPerProduct> relationDataset = spark.createDataset(relation.rdd(), Encoders.bean(RelationPerProduct.class));
|
Dataset<RelationPerProduct> relationDataset = spark
|
||||||
|
.createDataset(relation.rdd(), Encoders.bean(RelationPerProduct.class));
|
||||||
|
|
||||||
relationDataset.show(false);
|
relationDataset.show(false);
|
||||||
Assertions.assertEquals(7, relationDataset.count());
|
Assertions.assertEquals(7, relationDataset.count());
|
||||||
        RelationPerProduct temp = relationDataset
            .filter(
                (FilterFunction<RelationPerProduct>) r -> r
                    .getResultId()
                    .equalsIgnoreCase("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
            .first();
        Assertions
            .assertEquals(
                3, temp.getFunding().size() + temp.getRelatedProduct().size() + temp.getOrganizations().size());
        Assertions.assertEquals(1, temp.getFunding().size());
        Assertions.assertEquals(2, temp.getRelatedProduct().size());
        Assertions
            .assertEquals(
                1,
                temp
                    .getRelatedProduct()
                    .stream()
                    .filter(rp -> rp.getRelation_type().equalsIgnoreCase("issupplementedby"))
                    .count());
        Assertions
            .assertEquals(
                1,
                temp
                    .getRelatedProduct()
                    .stream()
                    .filter(rp -> rp.getRelation_type().equalsIgnoreCase("isdocumentedby"))
                    .count());

        JavaRDD<ResearchProduct> researchProduct = sc
            .textFile(workingDir.toString() + "/publication/researchproduct")
            .map(item -> OBJECT_MAPPER.readValue(item, ResearchProduct.class));

        org.apache.spark.sql.Dataset<ResearchProduct> researchProductDataset = spark
            .createDataset(researchProduct.rdd(), Encoders.bean(ResearchProduct.class));
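        // The local identifiers compared below are produced by Utils.getIdentifier. Judging
        // from how expected values are built elsewhere in these tests (prefix label +
        // DHPUtils.md5(...)), it is assumed to amount to something like the sketch below;
        // the actual helper in the dump module is authoritative:
        //
        //   public static String getIdentifier(Prefixes prefix, String value) {
        //       return prefix.label + DHPUtils.md5(value);
        //   }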
        Assertions
            .assertEquals(
                1,
                researchProductDataset
                    .filter(
                        (FilterFunction<ResearchProduct>) p -> p
                            .getLocal_identifier()
                            .equalsIgnoreCase(
                                Utils
                                    .getIdentifier(
                                        Prefixes.RESEARCH_PRODUCT,
                                        "50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")))
                    .count());
        ResearchProduct product = researchProductDataset
            .filter(
                (FilterFunction<ResearchProduct>) p -> p
                    .getLocal_identifier()
                    .equalsIgnoreCase(
                        Utils
                            .getIdentifier(
                                Prefixes.RESEARCH_PRODUCT, "50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")))
            .first();
        Assertions.assertEquals(2, product.getRelevant_organizations().size());
        Assertions.assertEquals(1, product.getFunding().size());
        Assertions.assertEquals(0, product.getRelated_products().size());
        Assertions.assertEquals(1, product.getContributions().size());
        Assertions.assertEquals(2, product.getManifestations().size());

        researchProductDataset.show(false);
    }

    @Test
    public void testEmitFromDedupedResult() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
            .getPath();

        final String workingDir = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/workingDir_complete_entities/")
            .getPath();

        DumpResult
            .main(
                new String[] {
                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-sourcePath", sourcePath,
                    "-workingDir", workingDir,
                    "-outputPath", workingDir
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<ResearchProduct> researchProduct = sc
            .textFile(workingDir.toString() + "ResearchProduct")
            .map(item -> OBJECT_MAPPER.readValue(item, ResearchProduct.class));

        org.apache.spark.sql.Dataset<ResearchProduct> researchProductDataset = spark
            .createDataset(researchProduct.rdd(), Encoders.bean(ResearchProduct.class));

        Assertions.assertEquals(1, researchProductDataset.count());

        ResearchProduct rp = researchProductDataset.first();
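        // The dump now holds a single deduped record; the blocks below walk through each
        // section of the SKG-IF ResearchProduct shape it was mapped to.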
        // check the local identifier
        Assertions.assertEquals("product_____::e22a152ab43b9215d14ece613f76ec84", rp.getLocal_identifier());

        // check the pids of the result
        Assertions.assertEquals(3, rp.getIdentifiers().size());
        Assertions
            .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("doi")).count());
        Assertions
            .assertEquals(
                "10.1007/s40199-021-00403-x",
                rp
                    .getIdentifiers()
                    .stream()
                    .filter(p -> p.getScheme().equalsIgnoreCase("doi"))
                    .collect(Collectors.toList())
                    .get(0)
                    .getValue());
        Assertions
            .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmid")).count());
        Assertions
            .assertEquals(
                "34327650",
                rp
                    .getIdentifiers()
                    .stream()
                    .filter(p -> p.getScheme().equalsIgnoreCase("pmid"))
                    .collect(Collectors.toList())
                    .get(0)
                    .getValue());
        Assertions
            .assertEquals(1, rp.getIdentifiers().stream().filter(p -> p.getScheme().equalsIgnoreCase("pmc")).count());
        Assertions
            .assertEquals(
                "PMC8602609",
                rp
                    .getIdentifiers()
                    .stream()
                    .filter(p -> p.getScheme().equalsIgnoreCase("pmc"))
                    .collect(Collectors.toList())
                    .get(0)
                    .getValue());

        // check the title
        Assertions.assertEquals(1, rp.getTitles().keySet().size());
        Assertions.assertTrue(rp.getTitles().keySet().contains("none"));
        Assertions.assertEquals(1, rp.getTitles().get("none").size());

        // check abstract
        Assertions.assertEquals(1, rp.getAbstracts().keySet().size());
        Assertions.assertTrue(rp.getAbstracts().keySet().contains("none"));
        Assertions.assertEquals(1, rp.getAbstracts().get("none").size());

        // check type
        Assertions.assertEquals("literature", rp.getProduct_type());

        // check topics
        Assertions.assertEquals(3, rp.getTopics().size());
        Assertions
            .assertTrue(
                rp
                    .getTopics()
                    .stream()
                    .anyMatch(
                        t -> t
                            .getTopic()
                            .equalsIgnoreCase(Prefixes.TOPIC.label + DHPUtils.md5("FOSSustained delivery"))));
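        // Contributions are the author links of the product: ids starting with "person"
        // belong to resolved Persons (built from ORCID strings, as the filters below show),
        // while the "temp" one is a temporary person minted from the dedup record itself;
        // the rank checks verify that the author order is preserved.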

        // check contributions
        Assertions.assertEquals(4, rp.getContributions().size());
        Assertions
            .assertEquals(3, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("person")).count());
        Assertions
            .assertEquals(1, rp.getContributions().stream().filter(c -> c.getPerson().startsWith("temp")).count());
        rp.getContributions().forEach(c -> Assertions.assertTrue(c.getDeclared_affiliation() == null));
        Assertions
            .assertEquals(
                1,
                rp
                    .getContributions()
                    .stream()
                    .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-8284-6269true")))
                    .collect(Collectors.toList())
                    .get(0)
                    .getRank());
        Assertions
            .assertEquals(
                2,
                rp
                    .getContributions()
                    .stream()
                    .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0002-0940-893xtrue")))
                    .collect(Collectors.toList())
                    .get(0)
                    .getRank());
        Assertions
            .assertEquals(
                3,
                rp
                    .getContributions()
                    .stream()
                    .filter(c -> c.getPerson().equals(Utils.getIdentifier(Prefixes.PERSON, "0000-0001-5291-577xtrue")))
                    .collect(Collectors.toList())
                    .get(0)
                    .getRank());
        Assertions
            .assertEquals(
                4,
                rp
                    .getContributions()
                    .stream()
                    .filter(
                        c -> c
                            .getPerson()
                            .equals(
                                Utils
                                    .getIdentifier(
                                        Prefixes.TEMPORARY_PERSON,
                                        "50|doi_dedup___::0000661be7c602727bae9690778b16514")))
                    .collect(Collectors.toList())
                    .get(0)
                    .getRank());
        researchProductDataset.show(10, 100, true);

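        // A manifestation is one concrete instance of the product as exposed by a hosting
        // datasource; the same deduped publication appears here through three datasources
        // (a DOAJ journal, PubMed and Europe PMC, judging by the pids and urls), each with
        // its own dates, peer-review status, access right and pid.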
        // check manifestation 1
        Assertions.assertEquals(3, rp.getManifestations().size());
        Manifestation manifestation = rp
            .getManifestations()
            .stream()
            .filter(
                m -> m
                    .getHosting_datasource()
                    .equals(
                        Utils.getIdentifier(Prefixes.DATASOURCE, "10|doajarticles::6107489403b31fc7cf37cb7fda35f7f1")))
            .collect(Collectors.toList())
            .get(0);
        Assertions.assertEquals("Article", manifestation.getProduct_local_type());
        Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
        Assertions.assertEquals(1, manifestation.getDates().size());
        Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
        Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
        Assertions.assertEquals(PeerReview.PEER_REVIEWED.label, manifestation.getPeer_review());
        Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
        Assertions.assertEquals(AccessRight.CLOSED.label, manifestation.getAccess_right());
        Assertions.assertEquals("Springer Nature TDM", manifestation.getLicence());
        Assertions.assertEquals("https://doi.org/10.1007/s40199-021-00403-x", manifestation.getUrl());
        Assertions.assertEquals("10.1007/s40199-021-00403-x", manifestation.getPid());
        Assertions.assertTrue(manifestation.getBiblio() != null);
        Biblio biblio = manifestation.getBiblio();
        Assertions.assertTrue(biblio.getEdition() == null);
        Assertions.assertTrue(biblio.getIssue() == null);
        Assertions.assertEquals("Springer Science and Business Media LLC", biblio.getPublisher());
        Assertions.assertEquals("29", biblio.getVolume());
        Assertions.assertEquals("415", biblio.getStart_page());
        Assertions.assertEquals("438", biblio.getEnd_page());

        // check manifestation 2
        manifestation = rp
            .getManifestations()
            .stream()
            .filter(
                m -> m
                    .getHosting_datasource()
                    .equals(
                        Utils.getIdentifier(Prefixes.DATASOURCE, "10|openaire____::55045bd2a65019fd8e6741a755395c8c")))
            .collect(Collectors.toList())
            .get(0);
        Assertions.assertEquals("Article", manifestation.getProduct_local_type());
        Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
        Assertions.assertEquals(1, manifestation.getDates().size());
        Assertions.assertEquals("2020-01-03", manifestation.getDates().get(0).getValue());
        Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
        Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
        Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
        Assertions.assertEquals(AccessRight.UNAVAILABLE.label, manifestation.getAccess_right());
        Assertions.assertEquals(null, manifestation.getLicence());
        Assertions.assertEquals("https://pubmed.ncbi.nlm.nih.gov/34327650", manifestation.getUrl());
        Assertions.assertEquals("34327650", manifestation.getPid());
        Assertions.assertTrue(manifestation.getBiblio() == null);

        // check manifestation 3
        manifestation = rp
            .getManifestations()
            .stream()
            .filter(
                m -> m
                    .getHosting_datasource()
                    .equals(
                        Utils.getIdentifier(Prefixes.DATASOURCE, "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c")))
            .collect(Collectors.toList())
            .get(0);
        Assertions.assertEquals("Other literature type", manifestation.getProduct_local_type());
        Assertions.assertEquals("dnet:publication_resource", manifestation.getProduct_local_type_schema());
        Assertions.assertEquals(1, manifestation.getDates().size());
        Assertions.assertEquals("2021-07-29", manifestation.getDates().get(0).getValue());
        Assertions.assertEquals("publishing", manifestation.getDates().get(0).getType());
        Assertions.assertEquals(PeerReview.NON_PEER_REVIEWED.label, manifestation.getPeer_review());
        Assertions.assertEquals("unavailable", manifestation.getMetadata_curation());
        Assertions.assertEquals(AccessRight.OPEN.label, manifestation.getAccess_right());
        Assertions.assertEquals(null, manifestation.getLicence());
        Assertions.assertEquals("https://europepmc.org/articles/PMC8602609/", manifestation.getUrl());
        Assertions.assertEquals("PMC8602609", manifestation.getPid());
        Assertions.assertTrue(manifestation.getBiblio() == null);

        // check relevant organization
        Assertions.assertEquals(1, rp.getRelevant_organizations().size());
        Assertions
            .assertEquals(
                Prefixes.ORGANIZATION.label + "601e510b1fda7cc6cb03329531502171",
                rp.getRelevant_organizations().get(0));

        // check funding
        Assertions.assertEquals(1, rp.getFunding().size());
        Assertions.assertEquals(Prefixes.GRANT.label + "a7795022763d413f5de59036ebbd0c52", rp.getFunding().get(0));

        // check related products
        Assertions.assertEquals(5, rp.getRelated_products().size());
        Assertions
            .assertEquals(
                4,
                rp
                    .getRelated_products()
                    .stream()
                    .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.CITATION.label))
                    .collect(Collectors.toList())
                    .get(0)
                    .getProduct_list()
                    .size());
        Assertions
            .assertEquals(
                1,
                rp
                    .getRelated_products()
                    .stream()
                    .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.DOCUMENTS.label))
                    .collect(Collectors.toList())
                    .get(0)
                    .getProduct_list()
                    .size());
        Assertions
            .assertEquals(
                1,
                rp
                    .getRelated_products()
                    .stream()
                    .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.PART.label))
                    .collect(Collectors.toList())
                    .get(0)
                    .getProduct_list()
                    .size());
        Assertions
            .assertEquals(
                1,
                rp
                    .getRelated_products()
                    .stream()
                    .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.SUPPLEMENT.label))
                    .collect(Collectors.toList())
                    .get(0)
                    .getProduct_list()
                    .size());
        Assertions
            .assertEquals(
                1,
                rp
                    .getRelated_products()
                    .stream()
                    .filter(r -> r.getRelation_type().equalsIgnoreCase(RelationType.VERSION.label))
                    .collect(Collectors.toList())
                    .get(0)
                    .getProduct_list()
                    .size());

    }

}

@@ -5,7 +5,6 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;

-import eu.dnetlib.dhp.skgif.model.Topic;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
@@ -24,9 +23,9 @@ import org.slf4j.LoggerFactory;

 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.oa.graph.dump.skgif.beans.EmitPerManifestation;
 import eu.dnetlib.dhp.skgif.model.Persons;
+import eu.dnetlib.dhp.skgif.model.Topic;

 //@Disabled
 public class EmitFromResultJobTest {
@@ -101,17 +100,54 @@ public class EmitFromResultJobTest {
            .createDataset(persons.rdd(), Encoders.bean(Persons.class));

        personsDataset.show(false);
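        // "Claudia Borer" occurs twice among the emitted Persons: once as a temporary person
        // (the tmp_person__:: identifier below) and once as a distinct resolved person; the
        // counts that follow make sure the emission kept both records apart.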
        Persons claudiaBorer = personsDataset
            .filter(
                (FilterFunction<Persons>) p -> p
                    .getLocal_identifier()
                    .equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
            .first();

        Assertions
            .assertEquals(
                2,
                personsDataset
                    .filter(
                        (FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia")
                            && p.getFamily_name().equalsIgnoreCase("borer"))
                    .count());
        Assertions
            .assertEquals(
                1,
                personsDataset
                    .filter(
                        (FilterFunction<Persons>) p -> p.getGiven_name().equalsIgnoreCase("claudia")
                            && p.getFamily_name().equalsIgnoreCase("borer")
                            && !p
                                .getLocal_identifier()
                                .equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
                    .count());
        Assertions.assertEquals("claudia", claudiaBorer.getGiven_name().toLowerCase());
        Assertions.assertEquals("borer", claudiaBorer.getFamily_name().toLowerCase());

        Assertions
            .assertEquals(
                2,
                personsDataset
                    .filter((FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person"))
                    .count());
        Assertions
            .assertEquals(
                1,
                personsDataset
                    .filter(
                        (FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person")
                            && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916"))
                    .count());
        Persons orcidPerson = personsDataset
            .filter(
                (FilterFunction<Persons>) p -> p.getLocal_identifier().startsWith("person")
                    && p.getIdentifiers().get(0).getValue().equals("0000-0002-5597-4916"))
            .first();
        Assertions.assertEquals("M.", orcidPerson.getGiven_name());
        Assertions.assertEquals("Kooi", orcidPerson.getFamily_name());
        Assertions.assertEquals(1, orcidPerson.getIdentifiers().size());
@@ -119,58 +155,57 @@ public class EmitFromResultJobTest {
        Assertions.assertEquals("0000-0002-5597-4916", orcidPerson.getIdentifiers().get(0).getValue());

        Dataset<EmitPerManifestation> manifestationDataset = spark
            .createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
        manifestationDataset.show(false);
        Assertions.assertEquals(4, manifestationDataset.count());

        Dataset<Topic> topicDataset = spark
            .createDataset(topics.rdd(), Encoders.bean(Topic.class));
        Assertions.assertEquals(0, topicDataset.count());
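        // the plain test graph yields no Topic records here, while the complete-entities
        // graph used in testEmitFromResultComplete below yields three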
    }

    @Test
    public void testEmitFromResultComplete() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/oa/graph/dump/skgif/graph_complete_entities/")
            .getPath();

        EmitFromResults
            .main(
                new String[] {
                    "-isSparkSessionManaged", Boolean.FALSE.toString(),
                    "-sourcePath", sourcePath,
                    "-outputPath", workingDir.toString() + "/result/",
                    "-workingDir", workingDir.toString() + "/"
                });

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        JavaRDD<Persons> persons = sc
            .textFile(workingDir.toString() + "/result/Persons")
            .map(item -> OBJECT_MAPPER.readValue(item, Persons.class));
        org.apache.spark.sql.Dataset<Persons> personsDataset = spark
            .createDataset(persons.rdd(), Encoders.bean(Persons.class));

        personsDataset.foreach((ForeachFunction<Persons>) p -> System.out.println(OBJECT_MAPPER.writeValueAsString(p)));

        JavaRDD<Topic> topics = sc
            .textFile(workingDir.toString() + "/result/Topic")
            .map(item -> OBJECT_MAPPER.readValue(item, Topic.class));
        Dataset<Topic> topicDataset = spark
            .createDataset(topics.rdd(), Encoders.bean(Topic.class));
        Assertions.assertEquals(3, topicDataset.count());

        topicDataset.foreach((ForeachFunction<Topic>) t -> System.out.println(OBJECT_MAPPER.writeValueAsString(t)));

        JavaRDD<EmitPerManifestation> manifestation = sc
            .textFile(workingDir.toString() + "/publication/manifestation")
            .map(item -> OBJECT_MAPPER.readValue(item, EmitPerManifestation.class));

        Dataset<EmitPerManifestation> manifestationDataset = spark
            .createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
        manifestationDataset.show(false);

        // Persons claudiaBorer = personsDataset.filter((FilterFunction<Persons>) p -> p.getLocal_identifier().equalsIgnoreCase("tmp_person__::2c1eea261f7d9a97ab7ca8c4200781db"))
@@ -194,6 +229,5 @@ public class EmitFromResultJobTest {
        // Assertions.assertEquals(4, manifestationDataset.count());
        //

    }
}