Miriam Baglioni 2024-02-20 09:57:33 +01:00
parent c3be9a7b14
commit e2b9989199
31 changed files with 2174 additions and 674 deletions

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@@ -5,15 +6,11 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum AccessRight {
OPEN("open"),
CLOSED("closed"),
EMBARGO("embargo"),
RESTRICTED("restricted"),
UNAVAILABLE("unavailable");
OPEN("open"), CLOSED("closed"), EMBARGO("embargo"), RESTRICTED("restricted"), UNAVAILABLE("unavailable");
public final String label;
public final String label;
private AccessRight(String label) {
this.label = label;
}
private AccessRight(String label) {
this.label = label;
}
}

View File

@@ -1,41 +1,42 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 05/09/23
*/
public class Affiliation implements Serializable {
private String organization;
@JsonProperty("start_date")
private String startDate;
@JsonProperty("end_date")
private String endDate;
private String organization;
@JsonProperty("start_date")
private String startDate;
@JsonProperty("end_date")
private String endDate;
public String getOrganization() {
return organization;
}
public String getOrganization() {
return organization;
}
public void setOrganization(String organization) {
this.organization = organization;
}
public void setOrganization(String organization) {
this.organization = organization;
}
public String getStartDate() {
return startDate;
}
public String getStartDate() {
return startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public void setStartDate(String startDate) {
this.startDate = startDate;
}
public String getEndDate() {
return endDate;
}
public String getEndDate() {
return endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
public void setEndDate(String endDate) {
this.endDate = endDate;
}
}

View File

@@ -1,86 +1,87 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Biblio implements Serializable {
private String issue;
@JsonProperty("start_page")
private String startPage;
@JsonProperty("end_page")
private String endPage;
private String volume;
private String edition;
private String number;
private String publisher;
private String series;
private String issue;
@JsonProperty("start_page")
private String startPage;
@JsonProperty("end_page")
private String endPage;
private String volume;
private String edition;
private String number;
private String publisher;
private String series;
public String getIssue() {
return issue;
}
public String getIssue() {
return issue;
}
public void setIssue(String issue) {
this.issue = issue;
}
public void setIssue(String issue) {
this.issue = issue;
}
public String getStartPage() {
return startPage;
}
public String getStartPage() {
return startPage;
}
public void setStartPage(String startPage) {
this.startPage = startPage;
}
public void setStartPage(String startPage) {
this.startPage = startPage;
}
public String getEndPage() {
return endPage;
}
public String getEndPage() {
return endPage;
}
public void setEndPage(String endPage) {
this.endPage = endPage;
}
public void setEndPage(String endPage) {
this.endPage = endPage;
}
public String getVolume() {
return volume;
}
public String getVolume() {
return volume;
}
public void setVolume(String volume) {
this.volume = volume;
}
public void setVolume(String volume) {
this.volume = volume;
}
public String getEdition() {
return edition;
}
public String getEdition() {
return edition;
}
public void setEdition(String edition) {
this.edition = edition;
}
public void setEdition(String edition) {
this.edition = edition;
}
public String getNumber() {
return number;
}
public String getNumber() {
return number;
}
public void setNumber(String number) {
this.number = number;
}
public void setNumber(String number) {
this.number = number;
}
public String getPublisher() {
return publisher;
}
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
public String getSeries() {
return series;
}
public String getSeries() {
return series;
}
public void setSeries(String series) {
this.series = series;
}
public void setSeries(String series) {
this.series = series;
}
}

View File

@@ -1,50 +1,51 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Contribution implements Serializable {
private String person;
@JsonProperty("declared_affiliations")
private List<String> declaredAffiliation;
private List<String> roles;
private Integer rank;
private String person;
@JsonProperty("declared_affiliations")
private List<String> declaredAffiliation;
private List<String> roles;
private Integer rank;
public String getPerson() {
return person;
}
public String getPerson() {
return person;
}
public void setPerson(String person) {
this.person = person;
}
public void setPerson(String person) {
this.person = person;
}
public List<String> getDeclaredAffiliation() {
return declaredAffiliation;
}
public List<String> getDeclaredAffiliation() {
return declaredAffiliation;
}
public void setDeclaredAffiliation(List<String> declaredAffiliation) {
this.declaredAffiliation = declaredAffiliation;
}
public void setDeclaredAffiliation(List<String> declaredAffiliation) {
this.declaredAffiliation = declaredAffiliation;
}
public List<String> getRoles() {
return roles;
}
public List<String> getRoles() {
return roles;
}
public void setRoles(List<String> roles) {
this.roles = roles;
}
public void setRoles(List<String> roles) {
this.roles = roles;
}
public Integer getRank() {
return rank;
}
public Integer getRank() {
return rank;
}
public void setRank(Integer rank) {
this.rank = rank;
}
public void setRank(Integer rank) {
this.rank = rank;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@@ -7,22 +8,29 @@ import java.io.Serializable;
* @Date 01/09/23
*/
public class Dates implements Serializable {
private String value;
private String type;
private String value;
private String type;
public String getValue() {
return value;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public void setValue(String value) {
this.value = value;
}
public String getType() {
return type;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public void setType(String type) {
this.type = type;
}
public static Dates newInstance(String value, String type) {
Dates d = new Dates();
d.value = value;
d.type = type;
return d;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@@ -7,23 +8,30 @@ import java.io.Serializable;
* @Date 01/09/23
*/
public class Identifier implements Serializable {
private String scheme;
private String value;
private String scheme;
private String value;
public String getScheme() {
return scheme;
}
public String getScheme() {
return scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public void setScheme(String scheme) {
this.scheme = scheme;
}
public String getValue() {
return value;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
public void setValue(String value) {
this.value = value;
}
public static Identifier newInstance(String scheme, String value) {
Identifier i = new Identifier();
i.value = value;
i.scheme = scheme;
return i;
}
}

View File

@@ -1,138 +1,139 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.net.URL;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Manifestation implements Serializable {
@JsonProperty("product_local_type")
private String productLocalType;
@JsonProperty("product_local_type_schema")
private String productLocalTypeSchema;
private List<Dates> dates;
@JsonProperty("peer_review")
private String peerReview;
@JsonProperty("metadata_curation")
private String metadataCuration;
private URL url;
private String pid;
@JsonProperty("access_right")
private String accessRight;
private String licence;
@JsonProperty("licance_schema")
private String licenceSchema;
private Biblio biblio;
private String venue;
@JsonProperty("hosting_datasource")
private String hostingDatasource;
@JsonProperty("product_local_type")
private String productLocalType;
@JsonProperty("product_local_type_schema")
private String productLocalTypeSchema;
private List<Dates> dates;
@JsonProperty("peer_review")
private String peerReview;
@JsonProperty("metadata_curation")
private String metadataCuration;
private String url;
private String pid;
@JsonProperty("access_right")
private String accessRight;
private String licence;
@JsonProperty("licance_schema")
private String licenceSchema;
private Biblio biblio;
private String venue;
@JsonProperty("hosting_datasource")
private String hostingDatasource;
public String getProductLocalType() {
return productLocalType;
}
public String getProductLocalType() {
return productLocalType;
}
public void setProductLocalType(String productLocalType) {
this.productLocalType = productLocalType;
}
public void setProductLocalType(String productLocalType) {
this.productLocalType = productLocalType;
}
public String getProductLocalTypeSchema() {
return productLocalTypeSchema;
}
public String getProductLocalTypeSchema() {
return productLocalTypeSchema;
}
public void setProductLocalTypeSchema(String productLocalTypeSchema) {
this.productLocalTypeSchema = productLocalTypeSchema;
}
public void setProductLocalTypeSchema(String productLocalTypeSchema) {
this.productLocalTypeSchema = productLocalTypeSchema;
}
public List<Dates> getDates() {
return dates;
}
public List<Dates> getDates() {
return dates;
}
public void setDates(List<Dates> dates) {
this.dates = dates;
}
public void setDates(List<Dates> dates) {
this.dates = dates;
}
public String getPeerReview() {
return peerReview;
}
public String getPeerReview() {
return peerReview;
}
public void setPeerReview(String peerReview) {
this.peerReview = peerReview;
}
public void setPeerReview(String peerReview) {
this.peerReview = peerReview;
}
public String getMetadataCuration() {
return metadataCuration;
}
public String getMetadataCuration() {
return metadataCuration;
}
public void setMetadataCuration(String metadataCuration) {
this.metadataCuration = metadataCuration;
}
public void setMetadataCuration(String metadataCuration) {
this.metadataCuration = metadataCuration;
}
public URL getUrl() {
return url;
}
public String getUrl() {
return url;
}
public void setUrl(URL url) {
this.url = url;
}
public void setUrl(String url) {
this.url = url;
}
public String getPid() {
return pid;
}
public String getPid() {
return pid;
}
public void setPid(String pid) {
this.pid = pid;
}
public void setPid(String pid) {
this.pid = pid;
}
public String getAccessRight() {
return accessRight;
}
public String getAccessRight() {
return accessRight;
}
public void setAccessRight(String accessRight) {
this.accessRight = accessRight;
}
public void setAccessRight(String accessRight) {
this.accessRight = accessRight;
}
public String getLicence() {
return licence;
}
public String getLicence() {
return licence;
}
public void setLicence(String licence) {
this.licence = licence;
}
public void setLicence(String licence) {
this.licence = licence;
}
public String getLicenceSchema() {
return licenceSchema;
}
public String getLicenceSchema() {
return licenceSchema;
}
public void setLicenceSchema(String licenceSchema) {
this.licenceSchema = licenceSchema;
}
public void setLicenceSchema(String licenceSchema) {
this.licenceSchema = licenceSchema;
}
public Biblio getBiblio() {
return biblio;
}
public Biblio getBiblio() {
return biblio;
}
public void setBiblio(Biblio biblio) {
this.biblio = biblio;
}
public void setBiblio(Biblio biblio) {
this.biblio = biblio;
}
public String getVenue() {
return venue;
}
public String getVenue() {
return venue;
}
public void setVenue(String venue) {
this.venue = venue;
}
public void setVenue(String venue) {
this.venue = venue;
}
public String getHostingDatasource() {
return hostingDatasource;
}
public String getHostingDatasource() {
return hostingDatasource;
}
public void setHostingDatasource(String hostingDatasource) {
this.hostingDatasource = hostingDatasource;
}
public void setHostingDatasource(String hostingDatasource) {
this.hostingDatasource = hostingDatasource;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@@ -5,13 +6,11 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum MetadataCuration {
YES("yes"),
NO("no"),
UNAVAILABLE("unavailable");
YES("yes"), NO("no"), UNAVAILABLE("unavailable");
public final String label;
public final String label;
private MetadataCuration(String label) {
this.label = label;
}
private MetadataCuration(String label) {
this.label = label;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@@ -5,16 +6,12 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 04/09/23
*/
public enum PeerReview {
PEER_REVIEWED("peer-reviewed"),
NON_PEER_REVIEWED("not peer-reviewed"),
DOUBLE_BLIND("double-blind"),
SINGLE_BLIND("single-blind"),
UNAVAILABLE("unavailable"),
OPEN("open peer review");
PEER_REVIEWED("peer-reviewed"), NON_PEER_REVIEWED("not peer-reviewed"), DOUBLE_BLIND("double-blind"), SINGLE_BLIND(
"single-blind"), UNAVAILABLE("unavailable"), OPEN("open peer review");
public final String label;
public final String label;
private PeerReview(String label) {
this.label = label;
}
private PeerReview(String label) {
this.label = label;
}
}

View File

@@ -1,71 +1,72 @@
package eu.dnetlib.dhp.skgif.model;
import org.codehaus.jackson.annotate.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import org.codehaus.jackson.annotate.JsonProperty;
/**
* @author miriam.baglioni
* @Date 05/09/23
*/
public class Persons implements Serializable {
@JsonProperty("local_identifier")
private String localIdentifier;
private List<Identifier> identifiers;
@JsonProperty("given_name")
private String givenName;
@JsonProperty("family_name")
private String familyName;
private String agent;
@JsonProperty("declared_affiliations")
private List<Affiliation>declaredAffiliations;
@JsonProperty("local_identifier")
private String localIdentifier;
private List<Identifier> identifiers;
@JsonProperty("given_name")
private String givenName;
@JsonProperty("family_name")
private String familyName;
private String agent;
@JsonProperty("declared_affiliations")
private List<Affiliation> declaredAffiliations;
public String getLocalIdentifier() {
return localIdentifier;
}
public String getLocalIdentifier() {
return localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public String getGivenName() {
return givenName;
}
public String getGivenName() {
return givenName;
}
public void setGivenName(String givenName) {
this.givenName = givenName;
}
public void setGivenName(String givenName) {
this.givenName = givenName;
}
public String getFamilyName() {
return familyName;
}
public String getFamilyName() {
return familyName;
}
public void setFamilyName(String familyName) {
this.familyName = familyName;
}
public void setFamilyName(String familyName) {
this.familyName = familyName;
}
public String getAgent() {
return agent;
}
public String getAgent() {
return agent;
}
public void setAgent(String agent) {
this.agent = agent;
}
public void setAgent(String agent) {
this.agent = agent;
}
public List<Affiliation> getDeclaredAffiliations() {
return declaredAffiliations;
}
public List<Affiliation> getDeclaredAffiliations() {
return declaredAffiliations;
}
public void setDeclaredAffiliations(List<Affiliation> declaredAffiliations) {
this.declaredAffiliations = declaredAffiliations;
}
public void setDeclaredAffiliations(List<Affiliation> declaredAffiliations) {
this.declaredAffiliations = declaredAffiliations;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@@ -7,22 +8,22 @@ import java.io.Serializable;
* @Date 01/09/23
*/
public class Provenance implements Serializable {
private String type;
private double trust;
private String type;
private double trust;
public String getType() {
return type;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public void setType(String type) {
this.type = type;
}
public double getTrust() {
return trust;
}
public double getTrust() {
return trust;
}
public void setTrust(double trust) {
this.trust = trust;
}
public void setTrust(double trust) {
this.trust = trust;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
@@ -7,16 +8,12 @@ import java.io.Serializable;
* @Date 05/09/23
*/
public enum RelationType implements Serializable {
OUTCOME("outcome"),
AFFILIATION("hasAuthorInstitution"),
SUPPLEMENT("IsSupplementedBy"),
DOCUMENTS("IsDocumentedBy"),
PART("IsPartOf"),
VERSION("IsNewVersioneOf");
OUTCOME("outcome"), AFFILIATION("hasAuthorInstitution"), SUPPLEMENT("IsSupplementedBy"), DOCUMENTS(
"IsDocumentedBy"), PART("IsPartOf"), VERSION("IsNewVersioneOf");
public final String label;
public final String label;
private RelationType(String label) {
this.label = label;
}
private RelationType(String label) {
this.label = label;
}
}

View File

@@ -1,33 +1,43 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
import eu.dnetlib.dhp.oa.model.graph.Relation;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Relations implements Serializable {
@JsonProperty("relation_type")
private String relationType;
@JsonProperty("product_list")
private List<String> productList;
@JsonProperty("relation_type")
private String relationType;
@JsonProperty("product_list")
private List<String> productList;
public String getRelationType() {
return relationType;
}
public static Relations newInstance(String relClass, List<String> target) {
Relations r = new Relations();
r.relationType = relClass;
r.productList = target;
return r;
}
public void setRelationType(String relationType) {
this.relationType = relationType;
}
public String getRelationType() {
return relationType;
}
public List<String> getProductList() {
return productList;
}
public void setRelationType(String relationType) {
this.relationType = relationType;
}
public void setProductList(List<String> productList) {
this.productList = productList;
}
public List<String> getProductList() {
return productList;
}
public void setProductList(List<String> productList) {
this.productList = productList;
}
}

View File

@@ -1,116 +1,117 @@
package eu.dnetlib.dhp.skgif.model;
import com.fasterxml.jackson.annotation.JsonProperty;
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class ResearchProduct implements Serializable {
@JsonProperty("local_identifier")
private String localIdentifier;
private List<Identifier> identifiers;
private List<String> titles;
private List<String> abstracts;
@JsonProperty("product_type")
private String productType;
private List<Topic> topics;
private List<Contribution> contributions;
private List<Manifestation> manifestations;
@JsonProperty("relevant_organizations")
private List<String> relevantOrganizations;
private List<String> funding;
@JsonProperty("related_products")
private List<Relations> relatedProducts;
@JsonProperty("local_identifier")
private String localIdentifier;
private List<Identifier> identifiers;
private List<String> titles;
private List<String> abstracts;
@JsonProperty("product_type")
private String productType;
private List<ResultTopic> topics;
private List<Contribution> contributions;
private List<Manifestation> manifestations;
@JsonProperty("relevant_organizations")
private List<String> relevantOrganizations;
private List<String> funding;
@JsonProperty("related_products")
private List<Relations> relatedProducts;
public String getLocalIdentifier() {
return localIdentifier;
}
public String getLocalIdentifier() {
return localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public void setLocalIdentifier(String localIdentifier) {
this.localIdentifier = localIdentifier;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public List<String> getTitles() {
return titles;
}
public List<String> getTitles() {
return titles;
}
public void setTitles(List<String> titles) {
this.titles = titles;
}
public void setTitles(List<String> titles) {
this.titles = titles;
}
public List<String> getAbstracts() {
return abstracts;
}
public List<String> getAbstracts() {
return abstracts;
}
public void setAbstracts(List<String> abstracts) {
this.abstracts = abstracts;
}
public void setAbstracts(List<String> abstracts) {
this.abstracts = abstracts;
}
public String getProductType() {
return productType;
}
public String getProductType() {
return productType;
}
public void setProductType(String productType) {
this.productType = productType;
}
public void setProductType(String productType) {
this.productType = productType;
}
public List<Topic> getTopics() {
return topics;
}
public List<ResultTopic> getTopics() {
return topics;
}
public void setTopics(List<Topic> topics) {
this.topics = topics;
}
public void setTopics(List<ResultTopic> topics) {
this.topics = topics;
}
public List<Contribution> getContributions() {
return contributions;
}
public List<Contribution> getContributions() {
return contributions;
}
public void setContributions(List<Contribution> contributions) {
this.contributions = contributions;
}
public void setContributions(List<Contribution> contributions) {
this.contributions = contributions;
}
public List<Manifestation> getManifestations() {
return manifestations;
}
public List<Manifestation> getManifestations() {
return manifestations;
}
public void setManifestations(List<Manifestation> manifestations) {
this.manifestations = manifestations;
}
public void setManifestations(List<Manifestation> manifestations) {
this.manifestations = manifestations;
}
public List<String> getRelevantOrganizations() {
return relevantOrganizations;
}
public List<String> getRelevantOrganizations() {
return relevantOrganizations;
}
public void setRelevantOrganizations(List<String> relevantOrganizations) {
this.relevantOrganizations = relevantOrganizations;
}
public void setRelevantOrganizations(List<String> relevantOrganizations) {
this.relevantOrganizations = relevantOrganizations;
}
public List<String> getFunding() {
return funding;
}
public List<String> getFunding() {
return funding;
}
public void setFunding(List<String> funding) {
this.funding = funding;
}
public void setFunding(List<String> funding) {
this.funding = funding;
}
public List<Relations> getRelatedProducts() {
return relatedProducts;
}
public List<Relations> getRelatedProducts() {
return relatedProducts;
}
public void setRelatedProducts(List<Relations> relatedProducts) {
this.relatedProducts = relatedProducts;
}
public void setRelatedProducts(List<Relations> relatedProducts) {
this.relatedProducts = relatedProducts;
}
}

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.skgif.model;
/**
@@ -5,15 +6,12 @@ package eu.dnetlib.dhp.skgif.model;
* @Date 01/09/23
*/
public enum ResearchTypes {
LITERATURE("literature"),
RESEARCH_DATA("research data"),
RESEARCH_SOFTWARE("research software"),
OTHER("other");
LITERATURE("literature"), RESEARCH_DATA("research data"), RESEARCH_SOFTWARE("research software"), OTHER("other");
public final String label;
public final String label;
private ResearchTypes(String label) {
this.label = label;
}
private ResearchTypes(String label) {
this.label = label;
}
}

View File

@@ -0,0 +1,29 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class ResultTopic implements Serializable {
private String topic;
private Provenance provenance;
public String getTopic() {
return topic;
}
public void setTopic(String topic) {
this.topic = topic;
}
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
}

View File

@@ -1,28 +1,39 @@
package eu.dnetlib.dhp.skgif.model;
import java.io.Serializable;
import java.util.List;
/**
* @author miriam.baglioni
* @Date 01/09/23
*/
public class Topic implements Serializable {
private String topic;
private Provenance provenance;
private String local_identifier;
private List<Identifier> identifiers;
private String name;
public String getTopic() {
return topic;
}
public String getLocal_identifier() {
return local_identifier;
}
public void setTopic(String topic) {
this.topic = topic;
}
public void setLocal_identifier(String local_identifier) {
this.local_identifier = local_identifier;
}
public Provenance getProvenance() {
return provenance;
}
public List<Identifier> getIdentifiers() {
return identifiers;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public void setIdentifiers(List<Identifier> identifiers) {
this.identifiers = identifiers;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
}

View File

@@ -0,0 +1,304 @@
package eu.dnetlib.dhp.skgif;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.skgif.Utils.getOrcid;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.beans.PartialResearchProduct;
import eu.dnetlib.dhp.skgif.beans.RelationPerProduct;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class DumpResult implements Serializable {
private static final Logger log = LoggerFactory.getLogger(DumpResult.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/dump_result_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingDir = parser.get("workingDir");
log.info("workingDir: {}", workingDir);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
mapResult(spark, inputPath, outputPath, workingDir);
});
}
// for each result, emit the id + the journal (if any) + the instance + the hosted-by of the instance
public static <R extends Result> void mapResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
// selection of the relevant relations from results to other entities. Only the semantically relevant ones are considered
selectRelations(spark, inputPath, workingDir);
// merge of relations and manifestation for the same result
getRelationAndManifestation(spark, workingDir, inputPath);
// dump of the result and enrichment with relevant information for relations and manifestations
dumpResult(spark, inputPath, workingDir);
}
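// for each result type, left-joins the per-result manifestations with the aggregated relations and writes one
// PartialResearchProduct per result (manifestations, related products, organizations, funding)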
private static void getRelationAndManifestation(SparkSession spark, String workingDir, String inputPath) {
Dataset<RelationPerProduct> aggRelations = Utils
.readPath(spark, workingDir + "aggrelation", RelationPerProduct.class);
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
Dataset<Datasource> datasource = Utils
.readPath(spark, inputPath + "/datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
Dataset<EmitPerManifestation> man = Utils
.readPath(spark, workingDir + e.name() + "/manifestation", EmitPerManifestation.class);
man
.joinWith(aggRelations, man.col("resultId").equalTo(aggRelations.col("resultId")), "left")
.groupByKey(
(MapFunction<Tuple2<EmitPerManifestation, RelationPerProduct>, String>) t2 -> t2
._1()
.getResultId(),
Encoders.STRING())
.mapGroups(
(MapGroupsFunction<String, Tuple2<EmitPerManifestation, RelationPerProduct>, PartialResearchProduct>) (
k, v) -> {
PartialResearchProduct prp = new PartialResearchProduct();
prp.setResultId(k);
List<EmitPerManifestation> epms = new ArrayList<>();
Tuple2<EmitPerManifestation, RelationPerProduct> first = v.next();
RelationPerProduct rpp = first._2();
epms.add(first._1());
v.forEachRemaining(t2 -> epms.add(t2._1()));
Dataset<EmitPerManifestation> emitformanifestation = spark
.createDataset(epms, Encoders.bean(EmitPerManifestation.class));
prp.setManifestations(getManifestationList(emitformanifestation, datasource));
prp.setRelatedProducts(rpp.getRelatedProduct());
prp.setRelevantOrganizations(rpp.getOrganizations());
prp.setFunding(rpp.getFunding());
return prp;
}, Encoders.bean(PartialResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/partialresearchproduct");
});
}
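// left-joins the instances of a result with the "Journal archive" datasources: venue and hosting datasource are always
// derived from the hosted-by key, while the biblio is filled only when the hosted-by matches a journal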
private static List<Manifestation> getManifestationList(Dataset<EmitPerManifestation> emitformanifestation,
Dataset<Datasource> datasource) {
return emitformanifestation
.joinWith(
datasource, emitformanifestation
.col("hostedBy")
.equalTo(datasource.col("id")),
"left")
.map((MapFunction<Tuple2<EmitPerManifestation, Datasource>, Manifestation>) t2 -> {
// when the joined datasource is present we also have the biblio and the venue
// when it is not, only the other values are available
EmitPerManifestation epm = t2._1();
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(epm.getInstance().getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(epm.getInstance().getInstancetype().getSchemename());
manifestation
.setDates(
Arrays
.asList(
Dates.newInstance(epm.getInstance().getDateofacceptance().getValue(), "publishing")));
if (Optional.ofNullable(epm.getInstance().getRefereed()).isPresent())
switch (epm.getInstance().getRefereed().getClassid()) {
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration("unavailable");
if (Optional.ofNullable(epm.getInstance().getAccessright()).isPresent())
switch (epm.getInstance().getAccessright().getClassid()) {
case "OPEN":
case "OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case "EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
manifestation.setLicence(epm.getInstance().getLicense().getValue());
manifestation.setUrl(epm.getInstance().getUrl().get(0));
if (Optional.ofNullable(epm.getInstance().getPid()).isPresent()) {
manifestation.setPid(epm.getInstance().getPid().get(0).getValue());
}
if (Optional.ofNullable(t2._2()).isPresent())
manifestation.setBiblio(getBiblio(epm));
manifestation.setVenue("venue_______::" + DHPUtils.md5(epm.getInstance().getHostedby().getKey()));
manifestation
.setHostingDatasource("datasource__::" + DHPUtils.md5(epm.getInstance().getHostedby().getKey()));
return manifestation;
}, Encoders.bean(Manifestation.class))
.collectAsList();
}
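// copies the journal information (edition, issue, volume, start/end page) and the publisher into the SKG-IF Biblio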
private static Biblio getBiblio(EmitPerManifestation epm) {
Biblio biblio = new Biblio();
biblio.setEdition(epm.getJournal().getEdition());
biblio.setIssue(epm.getJournal().getIss());
biblio.setPublisher(epm.getPublisher());
biblio.setVolume(epm.getJournal().getVol());
biblio.setEndPage(epm.getJournal().getEp());
biblio.setStartPage(epm.getJournal().getSp());
return biblio;
}
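// for each result type, maps the OAF result through ResultMapper and enriches it with the relations, funding,
// organizations and manifestations collected in the PartialResearchProduct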
private static <R extends Result> void dumpResult(SparkSession spark, String inputPath, String workingDir) {
ModelSupport.entityTypes
.keySet()
.parallelStream()
.filter(ModelSupport::isResult)
.forEach(e -> {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Dataset<R> results = Utils.readPath(spark, inputPath + e.name(), resultClazz);
Dataset<PartialResearchProduct> prr = Utils
.readPath(spark, workingDir + e.name() + "/partialresearchproduct", PartialResearchProduct.class);
results
.joinWith(prr, results.col("id").equalTo(prr.col("resultId")), "left")
.map((MapFunction<Tuple2<R, PartialResearchProduct>, ResearchProduct>) t2 -> {
ResearchProduct rp = ResultMapper.map(t2._1());
rp.setRelatedProducts(t2._2().getRelatedProducts());
rp.setFunding(t2._2().getFunding());
rp.setRelevantOrganizations(t2._2().getRelevantOrganizations());
rp.setManifestations(t2._2().getManifestations());
return rp;
}, Encoders.bean(ResearchProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/researchproduct");
});
}
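// filters the graph relations down to the semantically relevant ones and groups them by source result:
// hasAuthorInstitution targets become organizations, isProducedBy targets become funding, the remaining
// relation types are kept as related products keyed by relation type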
private static void selectRelations(SparkSession spark, String inputPath, String workingDir) {
Dataset<Relation> relation = spark
.read()
.json(inputPath + "/relation")
.as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter(
"relClass == 'HasAuthorInstitution' or relClass == 'IsProducedBy' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' or " +
"relClass == 'IsNewVersionOf' or relClass == 'Cites'");
relation
.groupByKey((MapFunction<Relation, String>) r -> r.getSource(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Relation, RelationPerProduct>) (k, v) -> {
RelationPerProduct rpp = new RelationPerProduct();
rpp.setResultId(k);
Map<String, List<String>> remainignRelations = new HashMap<>();
while (v.hasNext()) {
Relation rel = v.next();
String target = rel.getTarget();
String relClass = rel.getRelClass();
switch (rel.getRelClass().toLowerCase()) {
case "hasauthorinstitution":
rpp.getOrganizations().add("organization::" + DHPUtils.md5(target));
break;
case "isproducedby":
rpp.getFunding().add("grant_______::" + DHPUtils.md5(target));
break;
default:
if (!remainignRelations.keySet().contains(relClass))
remainignRelations.put(relClass, new ArrayList<>());
remainignRelations.get(relClass).add("product_____::" + DHPUtils.md5(target));
}
}
for (String key : remainignRelations.keySet())
rpp.getRelatedProduct().add(Relations.newInstance(key, remainignRelations.get(key)));
return rpp;
}, Encoders.bean(RelationPerProduct.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + "/aggrelation");
}
}

View File

@@ -0,0 +1,251 @@
package eu.dnetlib.dhp.skgif;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.skgif.Utils.getOrcid;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapGroupsFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.model.*;
import eu.dnetlib.dhp.skgif.model.AccessRight;
import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Array;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class EmitFromResults implements Serializable {
private static final Logger log = LoggerFactory.getLogger(EmitFromResults.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/emit_biblio_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String workingDir = parser.get("workingDir");
log.info("workingDir: {}", workingDir);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
emitFromResult(spark, inputPath, outputPath, workingDir);
});
}
// for each result, emit the id + the journal (if any) + the instance + the hosted-by of the instance
public static <R extends Result> void emitFromResult(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
emitManifestation(spark, inputPath, workingDir);
emitPerson(spark, inputPath, outputPath, workingDir);
emitTopic(spark, inputPath, outputPath, workingDir);
}
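// emits one Topic per non-keyword subject of each result, then unions the per-type outputs and keeps a single
// record per local identifier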
private static <R extends Result> void emitTopic(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.filter((FilterFunction<R>) r -> Optional.ofNullable(r.getSubject()).isPresent())
.flatMap(
(FlatMapFunction<R, Topic>) r -> r
.getSubject()
.stream()
.filter(s -> !s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
Topic t = new Topic();
t
.setLocal_identifier(
"topic_______::" + DHPUtils.md5(s.getQualifier().getSchemeid() + s.getValue()));
t
.setIdentifiers(
Arrays
.asList(
Identifier.newInstance(s.getQualifier().getSchemeid(), s.getValue())));
t.setName(s.getValue());
return t;
})
.collect(Collectors.toList())
.iterator(),
Encoders.bean(Topic.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/topic");
}
});
Dataset<Topic> topics = spark.emptyDataset(Encoders.bean(Topic.class));
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
if (ModelSupport.isResult(entityType))
topics = topics.union(Utils.readPath(spark, workingDir + entityType.name() + "/topic", Topic.class));
}
topics
.groupByKey((MapFunction<Topic, String>) p -> p.getLocal_identifier(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Topic, Topic>) (k, v) -> v.next(), Encoders.bean(Topic.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/Topic");
}
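// emits one Persons record per author, using the ORCID to build the local identifier when available and a
// temporary identifier otherwise, then deduplicates by local identifier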
private static <R extends Result> void emitPerson(SparkSession spark, String inputPath, String outputPath,
String workingDir) {
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.flatMap((FlatMapFunction<R, Persons>) r -> {
List<Persons> authors = new ArrayList<>();
if (Optional.ofNullable(r.getAuthor()).isPresent()) {
int count = 0;
for (Author a : r.getAuthor()) {
count += 1;
Persons p = new Persons();
p.setFamilyName(a.getSurname());
p.setGivenName(a.getName());
String identifier = new String();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
if (orcid != null) {
identifier = "person______::" + DHPUtils.md5(orcid._1() + orcid._2());
if (orcid._2())
p
.setIdentifiers(
Arrays.asList(Identifier.newInstance("orcid", orcid._1())));
else
p
.setIdentifiers(
Arrays.asList(Identifier.newInstance("orcid_pending", orcid._1())));
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
identifier = "tmp_person__::" + DHPUtils.md5(r.getId() + a.getRank());
} else {
identifier = "tmp_person__::" + DHPUtils.md5(r.getId() + count);
}
}
}
p.setLocalIdentifier(identifier);
authors.add(p);
}
}
return authors.iterator();
}, Encoders.bean(Persons.class))
.filter(Objects::nonNull)
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/person");
}
});
Dataset<Persons> persons = spark.emptyDataset(Encoders.bean(Persons.class));
for (EntityType entityType : ModelSupport.entityTypes.keySet()) {
if (ModelSupport.isResult(entityType))
persons = persons
.union(Utils.readPath(spark, workingDir + entityType.name() + "/person", Persons.class));
}
persons
.groupByKey((MapFunction<Persons, String>) p -> p.getLocalIdentifier(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Persons, Persons>) (k, v) -> v.next(), Encoders.bean(Persons.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(outputPath + "/Persons");
}
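// emits, for each instance of every result, an EmitPerManifestation with the instance, its hosting datasource key,
// the publisher and (for publications) the journal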
private static <R extends Result> void emitManifestation(SparkSession spark, String inputPath, String workingDir) {
Dataset<Datasource> datasource = Utils
.readPath(spark, inputPath + "datasource", Datasource.class)
.filter(
(FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
ModelSupport.entityTypes.keySet().forEach(e -> {
if (ModelSupport.isResult(e)) {
Class<R> resultClazz = ModelSupport.entityTypes.get(e);
// Dataset<EmitPerManifestation> emitformanifestation =
Utils
.readPath(spark, inputPath + e.name(), resultClazz)
.flatMap((FlatMapFunction<R, EmitPerManifestation>) p -> p.getInstance().stream().map(i -> {
EmitPerManifestation epb = new EmitPerManifestation();
epb.setResultId(p.getId());
epb.setInstance(i);
epb.setHostedBy(i.getHostedby().getKey());
epb
.setPublisher(
Optional
.ofNullable(p.getPublisher())
.map(v -> v.getValue())
.orElse(new String()));
if (p.getClass() == Publication.class) {
epb.setJournal(((Publication) p).getJournal());
}
return epb;
}).collect(Collectors.toList()).iterator(), Encoders.bean(EmitPerManifestation.class))
.write()
.mode(SaveMode.Overwrite)
.option("compression", "gzip")
.json(workingDir + e.name() + "/manifestation");
;
}
});
}
}

View File

@@ -1,77 +0,0 @@
package eu.dnetlib.dhp.skgif;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* @author miriam.baglioni
* @Date 06/02/24
*/
public class JournalsFromDatasources implements Serializable {
private static final Logger log = LoggerFactory.getLogger(JournalsFromDatasources.class);
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
PrepareResultRelation.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/graph/dump/journals_from_datasource_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
final String datasourcePath = parser.get("datasourcePath");
log.info("datasourcePath: {}", datasourcePath);
SparkConf conf = new SparkConf();
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
Utils.removeOutputDir(spark, outputPath);
extendResult(spark, inputPath, outputPath, datasourcePath);
});
}
// find the results having a container in the metadata
// map all the hostedby.key of the instances associated with the result
// find a correspondence to a datasource which is a journal
// write the biblio for the result
public static void extendResult(SparkSession spark, String inputPath, String outputPath, String datasourcePath ){
Dataset<Datasource> datasource = Utils.readPath(spark, datasourcePath, Datasource.class)
.filter((FilterFunction<Datasource>) d -> Optional.ofNullable(d.getEosctype()).isPresent() &&
d.getEosctype().getClassname().equalsIgnoreCase("Journal archive"));
Dataset<ResearchProduct> results = Utils.readPath(spark, inputPath, ResearchProduct.class);
}
}

View File

@@ -9,6 +9,7 @@ import java.util.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -58,17 +59,23 @@ public class PrepareResultRelation implements Serializable {
}
private static void prepareResultRelationList(SparkSession spark, String inputPath, String outputPath) {
final StructType structureSchema = new StructType()
.fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>");
.add(
"dataInfo", new StructType()
.add("deletedbyinference", DataTypes.BooleanType)
.add("invisible", DataTypes.BooleanType))
.add("id", DataTypes.StringType);
Dataset<Relation> relation = spark
.read()
.json(inputPath)
.as(Encoders.bean(Relation.class))
.filter("dataInfo.deletedbyinference != true and dataInfo.invisible != true")
.filter(
"relClass == 'hasAuthorInstitution' or relClass == 'outcome' or " +
"subRelType == 'affiliation' or subRelType == 'outcome' or " +
"relClass == 'IsSupplementedBy' or relClass == 'IsDocumentedBy' or relClass == 'IsPartOf' or " +
"relClass == IsNewVersionOf");
"relClass == 'IsNewVersionOf' or relClass == 'Cites'");
org.apache.spark.sql.Dataset<Row> df = spark.createDataFrame(new ArrayList<Row>(), structureSchema);
List<String> entities = Arrays

View File

@@ -41,7 +41,7 @@ public class ResultMapper implements Serializable {
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) {
try {
out.setLocalIdentifier(input.getId());
out.setLocalIdentifier("product___::" + DHPUtils.md5(input.getId()));
mapPid(out, input);
mapTitle(out, input);
mapAbstract(out, input);
@@ -49,17 +49,7 @@ public class ResultMapper implements Serializable {
mapTopic(out, input);
mapContribution(out, input);
if (!Optional.ofNullable(out.getTitles()).isPresent() ||
!Optional.ofNullable(out.getContributions()).isPresent())
return null;
// TODO map the manifestation directly from the instances
// it is not completed
mapManifestation(out, input);
// TODO extend the mapping to consider relations between these entities and the results
// private List<String> relevantOrganizations;
// private List<String> funding;
// private List<Relations> relatedProducts;
//The manifestation will be included extending the result as well as the relations to funder, organization and other results
} catch (ClassCastException cce) {
return null;
@@ -70,101 +60,6 @@
}
private static <E extends Result> void mapManifestation(ResearchProduct out, E input) {
out
.setManifestations(
input
.getInstance()
.stream()
.parallel()
.map(i -> {
try {
return getManifestation(i);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
})
.collect(Collectors.toList()));
}
private static Manifestation getManifestation(Instance i) throws MalformedURLException {
Manifestation manifestation = new Manifestation();
manifestation.setProductLocalType(i.getInstancetype().getClassname());
manifestation.setProductLocalTypeSchema(i.getInstancetype().getSchemename());
Dates dates = new Dates();
dates.setType("publishing");
dates.setValue(i.getDateofacceptance().getValue());
manifestation.setDates(Arrays.asList(dates));
switch (i.getRefereed().getClassid()) {
case "0000":
manifestation.setPeerReview(PeerReview.UNAVAILABLE.label);
break;
case "0001":
manifestation.setPeerReview(PeerReview.PEER_REVIEWED.label);
break;
case "0002":
manifestation.setPeerReview(PeerReview.NON_PEER_REVIEWED.label);
break;
}
manifestation.setMetadataCuration(MetadataCuration.UNAVAILABLE.label);
// TODO filter out the URL that refer to pids. If nothing remains, decide what to do
manifestation.setUrl(new URL(i.getUrl().get(0)));
if (Optional.ofNullable(i.getPid()).isPresent()) {
manifestation.setPid(i.getPid().get(0).getValue());
}
switch (i.getAccessright().getClassid()) {
case "OPEN":
case "OPEN DATA":
case "OPEN SOURCE":
manifestation.setAccessRight(AccessRight.OPEN.label);
break;
case "CLOSED":
manifestation.setAccessRight(AccessRight.CLOSED.label);
break;
case "RESTRICTED":
manifestation.setAccessRight(AccessRight.RESTRICTED.label);
break;
case "EMBARGO":
case "12MONTHS":
case "6MONTHS":
manifestation.setAccessRight(AccessRight.EMBARGO.label);
break;
default:
manifestation.setAccessRight(AccessRight.UNAVAILABLE.label);
}
if (Optional.ofNullable(i.getLicense()).isPresent())
manifestation.setLicence(i.getLicense().getValue());
// TODO to fill the biblio in case it is a journal, we need to join with the datasource and verify the type
Biblio biblio = null;
manifestation.setHostingDatasource(i.getHostedby().getKey());
// TODO verify if the result is published in a journal or conference. In that case the venue is the identifier
// of the journal/conference. In case it is not, the venue is the datasource
if (biblio == null) {
manifestation.setVenue(i.getHostedby().getKey());
} else {
manifestation.setVenue("insert the id of the venue");
}
return manifestation;
}
private static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
private static <E extends Result> void mapContribution(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getAuthor()).isPresent()) {
int count = 0;
@@ -172,14 +67,14 @@
count += 1;
Contribution contribution = new Contribution();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = getOrcid(a.getPid());
Tuple2<String, Boolean> orcid = Utils.getOrcid(a.getPid());
if (orcid != null) {
contribution.setPerson("person______::"+DHPUtils.md5(orcid._1() + orcid._2()));
contribution.setPerson("person______::" + DHPUtils.md5(orcid._1() + orcid._2()));
} else {
if (Optional.ofNullable(a.getRank()).isPresent()) {
contribution.setPerson("person______::"+DHPUtils.md5(input.getId() + a.getRank()));
contribution.setPerson("person______::" + DHPUtils.md5(input.getId() + a.getRank()));
} else {
contribution.setPerson("tmp_person__::"+DHPUtils.md5(input.getId() + count));
contribution.setPerson("tmp_person__::" + DHPUtils.md5(input.getId() + count));
}
}
@@ -187,32 +82,31 @@
}
}
// "contributions": [
// {
// "person": "person_123",
// "declared_affiliations": ["org_1", "org_3"],
// "rank": 1,
// "roles": ["writing-original-draft", "conceptualization"]
// }
// ]
}
private static <E extends Result> void mapTopic(ResearchProduct out, E input) {
if (Optional.ofNullable(input.getSubject()).isPresent()) {
out.setTopics(input.getSubject().stream().parallel().map(s -> {
Topic topic = new Topic();
topic.setTopic(getIdentifier(s));
Provenance provenance = new Provenance();
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
provenance.setType(s.getDataInfo().getInferenceprovenance());
topic.setProvenance(provenance);
return topic;
}).collect(Collectors.toList()));
out
.setTopics(
input
.getSubject()
.stream()
.filter(s -> !s.getQualifier().getClassid().equalsIgnoreCase("keyword"))
.map(s -> {
ResultTopic topic = new ResultTopic();
topic.setTopic(getIdentifier(s));
Provenance provenance = new Provenance();
provenance.setTrust(Double.valueOf(s.getDataInfo().getTrust()));
provenance.setType(s.getDataInfo().getInferenceprovenance());
topic.setProvenance(provenance);
return topic;
})
.collect(Collectors.toList()));
}
}
private static String getIdentifier(StructuredProperty s) {
return DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
return "topic_______::" + DHPUtils.md5(s.getQualifier().getClassid() + s.getValue());
}
private static <E extends Result> void mapType(ResearchProduct out, E input) throws NoAllowedTypeException {

View File

@@ -0,0 +1,36 @@
package eu.dnetlib.dhp.skgif;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import scala.Tuple2;
/**
* @author miriam.baglioni
* @Date 16/02/24
*/
public class Utils implements Serializable {
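// returns the ORCID value of an author together with a flag: true for an authoritative orcid,
// false for orcid_pending; null when no ORCID pid is present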
public static Tuple2<String, Boolean> getOrcid(List<StructuredProperty> pid) {
if (!Optional.ofNullable(pid).isPresent())
return null;
if (pid.size() == 0)
return null;
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
return new Tuple2<>(p.getValue(), Boolean.TRUE);
}
}
for (StructuredProperty p : pid) {
if (p.getQualifier().getClassid().equals(ModelConstants.ORCID_PENDING)) {
return new Tuple2<>(p.getValue(), Boolean.FALSE);
}
}
return null;
}
}

View File

@ -0,0 +1,60 @@
package eu.dnetlib.dhp.skgif.beans;
import java.io.Serializable;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.skgif.model.Biblio;
/**
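* Support bean emitted once per result instance: it keeps together the result identifier, the hosting
* datasource, the journal, the instance and the publisher information used to build the corresponding manifestation.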
* @author miriam.baglioni
* @Date 15/02/24
*/
public class EmitPerManifestation implements Serializable {
private String resultId;
private String hostedBy;
private Journal journal;
private Instance instance;
private String publisher;
public String getPublisher() {
return publisher;
}
public void setPublisher(String publisher) {
this.publisher = publisher;
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public String getHostedBy() {
return hostedBy;
}
public void setHostedBy(String hostedBy) {
this.hostedBy = hostedBy;
}
public Journal getJournal() {
return journal;
}
public void setJournal(Journal journal) {
this.journal = journal;
}
public Instance getInstance() {
return instance;
}
public void setInstance(Instance instance) {
this.instance = instance;
}
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.skgif.beans;
import eu.dnetlib.dhp.skgif.model.ResearchProduct;
/**
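* A ResearchProduct that also carries the identifier of the result it was derived from.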
* @author miriam.baglioni
* @Date 16/02/24
*/
public class PartialResearchProduct extends ResearchProduct {
private String resultId;
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
}

View File

@ -0,0 +1,58 @@
package eu.dnetlib.dhp.skgif.beans;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import eu.dnetlib.dhp.skgif.model.Relations;
/**
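* Groups, for a single result, the identifiers of its related organizations, of its funding and the
* relations towards other research products.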
* @author miriam.baglioni
* @Date 16/02/24
*/
public class RelationPerProduct implements Serializable {
private String resultId;
private List<String> organizations;
private List<String> funding;
private List<Relations> relatedProduct;
public RelationPerProduct() {
organizations = new ArrayList<>();
funding = new ArrayList<>();
relatedProduct = new ArrayList<>();
}
public String getResultId() {
return resultId;
}
public void setResultId(String resultId) {
this.resultId = resultId;
}
public List<String> getOrganizations() {
return organizations;
}
public void setOrganizations(List<String> organizations) {
this.organizations = organizations;
}
public List<String> getFunding() {
return funding;
}
public void setFunding(List<String> funding) {
this.funding = funding;
}
public List<Relations> getRelatedProduct() {
return relatedProduct;
}
public void setRelatedProduct(List<Relations> relatedProduct) {
this.relatedProduct = relatedProduct;
}
}

View File

@ -0,0 +1,783 @@
package eu.dnetlib.dhp.oa.graph.dump.skgif;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.graph.dump.Constants;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.Instance;
import eu.dnetlib.dhp.oa.model.OpenAccessRoute;
import eu.dnetlib.dhp.oa.model.Score;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.skgif.EmitFromResults;
import eu.dnetlib.dhp.skgif.beans.EmitPerManifestation;
import eu.dnetlib.dhp.skgif.model.Manifestation;
import eu.dnetlib.dhp.skgif.model.Persons;
import eu.dnetlib.dhp.skgif.model.ResultTopic;
//@Disabled
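/**
 * Local-Spark unit tests covering the SKG-IF emission from results and the existing graph/community
 * result dumps, executed over small JSON fixtures available under the test resources.
 */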
public class EmitFromResultJobTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static SparkSession spark;
private static Path workingDir;
private static final Logger log = LoggerFactory.getLogger(EmitFromResultJobTest.class);
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files.createTempDirectory(EmitFromResultJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir);
SparkConf conf = new SparkConf();
conf.setAppName(EmitFromResultJobTest.class.getSimpleName());
conf.setMaster("local[*]");
conf.set("spark.driver.host", "localhost");
conf.set("hive.metastore.local", "true");
conf.set("spark.ui.enabled", "false");
conf.set("spark.sql.warehouse.dir", workingDir.toString());
conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());
spark = SparkSession
.builder()
.appName(EmitFromResultJobTest.class.getSimpleName())
.config(conf)
.getOrCreate();
}
@AfterAll
public static void afterAll() throws IOException {
FileUtils.deleteDirectory(workingDir.toFile());
spark.stop();
}
@Test
public void testEmitFromResult() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
EmitFromResults
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir.toString() + "/result/",
"-workingDir", workingDir.toString() + "/"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Persons> persons = sc
.textFile(workingDir.toString() + "/result/Persons")
.map(item -> OBJECT_MAPPER.readValue(item, Persons.class));
JavaRDD<ResultTopic> topics = sc
.textFile(workingDir.toString() + "/result/Topic")
.map(item -> OBJECT_MAPPER.readValue(item, ResultTopic.class));
// the manifestation records are assumed to be emitted under /result/Manifestation, mirroring the Persons and Topic outputs above
JavaRDD<EmitPerManifestation> manifestation = sc
.textFile(workingDir.toString() + "/result/Manifestation")
.map(item -> OBJECT_MAPPER.readValue(item, EmitPerManifestation.class));
org.apache.spark.sql.Dataset<Persons> personsDataset = spark
.createDataset(persons.rdd(), Encoders.bean(Persons.class));
personsDataset.show(false);
}
@Test
public void testDatasetDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'dataset'").count());
// the common fields in the result have already been checked. Now checking only
// the dataset-specific fields
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getGeolocation().size());
Assertions.assertEquals(2, gr.getGeolocation().stream().filter(gl -> gl.getBox().equals("")).count());
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPlace().equals("")).count());
Assertions.assertEquals(1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("")).count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> gl.getPlace().equals("18 York St, Ottawa, ON K1N 5S6; Ottawa; Ontario; Canada"))
.count());
Assertions
.assertEquals(
1, gr.getGeolocation().stream().filter(gl -> gl.getPoint().equals("45.427242 -75.693904")).count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> gl.getPoint().equals("") && !gl.getPlace().equals(""))
.count());
Assertions
.assertEquals(
1,
gr
.getGeolocation()
.stream()
.filter(gl -> !gl.getPoint().equals("") && gl.getPlace().equals(""))
.count());
Assertions.assertEquals("1024Gb", gr.getSize());
Assertions.assertEquals("1.01", gr.getVersion());
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
Assertions.assertEquals(null, gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getDocumentationUrl());
Assertions.assertEquals(null, gr.getContactperson());
Assertions.assertEquals(null, gr.getContactgroup());
Assertions.assertEquals(null, gr.getTool());
}
@Test
public void testSoftwareDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'software'").count());
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getDocumentationUrl().size());
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_1"));
Assertions.assertTrue(gr.getDocumentationUrl().contains("doc_url_2"));
Assertions.assertEquals("code_repo", gr.getCodeRepositoryUrl());
Assertions.assertEquals("perl", gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getContactperson());
Assertions.assertEquals(null, gr.getContactgroup());
Assertions.assertEquals(null, gr.getTool());
Assertions.assertEquals(null, gr.getGeolocation());
Assertions.assertEquals(null, gr.getSize());
Assertions.assertEquals(null, gr.getVersion());
}
@Test
public void testOrpDump() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'other'").count());
GraphResult gr = verificationDataset.first();
Assertions.assertEquals(2, gr.getContactperson().size());
Assertions.assertTrue(gr.getContactperson().contains(("contact_person1")));
Assertions.assertTrue(gr.getContactperson().contains(("contact_person2")));
Assertions.assertEquals(1, gr.getContactgroup().size());
Assertions.assertTrue(gr.getContactgroup().contains(("contact_group")));
Assertions.assertEquals(2, gr.getTool().size());
Assertions.assertTrue(gr.getTool().contains("tool1"));
Assertions.assertTrue(gr.getTool().contains("tool2"));
Assertions.assertEquals(null, gr.getContainer());
Assertions.assertEquals(null, gr.getDocumentationUrl());
Assertions.assertEquals(null, gr.getCodeRepositoryUrl());
Assertions.assertEquals(null, gr.getProgrammingLanguage());
Assertions.assertEquals(null, gr.getGeolocation());
Assertions.assertEquals(null, gr.getSize());
Assertions.assertEquals(null, gr.getVersion());
}
@Test
public void testPublicationDumpCommunity() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_extendedinstance")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(1, verificationDataset.count());
Assertions.assertEquals(1, verificationDataset.filter("type = 'publication'").count());
// the common fields in the result have already been checked. Now checking only
// the community-specific fields
CommunityResult cr = verificationDataset.first();
Assertions.assertEquals(1, cr.getContext().size());
Assertions.assertEquals("dh-ch", cr.getContext().get(0).getCode());
Assertions.assertEquals("Digital Humanities and Cultural Heritage", cr.getContext().get(0).getLabel());
Assertions.assertEquals(1, cr.getContext().get(0).getProvenance().size());
Assertions.assertEquals("Inferred by OpenAIRE", cr.getContext().get(0).getProvenance().get(0).getProvenance());
Assertions.assertEquals("0.9", cr.getContext().get(0).getProvenance().get(0).getTrust());
Assertions.assertEquals(1, cr.getCollectedfrom().size());
Assertions
.assertEquals("openaire____::fdc7e0400d8c1634cdaf8051dbae23db", cr.getCollectedfrom().get(0).getKey());
Assertions.assertEquals("Pensoft", cr.getCollectedfrom().get(0).getValue());
Assertions.assertEquals(1, cr.getInstance().size());
Assertions
.assertEquals(
"openaire____::fdc7e0400d8c1634cdaf8051dbae23db",
cr.getInstance().get(0).getCollectedfrom().getKey());
Assertions.assertEquals("Pensoft", cr.getInstance().get(0).getCollectedfrom().getValue());
Assertions
.assertEquals(
"openaire____::e707e544b9a5bd23fc27fbfa65eb60dd", cr.getInstance().get(0).getHostedby().getKey());
Assertions.assertEquals("One Ecosystem", cr.getInstance().get(0).getHostedby().getValue());
}
@Test
public void testDataset() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(90, verificationDataset.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_abf2'").count() == verificationDataset
.filter("bestAccessright.code = 'c_abf2' and bestAccessright.label = 'OPEN'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_16ec'").count() == verificationDataset
.filter("bestAccessright.code = 'c_16ec' and bestAccessright.label = 'RESTRICTED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_14cb'").count() == verificationDataset
.filter("bestAccessright.code = 'c_14cb' and bestAccessright.label = 'CLOSED'")
.count());
Assertions
.assertTrue(
verificationDataset.filter("bestAccessright.code = 'c_f1cf'").count() == verificationDataset
.filter("bestAccessright.code = 'c_f1cf' and bestAccessright.label = 'EMBARGO'")
.count());
Assertions.assertTrue(verificationDataset.filter("size(context) > 0").count() == 90);
Assertions.assertTrue(verificationDataset.filter("type = 'dataset'").count() == 90);
}
@Test
public void testDataset2All() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(5, verificationDataset.count());
}
@Test
public void testDataset2Communities() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset_cleaned")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(0, verificationDataset.count());
}
@Test
public void testPublication() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(74, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(74, verificationDataset.filter("type = 'publication'").count());
}
@Test
public void testSoftware() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/software.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(6, verificationDataset.count());
Assertions.assertEquals(6, verificationDataset.filter("type = 'software'").count());
}
@Test
public void testORP() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/orp.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(3, verificationDataset.count());
Assertions.assertEquals(3, verificationDataset.filter("type = 'other'").count());
}
@Test
public void testRecord() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpCommunityProducts
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<CommunityResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
org.apache.spark.sql.Dataset<CommunityResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(CommunityResult.class));
Assertions.assertEquals(2, verificationDataset.count());
verificationDataset.show(false);
Assertions.assertEquals(2, verificationDataset.filter("type = 'publication'").count());
}
@Test
public void testArticlePCA() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
Assertions.assertEquals(23, verificationDataset.count());
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
verificationDataset.createOrReplaceTempView("check");
org.apache.spark.sql.Dataset<Row> temp = spark
.sql(
"select id " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions.assertTrue(temp.count() == 2);
Assertions.assertTrue(temp.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1);
Assertions.assertTrue(temp.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1);
temp = spark
.sql(
"select id, inst.articleprocessingcharge.amount, inst.articleprocessingcharge.currency " +
"from check " +
"lateral view explode (instance) i as inst " +
"where inst.articleprocessingcharge is not null");
Assertions
.assertEquals(
"3131.64",
temp
.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = 'datacite____::05c611fdfc93d7a2a703d1324e28104a'")
.collectAsList()
.get(0)
.getString(2));
Assertions
.assertEquals(
"2578.35",
temp
.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(1));
Assertions
.assertEquals(
"EUR",
temp
.filter("id = 'dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'")
.collectAsList()
.get(0)
.getString(2));
}
@Test
public void testResultNotDumped() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/resultNotDumped.json")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
"-outputPath", workingDir.toString() + "/result",
"-communityMapPath", communityMapPath
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<GraphResult> tmp = sc
.textFile(workingDir.toString() + "/result")
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
Assertions.assertEquals(0, tmp.count());
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long