implementation of the createPublication method to map publications

This commit is contained in:
miconis 2019-10-25 11:54:14 +02:00
commit 4908165e05
16 changed files with 314 additions and 208 deletions

View File

@ -1,19 +1,30 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Author implements Serializable {
private String fullname;
private String name;
private String typology;
private String surname;
private String provenance;
private Integer rank;
private String trust;
private List<KeyValue> pid;
// json containing a Citation or Statistics
private String value;
private List<Field<String>> affiliation;
public String getFullname() {
return fullname;
}
public Author setFullname(String fullname) {
this.fullname = fullname;
return this;
}
public String getName() {
return name;
@ -24,39 +35,39 @@ public class Author implements Serializable {
return this;
}
public String getTypology() {
return typology;
public String getSurname() {
return surname;
}
public Author setTypology(String typology) {
this.typology = typology;
public Author setSurname(String surname) {
this.surname = surname;
return this;
}
public String getProvenance() {
return provenance;
public Integer getRank() {
return rank;
}
public Author setProvenance(String provenance) {
this.provenance = provenance;
public Author setRank(Integer rank) {
this.rank = rank;
return this;
}
public String getTrust() {
return trust;
public List<KeyValue> getPid() {
return pid;
}
public Author setTrust(String trust) {
this.trust = trust;
public Author setPid(List<KeyValue> pid) {
this.pid = pid;
return this;
}
public String getValue() {
return value;
public List<Field<String>> getAffiliation() {
return affiliation;
}
public Author setValue(String value) {
this.value = value;
public Author setAffiliation(List<Field<String>> affiliation) {
this.affiliation = affiliation;
return this;
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Dataset extends Result implements Serializable {
public class Dataset extends Result<Dataset> implements Serializable {
private Field<String> storagedate;
@ -81,4 +81,9 @@ public class Dataset extends Result implements Serializable {
this.geolocation = geolocation;
return this;
}
/**
 * Returns this instance typed as {@code Dataset}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Dataset} type instead of a base type.
 */
@Override
protected Dataset self() {
return this;
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Datasource extends OafEntity implements Serializable {
public class Datasource extends OafEntity<Datasource> implements Serializable {
private Qualifier datasourcetype;
@ -396,4 +396,9 @@ public class Datasource extends OafEntity implements Serializable {
this.journal = journal;
return this;
}
/**
 * Returns this instance typed as {@code Datasource}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Datasource} type instead of a base type.
 */
@Override
protected Datasource self() {
return this;
}
}

View File

@ -2,27 +2,29 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
public abstract class Oaf implements Serializable {
public abstract class Oaf<T extends Oaf<T>> implements Serializable {
private DataInfo dataInfo;
private Long lastupdatetimestamp;
/**
 * Returns this object typed as the concrete subtype {@code T}.
 * The setters of this class return the value of this method, so a chain of
 * setter calls keeps the subclass type. Concrete subclasses are expected to
 * implement it as {@code return this}.
 *
 * @return this instance, typed as {@code T}
 */
protected abstract T self();
public DataInfo getDataInfo() {
return dataInfo;
}
public Oaf setDataInfo(DataInfo dataInfo) {
public T setDataInfo(DataInfo dataInfo) {
this.dataInfo = dataInfo;
return this;
return self();
}
public Long getLastupdatetimestamp() {
return lastupdatetimestamp;
}
public Oaf setLastupdatetimestamp(Long lastupdatetimestamp) {
public T setLastupdatetimestamp(Long lastupdatetimestamp) {
this.lastupdatetimestamp = lastupdatetimestamp;
return this;
return self();
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public abstract class OafEntity extends Oaf implements Serializable {
public abstract class OafEntity<T extends OafEntity<T>> extends Oaf<T> implements Serializable {
private String id;
@ -21,76 +21,75 @@ public abstract class OafEntity extends Oaf implements Serializable {
private OAIProvenance oaiprovenance;
public String getId() {
return id;
}
public OafEntity setId(String id) {
public T setId(String id) {
this.id = id;
return this;
return self();
}
public List<String> getOriginalId() {
return originalId;
}
public OafEntity setOriginalId(List<String> originalId) {
public T setOriginalId(List<String> originalId) {
this.originalId = originalId;
return this;
return self();
}
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public OafEntity setCollectedfrom(List<KeyValue> collectedfrom) {
public T setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
return this;
return self();
}
public List<StructuredProperty> getPid() {
return pid;
}
public OafEntity setPid(List<StructuredProperty> pid) {
public T setPid(List<StructuredProperty> pid) {
this.pid = pid;
return this;
return self();
}
public String getDateofcollection() {
return dateofcollection;
}
public OafEntity setDateofcollection(String dateofcollection) {
public T setDateofcollection(String dateofcollection) {
this.dateofcollection = dateofcollection;
return this;
return self();
}
public String getDateoftransformation() {
return dateoftransformation;
}
public OafEntity setDateoftransformation(String dateoftransformation) {
public T setDateoftransformation(String dateoftransformation) {
this.dateoftransformation = dateoftransformation;
return this;
return self();
}
public List<ExtraInfo> getExtraInfo() {
return extraInfo;
}
public OafEntity setExtraInfo(List<ExtraInfo> extraInfo) {
public T setExtraInfo(List<ExtraInfo> extraInfo) {
this.extraInfo = extraInfo;
return this;
return self();
}
public OAIProvenance getOaiprovenance() {
return oaiprovenance;
}
public OafEntity setOaiprovenance(OAIProvenance oaiprovenance) {
public T setOaiprovenance(OAIProvenance oaiprovenance) {
this.oaiprovenance = oaiprovenance;
return this;
return self();
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Organization extends OafEntity implements Serializable {
public class Organization extends OafEntity<Organization> implements Serializable {
private Field<String> legalshortname;
@ -180,4 +180,9 @@ public class Organization extends OafEntity implements Serializable {
this.country = country;
return this;
}
/**
 * Returns this instance typed as {@code Organization}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Organization} type instead of a base type.
 */
@Override
protected Organization self() {
return this;
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class OtherResearchProducts extends Result implements Serializable {
public class OtherResearchProducts extends Result<OtherResearchProducts> implements Serializable {
private List<Field<String>> contactperson;
@ -37,4 +37,9 @@ public class OtherResearchProducts extends Result implements Serializable {
this.tool = tool;
return this;
}
/**
 * Returns this instance typed as {@code OtherResearchProducts}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code OtherResearchProducts} type instead of a base type.
 */
@Override
protected OtherResearchProducts self() {
return this;
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Project extends OafEntity implements Serializable {
public class Project extends OafEntity<Project> implements Serializable {
private Field<String> websiteurl;
@ -290,4 +290,9 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount;
return this;
}
/**
 * Returns this instance typed as {@code Project}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Project} type instead of a base type.
 */
@Override
protected Project self() {
return this;
}
}

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
public class Publication extends Result implements Serializable {
public class Publication extends Result<Publication> implements Serializable {
// publication specific
private Journal journal;
@ -15,4 +15,9 @@ public class Publication extends Result implements Serializable {
this.journal = journal;
return this;
}
/**
 * Returns this instance typed as {@code Publication}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Publication} type instead of a base type.
 */
@Override
protected Publication self() {
return this;
}
}

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.util.List;
public class Relation extends Oaf {
public class Relation extends Oaf<Relation> {
private String relType;
@ -22,7 +22,7 @@ public class Relation extends Oaf {
public Relation setRelType(String relType) {
this.relType = relType;
return this;
return self();
}
public String getSubRelType() {
@ -31,7 +31,7 @@ public class Relation extends Oaf {
public Relation setSubRelType(String subRelType) {
this.subRelType = subRelType;
return this;
return self();
}
public String getRelClass() {
@ -40,7 +40,7 @@ public class Relation extends Oaf {
public Relation setRelClass(String relClass) {
this.relClass = relClass;
return this;
return self();
}
public String getSource() {
@ -49,7 +49,7 @@ public class Relation extends Oaf {
public Relation setSource(String source) {
this.source = source;
return this;
return self();
}
public String getTarget() {
@ -58,7 +58,7 @@ public class Relation extends Oaf {
public Relation setTarget(String target) {
this.target = target;
return this;
return self();
}
public List<KeyValue> getCollectedFrom() {
@ -67,6 +67,11 @@ public class Relation extends Oaf {
public Relation setCollectedFrom(List<KeyValue> collectedFrom) {
this.collectedFrom = collectedFrom;
return self();
}
/**
 * Returns this instance typed as {@code Relation}.
 * The setters inherited from {@code Oaf} return {@code self()},
 * so chained calls keep the {@code Relation} type instead of the base type.
 */
@Override
protected Relation self() {
return this;
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public abstract class Result extends OafEntity implements Serializable {
public abstract class Result<T extends Result<T>> extends OafEntity<T> implements Serializable {
private List<Author> author;
@ -53,188 +53,188 @@ public abstract class Result extends OafEntity implements Serializable {
return author;
}
public Result setAuthor(List<Author> author) {
public T setAuthor(List<Author> author) {
this.author = author;
return this;
return self();
}
public Qualifier getResulttype() {
return resulttype;
}
public Result setResulttype(Qualifier resulttype) {
public T setResulttype(Qualifier resulttype) {
this.resulttype = resulttype;
return this;
return self();
}
public Qualifier getLanguage() {
return language;
}
public Result setLanguage(Qualifier language) {
public T setLanguage(Qualifier language) {
this.language = language;
return this;
return self();
}
public List<Qualifier> getCountry() {
return country;
}
public Result setCountry(List<Qualifier> country) {
public T setCountry(List<Qualifier> country) {
this.country = country;
return this;
return self();
}
public List<StructuredProperty> getSubject() {
return subject;
}
public Result setSubject(List<StructuredProperty> subject) {
public T setSubject(List<StructuredProperty> subject) {
this.subject = subject;
return this;
return self();
}
public List<StructuredProperty> getTitle() {
return title;
}
public Result setTitle(List<StructuredProperty> title) {
public T setTitle(List<StructuredProperty> title) {
this.title = title;
return this;
return self();
}
public List<StructuredProperty> getRelevantdate() {
return relevantdate;
}
public Result setRelevantdate(List<StructuredProperty> relevantdate) {
public T setRelevantdate(List<StructuredProperty> relevantdate) {
this.relevantdate = relevantdate;
return this;
return self();
}
public List<Field<String>> getDescription() {
return description;
}
public Result setDescription(List<Field<String>> description) {
public T setDescription(List<Field<String>> description) {
this.description = description;
return this;
return self();
}
public Field<String> getDateofacceptance() {
return dateofacceptance;
}
public Result setDateofacceptance(Field<String> dateofacceptance) {
public T setDateofacceptance(Field<String> dateofacceptance) {
this.dateofacceptance = dateofacceptance;
return this;
return self();
}
public Field<String> getPublisher() {
return publisher;
}
public Result setPublisher(Field<String> publisher) {
public T setPublisher(Field<String> publisher) {
this.publisher = publisher;
return this;
return self();
}
public Field<String> getEmbargoenddate() {
return embargoenddate;
}
public Result setEmbargoenddate(Field<String> embargoenddate) {
public T setEmbargoenddate(Field<String> embargoenddate) {
this.embargoenddate = embargoenddate;
return this;
return self();
}
public List<Field<String>> getSource() {
return source;
}
public Result setSource(List<Field<String>> source) {
public T setSource(List<Field<String>> source) {
this.source = source;
return this;
return self();
}
public List<Field<String>> getFulltext() {
return fulltext;
}
public Result setFulltext(List<Field<String>> fulltext) {
public T setFulltext(List<Field<String>> fulltext) {
this.fulltext = fulltext;
return this;
return self();
}
public List<Field<String>> getFormat() {
return format;
}
public Result setFormat(List<Field<String>> format) {
public T setFormat(List<Field<String>> format) {
this.format = format;
return this;
return self();
}
public List<Field<String>> getContributor() {
return contributor;
}
public Result setContributor(List<Field<String>> contributor) {
public T setContributor(List<Field<String>> contributor) {
this.contributor = contributor;
return this;
return self();
}
public Qualifier getResourcetype() {
return resourcetype;
}
public Result setResourcetype(Qualifier resourcetype) {
public T setResourcetype(Qualifier resourcetype) {
this.resourcetype = resourcetype;
return this;
return self();
}
public List<Field<String>> getCoverage() {
return coverage;
}
public Result setCoverage(List<Field<String>> coverage) {
public T setCoverage(List<Field<String>> coverage) {
this.coverage = coverage;
return this;
return self();
}
public Field<String> getRefereed() {
return refereed;
}
public Result setRefereed(Field<String> refereed) {
public T setRefereed(Field<String> refereed) {
this.refereed = refereed;
return this;
return self();
}
public List<Context> getContext() {
return context;
}
public Result setContext(List<Context> context) {
public T setContext(List<Context> context) {
this.context = context;
return this;
return self();
}
public List<ExternalReference> getExternalReference() {
return externalReference;
}
public Result setExternalReference(List<ExternalReference> externalReference) {
public T setExternalReference(List<ExternalReference> externalReference) {
this.externalReference = externalReference;
return this;
return self();
}
public List<Instance> getInstance() {
return instance;
}
public Result setInstance(List<Instance> instance) {
public T setInstance(List<Instance> instance) {
this.instance = instance;
return this;
return self();
}
}

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
public class Software extends Result implements Serializable {
public class Software extends Result<Software> implements Serializable {
private List<Field<String>> documentationUrl;
@ -48,4 +48,9 @@ public class Software extends Result implements Serializable {
this.programmingLanguage = programmingLanguage;
return this;
}
/**
 * Returns this instance typed as {@code Software}.
 * The setters inherited from the generic base classes return {@code self()},
 * so chained calls keep the {@code Software} type instead of a base type.
 */
@Override
protected Software self() {
return this;
}
}

View File

@ -58,11 +58,28 @@ public class ProtoConverter implements Serializable {
}
private static Organization convertOrganization(OafProtos.Oaf oaf) {
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf);
return setEntity(org, oaf);
//TODO set org fields
return setEntity(org, oaf)
.setLegalshortname(mapStringField(m.getLegalshortname()))
.setLegalname(mapStringField(m.getLegalname()))
.setAlternativeNames(m.getAlternativeNamesList().
stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setWebsiteurl(mapStringField(m.getWebsiteurl()))
.setLogourl(mapStringField(m.getLogourl()))
.setEclegalbody(mapStringField(m.getEclegalbody()))
.setEclegalperson(mapStringField(m.getEclegalperson()))
.setEcnonprofit(mapStringField(m.getEcnonprofit()))
.setEcresearchorganization(mapStringField(m.getEcresearchorganization()))
.setEchighereducation(mapStringField(m.getEchighereducation()))
.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests()))
.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()))
.setEcenterprise(mapStringField(m.getEcenterprise()))
.setEcsmevalidated(mapStringField(m.getEcsmevalidated()))
.setEcnutscode(mapStringField(m.getEcnutscode()))
.setCountry(mapQualifier(m.getCountry()));
}
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
@ -186,69 +203,11 @@ public class ProtoConverter implements Serializable {
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
Publication publication = setOaf(new Publication(), oaf);
return setEntity(publication, oaf)
.setJournal(mapJournal(m.getJournal()))
.setRefereed(mapStringField(m.getRefereed()));
//setting Entity fields
final OafProtos.OafEntity entity = oaf.getEntity();
result.setAuthor(null);
result.setContext(null);
result.setContributor(null);
result.setCountry(null);
result.setCoverage(null);
result.setDateofacceptance(result.getDateofacceptance());
result.setDescription(entity.getResult().getMetadata().getDescriptionList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()));
result.setEmbargoenddate(null);
result.setExternalReference(null);
result.setFormat(entity.getResult().getMetadata().getFormatList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()));
result.setFulltext(null);
setEntity(publication, oaf);
return setResult(publication, oaf)
.setJournal(mapJournal(m.getJournal()));
result.setInstance(entity.getResult().getInstanceList());
result.setLanguage(ProtoUtils.mapQualifier(entity.getResult().getMetadata().getLanguage()));
result.setOaiprovenance(null);
result.setPublisher(ProtoUtils.mapStringField(entity.getResult().getMetadata().getPublisher()));
result.setRelevantdate(null);
result.setResourcetype(null);
result.setResulttype(null);
result.setSource(entity.getResult().getMetadata().getSourceList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()));
result.setSubject(null);
result.setTitle(entity.getResult().getMetadata().getTitleList()
.stream()
.map(ProtoUtils::mapStructuredProperty)
.collect(Collectors.toList()));
return result;
}
private static Dataset createDataset(OafProtos.Oaf oaf) {

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.ResultProtos;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.stream.Collectors;
@ -42,6 +43,80 @@ public class ProtoUtils {
return entity;
}
/**
 * Copies the result-level metadata of a protobuf record onto {@code entity}.
 * <p>
 * Repeated protobuf fields are converted element by element with the matching
 * mapper of this class and collected into a list; singular fields are converted
 * directly. The entity is populated through its setters, i.e. modified in place.
 *
 * @param entity the result object to populate
 * @param oaf    the protobuf record whose entity/result metadata section is read
 * @param <T>    the concrete result type; bound to {@code Result<T>} (not the raw
 *               {@code Result}) so the setter chain is fully type-checked
 * @return the same {@code entity} instance that was passed in
 */
public static <T extends Result<T>> T setResult(T entity, OafProtos.Oaf oaf) {
    // Metadata section holding the fields declared on Result (not the generic entity fields).
    final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
    entity
        .setAuthor(m.getAuthorList()
            .stream()
            .map(ProtoUtils::mapAuthor)
            .collect(Collectors.toList()))
        .setResulttype(mapQualifier(m.getResulttype()))
        .setLanguage(mapQualifier(m.getLanguage()))
        .setCountry(m.getCountryList()
            .stream()
            .map(ProtoUtils::mapQualifier)
            .collect(Collectors.toList()))
        .setSubject(m.getSubjectList()
            .stream()
            .map(ProtoUtils::mapStructuredProperty)
            .collect(Collectors.toList()))
        .setTitle(m.getTitleList()
            .stream()
            .map(ProtoUtils::mapStructuredProperty)
            .collect(Collectors.toList()))
        .setRelevantdate(m.getRelevantdateList()
            .stream()
            .map(ProtoUtils::mapStructuredProperty)
            .collect(Collectors.toList()))
        .setDescription(m.getDescriptionList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setDateofacceptance(mapStringField(m.getDateofacceptance()))
        .setPublisher(mapStringField(m.getPublisher()))
        .setEmbargoenddate(mapStringField(m.getEmbargoenddate()))
        .setSource(m.getSourceList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setFulltext(m.getFulltextList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setFormat(m.getFormatList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setContributor(m.getContributorList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setResourcetype(mapQualifier(m.getResourcetype()))
        .setCoverage(m.getCoverageList()
            .stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()))
        .setRefereed(mapStringField(m.getRefereed()))
        .setContext(m.getContextList()
            .stream()
            .map(ProtoUtils::mapContext)
            .collect(Collectors.toList()));

    return entity;
}
/**
 * Converts a protobuf result context into a {@code Context}.
 *
 * @param context the protobuf context to convert
 * @return a new {@code Context} carrying the source id and one converted
 *         data-info entry for each entry of the source data-info list
 */
private static Context mapContext(ResultProtos.Result.Context context) {
    return new Context()
        .setId(context.getId())
        .setDataInfo(
            context.getDataInfoList().stream()
                .map(info -> ProtoUtils.mapDataInfo(info))
                .collect(Collectors.toList()));
}
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
return new KeyValue()
.setKey(kv.getKey())
@ -134,4 +209,21 @@ public class ProtoUtils {
.setDataInfo(mapDataInfo(j.getDataInfo()));
}
/**
 * Converts a protobuf author into an {@code Author}.
 * <p>
 * Full name, name, surname and rank are copied as they are; every pid is
 * converted with {@code mapKV} and every affiliation with {@code mapStringField}.
 *
 * @param author the protobuf author to convert
 * @return a newly created {@code Author} populated from {@code author}
 */
public static Author mapAuthor(FieldTypeProtos.Author author) {
    final Author mapped = new Author();

    // Scalar fields, set in the same order as the source record is read.
    mapped.setFullname(author.getFullname());
    mapped.setName(author.getName());
    mapped.setSurname(author.getSurname());
    mapped.setRank(author.getRank());

    // Repeated fields: convert each element, then collect into a list.
    mapped.setPid(
        author.getPidList().stream()
            .map(ProtoUtils::mapKV)
            .collect(Collectors.toList()));
    mapped.setAffiliation(
        author.getAffiliationList().stream()
            .map(ProtoUtils::mapStringField)
            .collect(Collectors.toList()));

    return mapped;
}
}

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
@ -39,14 +40,15 @@ public class SparkGraphImporterJob {
final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(path, Text.class, Text.class)
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
final JavaRDD<Datasource> datasources = inputRDD
final JavaRDD<Organization> organization = inputRDD
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
.map(Tuple2::_2)
.map(ProtoConverter::convert)
.filter(s-> s instanceof Datasource)
.map(s->(Datasource)s);
final Encoder<Datasource> encoder = Encoders.bean(Datasource.class);
final Dataset<Datasource> mdstore = spark.createDataset(datasources.rdd(), encoder);
.filter(s-> s instanceof Organization)
.map(s->(Organization)s);
final Encoder<Organization> encoder = Encoders.bean(Organization.class);
final Dataset<Organization> mdstore = spark.createDataset(organization.rdd(), encoder);
System.out.println(mdstore.count());

View File

@ -43,7 +43,8 @@ public class ProtoConverterTest {
assertTrue(result instanceof Publication);
Publication p = (Publication) result;
System.out.println(p.getId());
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(result));
}