// Review notes for the first three file sections of this patch (free text ahead of the
// first "diff --git" header; it is not part of any hunk).
//
// Author.java       - removes the typology, provenance, trust and value fields together with
//                     their accessors; keeps name; adds fullname, surname, rank, pid and
//                     affiliation. Every setter assigns the field and returns this, so calls
//                     can be chained.
// Publication.java  - self() used to return self(), i.e. it called itself unconditionally;
//                     it now returns this.
// ProtoConverter    - createPublication used to return a bare new Publication(); it now runs
//                     the instance through setOaf, setEntity and setResult and then sets the
//                     journal mapped from the result metadata.
//
// NOTE(review): declarations such as "List pid", "List> affiliation" and "List getPid()"
// look like generic type arguments were lost when this patch text was extracted - confirm
// the real element types against the repository before applying.
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java index 436bc1b6e..ebb6959e5 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Author.java @@ -1,19 +1,30 @@ package eu.dnetlib.dhp.schema.oaf; import java.io.Serializable; +import java.util.List; public class Author implements Serializable { + private String fullname; + private String name; - private String typology; + private String surname; - private String provenance; + private Integer rank; - private String trust; + private List pid; - // json containing a Citation or Statistics - private String value; + private List> affiliation; + + public String getFullname() { + return fullname; + } + + public Author setFullname(String fullname) { + this.fullname = fullname; + return this; + } public String getName() { return name; @@ -24,39 +35,39 @@ public class Author implements Serializable { return this; } - public String getTypology() { - return typology; + public String getSurname() { + return surname; } - public Author setTypology(String typology) { - this.typology = typology; + public Author setSurname(String surname) { + this.surname = surname; return this; } - public String getProvenance() { - return provenance; + public Integer getRank() { + return rank; } - public Author setProvenance(String provenance) { - this.provenance = provenance; + public Author setRank(Integer rank) { + this.rank = rank; return this; } - public String getTrust() { - return trust; + public List getPid() { + return pid; } - public Author setTrust(String trust) { - this.trust = trust; + public Author setPid(List pid) { + this.pid = pid; return this; } - public String getValue() { - return value; + public List> getAffiliation() { + return affiliation; } - public Author setValue(String value) { - this.value = value; + public Author setAffiliation(List> affiliation) { +
this.affiliation = affiliation; return this; } } diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java index 29e1cb02f..433e217db 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Publication.java @@ -18,6 +18,6 @@ public class Publication extends Result implements Serializable { @Override protected Publication self() { - return self(); + return this; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java index fa30c1fde..14c6528a3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java @@ -207,7 +207,14 @@ public class ProtoConverter implements Serializable { } private static Publication createPublication(OafProtos.Oaf oaf) { - return new Publication(); + + ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata(); + Publication publication = setOaf(new Publication(), oaf); + setEntity(publication, oaf); + return setResult(publication, oaf) + .setJournal(mapJournal(m.getJournal())); + + } private static Dataset createDataset(OafProtos.Oaf oaf) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java index 7ea84316a..0c1c0d095 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoUtils.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph; import com.googlecode.protobuf.format.JsonFormat; import eu.dnetlib.data.proto.FieldTypeProtos; import
eu.dnetlib.data.proto.OafProtos; +import eu.dnetlib.data.proto.ResultProtos; import eu.dnetlib.dhp.schema.oaf.*; import java.util.stream.Collectors; @@ -10,7 +11,7 @@ import java.util.stream.Collectors; public class ProtoUtils { public static OafProtos.Oaf parse(String json) throws JsonFormat.ParseException { - final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder(); + final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder(); JsonFormat.merge(json, builder); return builder.build(); } @@ -24,24 +25,98 @@ public class ProtoUtils { //setting Entity fields final OafProtos.OafEntity e = oaf.getEntity(); entity - .setId(e.getId()) - .setOriginalId(e.getOriginalIdList()) - .setCollectedfrom(e.getCollectedfromList() - .stream() - .map(ProtoUtils::mapKV) - .collect(Collectors.toList())) - .setPid(e.getPidList().stream() - .map(ProtoUtils::mapStructuredProperty) - .collect(Collectors.toList())) - .setDateofcollection(entity.getDateofcollection()) - .setDateoftransformation(entity.getDateoftransformation()) - .setExtraInfo(e.getExtraInfoList() - .stream() - .map(ProtoUtils::mapExtraInfo) - .collect(Collectors.toList())); + .setId(e.getId()) + .setOriginalId(e.getOriginalIdList()) + .setCollectedfrom(e.getCollectedfromList() + .stream() + .map(ProtoUtils::mapKV) + .collect(Collectors.toList())) + .setPid(e.getPidList().stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())) + .setDateofcollection(e.getDateofcollection()) /* both dates are read from the protobuf entity e, not from the object being populated */ + .setDateoftransformation(e.getDateoftransformation()) + .setExtraInfo(e.getExtraInfoList() + .stream() + .map(ProtoUtils::mapExtraInfo) + .collect(Collectors.toList())); return entity; } + public static T setResult(T entity, OafProtos.Oaf oaf) { + //setting Entity fields + final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata(); + entity + .setAuthor(m.getAuthorList() + .stream() + .map(ProtoUtils::mapAuthor) + .collect(Collectors.toList())) +
.setResulttype(mapQualifier(m.getResulttype())) + .setLanguage(ProtoUtils.mapQualifier(m.getLanguage())) + .setCountry(m.getCountryList() + .stream() + .map(ProtoUtils::mapQualifier) + .collect(Collectors.toList())) + .setSubject(m.getSubjectList() + .stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())) + .setTitle(m.getTitleList() + .stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())) + .setRelevantdate(m.getRelevantdateList() + .stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())) + .setDescription(m.getDescriptionList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setDateofacceptance(ProtoUtils.mapStringField(m.getDateofacceptance())) + .setPublisher(ProtoUtils.mapStringField(m.getPublisher())) + .setEmbargoenddate(ProtoUtils.mapStringField(m.getEmbargoenddate())) + .setSource(m.getSourceList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setFulltext(m.getFulltextList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setFormat(m.getFormatList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setContributor(m.getContributorList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setResourcetype(ProtoUtils.mapQualifier(m.getResourcetype())) + .setCoverage(m.getCoverageList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())) + .setRefereed(mapStringField(m.getRefereed())) + .setContext(m.getContextList() + .stream() + .map(ProtoUtils::mapContext) + .collect(Collectors.toList())); + + return entity; + } + + private static Context mapContext(ResultProtos.Result.Context context) { + + return new Context() + .setId(context.getId()) + .setDataInfo(context.getDataInfoList() + .stream() + .map(ProtoUtils::mapDataInfo) + .collect(Collectors.toList())); + } + + public static
KeyValue mapKV(FieldTypeProtos.KeyValue kv) { return new KeyValue() .setKey(kv.getKey()) @@ -64,7 +139,7 @@ public class ProtoUtils { .setClassname(q.getClassname()) .setSchemeid(q.getSchemeid()) .setSchemename(q.getSchemename()); - //.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null); + //.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null); } public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) { @@ -134,4 +209,21 @@ public class ProtoUtils { .setDataInfo(mapDataInfo(j.getDataInfo())); } + public static Author mapAuthor(FieldTypeProtos.Author author) { + return new Author() + .setFullname(author.getFullname()) + .setName(author.getName()) + .setSurname(author.getSurname()) + .setRank(author.getRank()) + .setPid(author.getPidList() + .stream() + .map(ProtoUtils::mapKV) + .collect(Collectors.toList())) + .setAffiliation(author.getAffiliationList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())); + + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java index 2fe3b07aa..2b9c1f60b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java @@ -30,26 +30,25 @@ public class SparkGraphImporterJob { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - - final String path = "file:///home/sandro/part-m-00000"; + final String path = "file:///Users/miconis/Downloads/part-m-02236"; final JavaRDD> inputRDD = sc.sequenceFile(path, Text.class, Text.class) .map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); + final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
System.out.println(body); - final JavaRDD datasources = inputRDD + final JavaRDD organization = inputRDD .filter(s -> s._1().split("@")[2].equalsIgnoreCase("body")) .map(Tuple2::_2) .map(ProtoConverter::convert) .filter(s-> s instanceof Organization) .map(s->(Organization)s); final Encoder encoder = Encoders.bean(Organization.class); - final Dataset mdstore = spark.createDataset(datasources.rdd(), encoder); - + final Dataset mdstore = spark.createDataset(organization.rdd(), encoder); System.out.println(mdstore.count()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java index aa3cce66d..fdca571fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java @@ -4,11 +4,14 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Publication; + import org.apache.commons.io.IOUtils; import static org.junit.Assert.*; import org.junit.Test; +import scala.tools.nsc.doc.model.Public; public class ProtoConverterTest { @@ -50,4 +53,19 @@ public class ProtoConverterTest { } + @Test + public void convertPublicationTest() throws Exception { + final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/publication.json")); + + Oaf result = ProtoConverter.convert(json); + + assertNotNull(result); + assertTrue(result instanceof Publication); + Publication p = (Publication) result; + + ObjectMapper mapper = new ObjectMapper(); + System.out.println(mapper.writeValueAsString(result)); + + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json new file mode 100644 index 000000000..6c1fa91b2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json @@ -0,0 +1 @@ +{"kind": "entity","entity": {"type": "result","result": {"metadata": {"title": [{"value": "SILK PRINTING WITH RECENT DEVELOPMENTS","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}},{"value": "Son Gelişmelerle İpek Baskıcılığı","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}}],"dateofacceptance": {"value": "1987-06-01"},"publisher": {"value": "Tekstil Mühendisleri Odası"},"resulttype": {"classid": "publication","classname": "publication","schemeid": "dnet:result_typologies","schemename": "dnet:result_typologies"},"language": {"classid": "tur","classname": "Turkish","schemeid": "dnet:languages","schemename": "dnet:languages"},"journal": {"name": "Tekstil ve Mühendis","issnPrinted": "1300-7599"},"format": [{"value": "application/pdf"},{"value": "application/pdf"}],"description": [{"value": " "},{"value": " "}],"source": [{"value": "Tekstil ve Mühendis; Yıl: 1987 Cilt: 1 Sayı: 4"},{"value": "2147-0510"},{"value": "1300-7599"}],"author": [{"fullname": "YAKARTEPE, Mehmet","name": "Mehmet","surname": "Yakartepe","rank": 1},{"fullname": "YAKARTEPE, Zerrin","name": "Zerrin","surname": "Yakartepe","rank": 2}]},"instance": [{"accessright": {"classid": "OPEN","classname": "Open Access","schemeid": "dnet:access_modes","schemename": "dnet:access_modes"},"instancetype": {"classid": "0001","classname": "Article","schemeid": "dnet:publication_resource","schemename": "dnet:publication_resource"},"hostedby": {"key": "10|tubitakulakb::34a91944da68f59ebc51994b4db64cda","value": "Tekstil ve Mühendis"},"url": 
["http://dergi.tekstilvemuhendis.org.tr/article/view/5000000711"],"collectedfrom": {"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"},"dateofacceptance": {"value": "1987-06-01"}}]},"originalId": ["oai:dergipark.ulakbim.gov.tr:record/124507"],"collectedfrom": [{"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"}],"dateofcollection": "2019-07-29T15:35:19Z","id": "50|tubitakulakb::7fe767f5f1dfd5bbe0a3e5e9b2a10cc9","dateoftransformation": "","oaiprovenance": {"originDescription": {"harvestDate": "2018-10-13T09:48:19.806Z","altered": true,"baseURL": "http://dergipark.ulakbim.gov.tr/v2/harvester/index.php/oai","identifier": "oai:dergipark.ulakbim.gov.tr:record/124507","datestamp": "2018-10-13T09:48:19Z","metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/"}}},"dataInfo": {"inferred": true,"deletedbyinference": true,"trust": "0.9","inferenceprovenance": "dedup-similarity-result-levenstein","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"},"invisible": false}} \ No newline at end of file