From b66a7e303070d08b0b340c2a5440407182bba0c3 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 24 Oct 2019 17:29:01 +0200 Subject: [PATCH 1/2] publication test added --- .../dnetlib/dhp/graph/SparkGraphImporterJob.java | 8 ++++---- .../eu/dnetlib/dhp/graph/ProtoConverterTest.java | 16 ++++++++++++++++ .../eu/dnetlib/dhp/graph/publication.json | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java index a73ed8d75..74b40ccd7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/SparkGraphImporterJob.java @@ -30,14 +30,14 @@ public class SparkGraphImporterJob { final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - final JavaRDD> inputRDD = sc.sequenceFile("file:///home/sandro/part-m-00000", Text.class, Text.class).map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); + final JavaRDD> inputRDD = sc.sequenceFile("file:///Users/miconis/Downloads/part-m-02236", Text.class, Text.class).map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); - Tuple2 item = inputRDD + String body = inputRDD .filter(s -> s._1().split("@")[2].equalsIgnoreCase("body")) + .map(Tuple2::_2) .first(); - System.out.println(item._1()); - System.out.println(item._2()); + System.out.println("body = " + body); // .map(Tuple2::_2) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java index 3640cb996..e234dcc4c 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java @@ -2,9 +2,11 @@ package eu.dnetlib.dhp.graph; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Publication; import org.apache.commons.io.IOUtils; import static org.junit.Assert.*; import org.junit.Test; +import scala.tools.nsc.doc.model.Public; public class ProtoConverterTest { @@ -25,6 +27,20 @@ public class ProtoConverterTest { + } + + @Test + public void convertPublicationTest() throws Exception { + final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/publication.json")); + + Oaf result = ProtoConverter.convert(json); + + assertNotNull(result); + assertTrue(result instanceof Publication); + Publication p = (Publication) result; + + System.out.println(p); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json new file mode 100644 index 000000000..6c1fa91b2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/graph/publication.json @@ -0,0 +1 @@ +{"kind": "entity","entity": {"type": "result","result": {"metadata": {"title": [{"value": "SILK PRINTING WITH RECENT DEVELOPMENTS","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}},{"value": "Son Gelişmelerle İpek Baskıcılığı","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}}],"dateofacceptance": {"value": "1987-06-01"},"publisher": {"value": "Tekstil Mühendisleri Odası"},"resulttype": {"classid": "publication","classname": "publication","schemeid": "dnet:result_typologies","schemename": 
"dnet:result_typologies"},"language": {"classid": "tur","classname": "Turkish","schemeid": "dnet:languages","schemename": "dnet:languages"},"journal": {"name": "Tekstil ve Mühendis","issnPrinted": "1300-7599"},"format": [{"value": "application/pdf"},{"value": "application/pdf"}],"description": [{"value": " "},{"value": " "}],"source": [{"value": "Tekstil ve Mühendis; Yıl: 1987 Cilt: 1 Sayı: 4"},{"value": "2147-0510"},{"value": "1300-7599"}],"author": [{"fullname": "YAKARTEPE, Mehmet","name": "Mehmet","surname": "Yakartepe","rank": 1},{"fullname": "YAKARTEPE, Zerrin","name": "Zerrin","surname": "Yakartepe","rank": 2}]},"instance": [{"accessright": {"classid": "OPEN","classname": "Open Access","schemeid": "dnet:access_modes","schemename": "dnet:access_modes"},"instancetype": {"classid": "0001","classname": "Article","schemeid": "dnet:publication_resource","schemename": "dnet:publication_resource"},"hostedby": {"key": "10|tubitakulakb::34a91944da68f59ebc51994b4db64cda","value": "Tekstil ve Mühendis"},"url": ["http://dergi.tekstilvemuhendis.org.tr/article/view/5000000711"],"collectedfrom": {"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"},"dateofacceptance": {"value": "1987-06-01"}}]},"originalId": ["oai:dergipark.ulakbim.gov.tr:record/124507"],"collectedfrom": [{"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"}],"dateofcollection": "2019-07-29T15:35:19Z","id": "50|tubitakulakb::7fe767f5f1dfd5bbe0a3e5e9b2a10cc9","dateoftransformation": "","oaiprovenance": {"originDescription": {"harvestDate": "2018-10-13T09:48:19.806Z","altered": true,"baseURL": "http://dergipark.ulakbim.gov.tr/v2/harvester/index.php/oai","identifier": "oai:dergipark.ulakbim.gov.tr:record/124507","datestamp": "2018-10-13T09:48:19Z","metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/"}}},"dataInfo": {"inferred": true,"deletedbyinference": true,"trust": "0.9","inferenceprovenance": 
"dedup-similarity-result-levenstein","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"},"invisible": false}} \ No newline at end of file From b525b5413049c0502587d8b9e575e43f5ee94bff Mon Sep 17 00:00:00 2001 From: miconis Date: Fri, 25 Oct 2019 09:55:31 +0200 Subject: [PATCH 2/2] starting implementing the createPublication class --- .../eu/dnetlib/dhp/graph/ProtoConverter.java | 104 ++++++++++++++++++- .../dnetlib/dhp/graph/ProtoConverterTest.java | 2 +- 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java index 0fa83a51a..4179d3d2b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/graph/ProtoConverter.java @@ -130,7 +130,109 @@ public class ProtoConverter implements Serializable { } + /** + * Maps a protobuf {@code Oaf} message onto a {@link Publication}. + * Only part of the fields is mapped so far; the remaining setters are called with {@code null}. + * + * @param oaf the protobuf message; its dataInfo, timestamp, entity and result metadata are read + * @return the populated {@link Publication} + */ private static Publication createPublication(OafProtos.Oaf oaf) { - return new Publication(); + + Publication result = new Publication(); + + //Set Oaf Fields + result.setDataInfo(ProtoUtils.mapDataInfo(oaf.getDataInfo())); + + result.setLastupdatetimestamp(oaf.getLastupdatetimestamp()); + + //setting Entity fields + final OafProtos.OafEntity entity = oaf.getEntity(); + + result.setId(entity.getId()); + + result.setJournal(null); + + result.setAuthor(null); + + result.setChildren(null); + + result.setCollectedfrom(entity.getCollectedfromList() + .stream() + .map(ProtoUtils::mapKV) + .collect(Collectors.toList())); + + result.setContext(null); + + result.setContributor(null); + + result.setCountry(null); + + result.setCoverage(null); + + // dateofacceptance is read from the result metadata, the same place publisher and language come from + result.setDateofacceptance(ProtoUtils.mapStringField(entity.getResult().getMetadata().getDateofacceptance())); + + result.setDateofcollection(entity.getDateofcollection()); + + 
result.setDateoftransformation(entity.getDateoftransformation()); + + result.setDescription(entity.getResult().getMetadata().getDescriptionList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())); + + result.setEmbargoenddate(null); + + result.setExternalReference(null); + + result.setExtraInfo(entity.getExtraInfoList() + .stream() + .map(ProtoUtils::mapExtraInfo) + .collect(Collectors.toList())); + + result.setFormat(entity.getResult().getMetadata().getFormatList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())); + + result.setFulltext(null); + + result.setInstance(null); + + result.setLanguage(ProtoUtils.mapQualifier(entity.getResult().getMetadata().getLanguage())); + + result.setOaiprovenance(null); + + result.setOriginalId(entity.getOriginalIdList()); + + result.setPid(entity.getPidList() + .stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())); + + result.setPublisher(ProtoUtils.mapStringField(entity.getResult().getMetadata().getPublisher())); + + result.setRefereed(null); + + result.setRelevantdate(null); + + result.setResourcetype(null); + + result.setResulttype(null); + + result.setSource(entity.getResult().getMetadata().getSourceList() + .stream() + .map(ProtoUtils::mapStringField) + .collect(Collectors.toList())); + + result.setSubject(null); + + result.setTitle(entity.getResult().getMetadata().getTitleList() + .stream() + .map(ProtoUtils::mapStructuredProperty) + .collect(Collectors.toList())); + + return result; } private static Dataset createDataset(OafProtos.Oaf oaf) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java index e234dcc4c..bdf39071b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/graph/ProtoConverterTest.java @@ -39,7 +39,7 @@ public class ProtoConverterTest { assertTrue(result instanceof Publication); Publication p = (Publication) result; - System.out.println(p); + System.out.println(p.getId()); }