1
0
Fork 0

fixed conflict

This commit is contained in:
Sandro La Bruzzo 2019-10-25 11:58:20 +02:00
commit eef14fade3
7 changed files with 173 additions and 45 deletions

View File

@ -1,19 +1,30 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.List;
public class Author implements Serializable { public class Author implements Serializable {
private String fullname;
private String name; private String name;
private String typology; private String surname;
private String provenance; private Integer rank;
private String trust; private List<KeyValue> pid;
// json containing a Citation or Statistics private List<Field<String>> affiliation;
private String value;
public String getFullname() {
return fullname;
}
public Author setFullname(String fullname) {
this.fullname = fullname;
return this;
}
public String getName() { public String getName() {
return name; return name;
@ -24,39 +35,39 @@ public class Author implements Serializable {
return this; return this;
} }
public String getTypology() { public String getSurname() {
return typology; return surname;
} }
public Author setTypology(String typology) { public Author setSurname(String surname) {
this.typology = typology; this.surname = surname;
return this; return this;
} }
public String getProvenance() { public Integer getRank() {
return provenance; return rank;
} }
public Author setProvenance(String provenance) { public Author setRank(Integer rank) {
this.provenance = provenance; this.rank = rank;
return this; return this;
} }
public String getTrust() { public List<KeyValue> getPid() {
return trust; return pid;
} }
public Author setTrust(String trust) { public Author setPid(List<KeyValue> pid) {
this.trust = trust; this.pid = pid;
return this; return this;
} }
public String getValue() { public List<Field<String>> getAffiliation() {
return value; return affiliation;
} }
public Author setValue(String value) { public Author setAffiliation(List<Field<String>> affiliation) {
this.value = value; this.affiliation = affiliation;
return this; return this;
} }
} }

View File

@ -18,6 +18,6 @@ public class Publication extends Result<Publication> implements Serializable {
@Override @Override
protected Publication self() { protected Publication self() {
return self(); return this;
} }
} }

View File

@ -207,7 +207,14 @@ public class ProtoConverter implements Serializable {
} }
private static Publication createPublication(OafProtos.Oaf oaf) { private static Publication createPublication(OafProtos.Oaf oaf) {
return new Publication();
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
Publication publication = setOaf(new Publication(), oaf);
setEntity(publication, oaf);
return setResult(publication, oaf)
.setJournal(mapJournal(m.getJournal()));
} }
private static Dataset createDataset(OafProtos.Oaf oaf) { private static Dataset createDataset(OafProtos.Oaf oaf) {

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph;
import com.googlecode.protobuf.format.JsonFormat; import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.FieldTypeProtos; import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.OafProtos; import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.ResultProtos;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -10,7 +11,7 @@ import java.util.stream.Collectors;
public class ProtoUtils { public class ProtoUtils {
public static OafProtos.Oaf parse(String json) throws JsonFormat.ParseException { public static OafProtos.Oaf parse(String json) throws JsonFormat.ParseException {
final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder(); final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder();
JsonFormat.merge(json, builder); JsonFormat.merge(json, builder);
return builder.build(); return builder.build();
} }
@ -24,24 +25,98 @@ public class ProtoUtils {
//setting Entity fields //setting Entity fields
final OafProtos.OafEntity e = oaf.getEntity(); final OafProtos.OafEntity e = oaf.getEntity();
entity entity
.setId(e.getId()) .setId(e.getId())
.setOriginalId(e.getOriginalIdList()) .setOriginalId(e.getOriginalIdList())
.setCollectedfrom(e.getCollectedfromList() .setCollectedfrom(e.getCollectedfromList()
.stream() .stream()
.map(ProtoUtils::mapKV) .map(ProtoUtils::mapKV)
.collect(Collectors.toList())) .collect(Collectors.toList()))
.setPid(e.getPidList().stream() .setPid(e.getPidList().stream()
.map(ProtoUtils::mapStructuredProperty) .map(ProtoUtils::mapStructuredProperty)
.collect(Collectors.toList())) .collect(Collectors.toList()))
.setDateofcollection(entity.getDateofcollection()) .setDateofcollection(entity.getDateofcollection())
.setDateoftransformation(entity.getDateoftransformation()) .setDateoftransformation(entity.getDateoftransformation())
.setExtraInfo(e.getExtraInfoList() .setExtraInfo(e.getExtraInfoList()
.stream() .stream()
.map(ProtoUtils::mapExtraInfo) .map(ProtoUtils::mapExtraInfo)
.collect(Collectors.toList())); .collect(Collectors.toList()));
return entity; return entity;
} }
/**
 * Copies the result metadata found in {@code oaf} onto {@code entity} and returns that same entity.
 *
 * The metadata is read from {@code oaf.getEntity().getResult().getMetadata()}. Single-valued
 * fields are converted with {@code mapQualifier} / {@code mapStringField}; repeated fields are
 * streamed, converted element by element with the matching {@code ProtoUtils} mapper and
 * collected into a list before being handed to the corresponding setter.
 *
 * NOTE(review): the bound {@code T extends Result} uses {@code Result} as a raw type, while
 * Publication is declared as {@code Result<Publication>}; the setter chain below is therefore
 * resolved against the raw type. Confirm whether {@code T extends Result<T>} was intended.
 *
 * @param entity the result object to populate
 * @param oaf    the protobuf message the metadata is read from
 * @param <T>    the concrete result type, returned unchanged to the caller
 * @return the {@code entity} argument, after its setters have been invoked
 */
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
// setting Result metadata fields (the Entity-level fields are not touched here)
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
entity
.setAuthor(m.getAuthorList()
.stream()
.map(ProtoUtils::mapAuthor)
.collect(Collectors.toList()))
.setResulttype(mapQualifier(m.getResulttype()))
.setLanguage(ProtoUtils.mapQualifier(m.getLanguage()))
.setCountry(m.getCountryList()
.stream()
.map(ProtoUtils::mapQualifier)
.collect(Collectors.toList()))
.setSubject(m.getSubjectList()
.stream()
.map(ProtoUtils::mapStructuredProperty)
.collect(Collectors.toList()))
.setTitle(m.getTitleList()
.stream()
.map(ProtoUtils::mapStructuredProperty)
.collect(Collectors.toList()))
.setRelevantdate(m.getRelevantdateList()
.stream()
.map(ProtoUtils::mapStructuredProperty)
.collect(Collectors.toList()))
.setDescription(m.getDescriptionList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setDateofacceptance(ProtoUtils.mapStringField(m.getDateofacceptance()))
.setPublisher(ProtoUtils.mapStringField(m.getPublisher()))
.setEmbargoenddate(ProtoUtils.mapStringField(m.getEmbargoenddate()))
.setSource(m.getSourceList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setFulltext(m.getFulltextList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setFormat(m.getFormatList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setContributor(m.getContributorList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setResourcetype(ProtoUtils.mapQualifier(m.getResourcetype()))
.setCoverage(m.getCoverageList()
.stream()
.map(ProtoUtils::mapStringField)
.collect(Collectors.toList()))
.setRefereed(mapStringField(m.getRefereed()))
.setContext(m.getContextList()
.stream()
.map(ProtoUtils::mapContext)
.collect(Collectors.toList()));
// hand back the same instance so callers can keep chaining on the concrete type
return entity;
}
/**
 * Converts a protobuf result context into a new {@link Context}.
 *
 * @param context the protobuf context to convert
 * @return a Context carrying the source id and the source data-info entries,
 *         each converted with {@code mapDataInfo}
 */
private static Context mapContext(ResultProtos.Result.Context context) {
    return new Context()
            .setId(context.getId())
            .setDataInfo(
                    context.getDataInfoList().stream()
                            .map(info -> ProtoUtils.mapDataInfo(info))
                            .collect(Collectors.toList()));
}
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) { public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
return new KeyValue() return new KeyValue()
.setKey(kv.getKey()) .setKey(kv.getKey())
@ -64,7 +139,7 @@ public class ProtoUtils {
.setClassname(q.getClassname()) .setClassname(q.getClassname())
.setSchemeid(q.getSchemeid()) .setSchemeid(q.getSchemeid())
.setSchemename(q.getSchemename()); .setSchemename(q.getSchemename());
//.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null); //.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null);
} }
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) { public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
@ -134,4 +209,21 @@ public class ProtoUtils {
.setDataInfo(mapDataInfo(j.getDataInfo())); .setDataInfo(mapDataInfo(j.getDataInfo()));
} }
/**
 * Converts a protobuf author into a new {@link Author}.
 *
 * Scalar fields (fullname, name, surname, rank) are copied as they are; the pid entries are
 * converted with {@code mapKV} and the affiliation entries with {@code mapStringField}.
 *
 * @param author the protobuf author to convert
 * @return the populated Author
 */
public static Author mapAuthor(FieldTypeProtos.Author author) {
    final Author mapped = new Author();

    // scalar fields
    mapped.setFullname(author.getFullname());
    mapped.setName(author.getName());
    mapped.setSurname(author.getSurname());
    mapped.setRank(author.getRank());

    // repeated fields, converted element by element
    mapped.setPid(
            author.getPidList().stream()
                    .map(ProtoUtils::mapKV)
                    .collect(Collectors.toList()));
    mapped.setAffiliation(
            author.getAffiliationList().stream()
                    .map(ProtoUtils::mapStringField)
                    .collect(Collectors.toList()));

    return mapped;
}
} }

View File

@ -30,26 +30,25 @@ public class SparkGraphImporterJob {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final String path = "file:///Users/miconis/Downloads/part-m-02236";
final String path = "file:///home/sandro/part-m-00000";
final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(path, Text.class, Text.class) final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(path, Text.class, Text.class)
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString())); .map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first(); final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
System.out.println(body); System.out.println(body);
final JavaRDD<Organization> datasources = inputRDD final JavaRDD<Organization> organization = inputRDD
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body")) .filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
.map(Tuple2::_2) .map(Tuple2::_2)
.map(ProtoConverter::convert) .map(ProtoConverter::convert)
.filter(s-> s instanceof Organization) .filter(s-> s instanceof Organization)
.map(s->(Organization)s); .map(s->(Organization)s);
final Encoder<Organization> encoder = Encoders.bean(Organization.class); final Encoder<Organization> encoder = Encoders.bean(Organization.class);
final Dataset<Organization> mdstore = spark.createDataset(datasources.rdd(), encoder); final Dataset<Organization> mdstore = spark.createDataset(organization.rdd(), encoder);
System.out.println(mdstore.count()); System.out.println(mdstore.count());

View File

@ -4,11 +4,14 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import org.junit.Test; import org.junit.Test;
import scala.tools.nsc.doc.model.Public;
public class ProtoConverterTest { public class ProtoConverterTest {
@ -50,4 +53,19 @@ public class ProtoConverterTest {
} }
/**
 * Loads the sample publication JSON from the test classpath, runs it through
 * {@code ProtoConverter.convert} and checks that the outcome is a non-null {@link Publication}.
 * The converted object is finally printed as JSON on standard output.
 */
@Test
public void convertPublicationTest() throws Exception {
    final String payload = IOUtils.toString(
            this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/publication.json"));

    final Oaf converted = ProtoConverter.convert(payload);
    assertNotNull(converted);
    assertTrue(converted instanceof Publication);

    // The cast is kept even though the local is not read afterwards.
    final Publication publication = (Publication) converted;

    final ObjectMapper jsonWriter = new ObjectMapper();
    System.out.println(jsonWriter.writeValueAsString(converted));
}
} }

View File

@ -0,0 +1 @@
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"title": [{"value": "SILK PRINTING WITH RECENT DEVELOPMENTS","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}},{"value": "Son Gelişmelerle İpek Baskıcılığı","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}}],"dateofacceptance": {"value": "1987-06-01"},"publisher": {"value": "Tekstil Mühendisleri Odası"},"resulttype": {"classid": "publication","classname": "publication","schemeid": "dnet:result_typologies","schemename": "dnet:result_typologies"},"language": {"classid": "tur","classname": "Turkish","schemeid": "dnet:languages","schemename": "dnet:languages"},"journal": {"name": "Tekstil ve Mühendis","issnPrinted": "1300-7599"},"format": [{"value": "application/pdf"},{"value": "application/pdf"}],"description": [{"value": " "},{"value": " "}],"source": [{"value": "Tekstil ve Mühendis; Yıl: 1987 Cilt: 1 Sayı: 4"},{"value": "2147-0510"},{"value": "1300-7599"}],"author": [{"fullname": "YAKARTEPE, Mehmet","name": "Mehmet","surname": "Yakartepe","rank": 1},{"fullname": "YAKARTEPE, Zerrin","name": "Zerrin","surname": "Yakartepe","rank": 2}]},"instance": [{"accessright": {"classid": "OPEN","classname": "Open Access","schemeid": "dnet:access_modes","schemename": "dnet:access_modes"},"instancetype": {"classid": "0001","classname": "Article","schemeid": "dnet:publication_resource","schemename": "dnet:publication_resource"},"hostedby": {"key": "10|tubitakulakb::34a91944da68f59ebc51994b4db64cda","value": "Tekstil ve Mühendis"},"url": ["http://dergi.tekstilvemuhendis.org.tr/article/view/5000000711"],"collectedfrom": {"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"},"dateofacceptance": {"value": "1987-06-01"}}]},"originalId": ["oai:dergipark.ulakbim.gov.tr:record/124507"],"collectedfrom": [{"key": 
"10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"}],"dateofcollection": "2019-07-29T15:35:19Z","id": "50|tubitakulakb::7fe767f5f1dfd5bbe0a3e5e9b2a10cc9","dateoftransformation": "","oaiprovenance": {"originDescription": {"harvestDate": "2018-10-13T09:48:19.806Z","altered": true,"baseURL": "http://dergipark.ulakbim.gov.tr/v2/harvester/index.php/oai","identifier": "oai:dergipark.ulakbim.gov.tr:record/124507","datestamp": "2018-10-13T09:48:19Z","metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/"}}},"dataInfo": {"inferred": true,"deletedbyinference": true,"trust": "0.9","inferenceprovenance": "dedup-similarity-result-levenstein","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"},"invisible": false}}