forked from antonis.lempesis/dnet-hadoop
fixed conflict
This commit is contained in:
commit
eef14fade3
|
@ -1,19 +1,30 @@
|
|||
package eu.dnetlib.dhp.schema.oaf;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class Author implements Serializable {
|
||||
|
||||
private String fullname;
|
||||
|
||||
private String name;
|
||||
|
||||
private String typology;
|
||||
private String surname;
|
||||
|
||||
private String provenance;
|
||||
private Integer rank;
|
||||
|
||||
private String trust;
|
||||
private List<KeyValue> pid;
|
||||
|
||||
// json containing a Citation or Statistics
|
||||
private String value;
|
||||
private List<Field<String>> affiliation;
|
||||
|
||||
public String getFullname() {
|
||||
return fullname;
|
||||
}
|
||||
|
||||
public Author setFullname(String fullname) {
|
||||
this.fullname = fullname;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
|
@ -24,39 +35,39 @@ public class Author implements Serializable {
|
|||
return this;
|
||||
}
|
||||
|
||||
public String getTypology() {
|
||||
return typology;
|
||||
public String getSurname() {
|
||||
return surname;
|
||||
}
|
||||
|
||||
public Author setTypology(String typology) {
|
||||
this.typology = typology;
|
||||
public Author setSurname(String surname) {
|
||||
this.surname = surname;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getProvenance() {
|
||||
return provenance;
|
||||
public Integer getRank() {
|
||||
return rank;
|
||||
}
|
||||
|
||||
public Author setProvenance(String provenance) {
|
||||
this.provenance = provenance;
|
||||
public Author setRank(Integer rank) {
|
||||
this.rank = rank;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getTrust() {
|
||||
return trust;
|
||||
public List<KeyValue> getPid() {
|
||||
return pid;
|
||||
}
|
||||
|
||||
public Author setTrust(String trust) {
|
||||
this.trust = trust;
|
||||
public Author setPid(List<KeyValue> pid) {
|
||||
this.pid = pid;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
public List<Field<String>> getAffiliation() {
|
||||
return affiliation;
|
||||
}
|
||||
|
||||
public Author setValue(String value) {
|
||||
this.value = value;
|
||||
public Author setAffiliation(List<Field<String>> affiliation) {
|
||||
this.affiliation = affiliation;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,6 @@ public class Publication extends Result<Publication> implements Serializable {
|
|||
|
||||
@Override
|
||||
protected Publication self() {
|
||||
return self();
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -207,7 +207,14 @@ public class ProtoConverter implements Serializable {
|
|||
}
|
||||
|
||||
private static Publication createPublication(OafProtos.Oaf oaf) {
|
||||
return new Publication();
|
||||
|
||||
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
||||
Publication publication = setOaf(new Publication(), oaf);
|
||||
setEntity(publication, oaf);
|
||||
return setResult(publication, oaf)
|
||||
.setJournal(mapJournal(m.getJournal()));
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static Dataset createDataset(OafProtos.Oaf oaf) {
|
||||
|
|
|
@ -3,6 +3,7 @@ package eu.dnetlib.dhp.graph;
|
|||
import com.googlecode.protobuf.format.JsonFormat;
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos;
|
||||
import eu.dnetlib.data.proto.OafProtos;
|
||||
import eu.dnetlib.data.proto.ResultProtos;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -10,7 +11,7 @@ import java.util.stream.Collectors;
|
|||
public class ProtoUtils {
|
||||
|
||||
public static OafProtos.Oaf parse(String json) throws JsonFormat.ParseException {
|
||||
final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder();
|
||||
final OafProtos.Oaf.Builder builder = OafProtos.Oaf.newBuilder();
|
||||
JsonFormat.merge(json, builder);
|
||||
return builder.build();
|
||||
}
|
||||
|
@ -24,24 +25,98 @@ public class ProtoUtils {
|
|||
//setting Entity fields
|
||||
final OafProtos.OafEntity e = oaf.getEntity();
|
||||
entity
|
||||
.setId(e.getId())
|
||||
.setOriginalId(e.getOriginalIdList())
|
||||
.setCollectedfrom(e.getCollectedfromList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapKV)
|
||||
.collect(Collectors.toList()))
|
||||
.setPid(e.getPidList().stream()
|
||||
.map(ProtoUtils::mapStructuredProperty)
|
||||
.collect(Collectors.toList()))
|
||||
.setDateofcollection(entity.getDateofcollection())
|
||||
.setDateoftransformation(entity.getDateoftransformation())
|
||||
.setExtraInfo(e.getExtraInfoList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapExtraInfo)
|
||||
.collect(Collectors.toList()));
|
||||
.setId(e.getId())
|
||||
.setOriginalId(e.getOriginalIdList())
|
||||
.setCollectedfrom(e.getCollectedfromList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapKV)
|
||||
.collect(Collectors.toList()))
|
||||
.setPid(e.getPidList().stream()
|
||||
.map(ProtoUtils::mapStructuredProperty)
|
||||
.collect(Collectors.toList()))
|
||||
.setDateofcollection(entity.getDateofcollection())
|
||||
.setDateoftransformation(entity.getDateoftransformation())
|
||||
.setExtraInfo(e.getExtraInfoList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapExtraInfo)
|
||||
.collect(Collectors.toList()));
|
||||
return entity;
|
||||
}
|
||||
|
||||
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
|
||||
//setting Entity fields
|
||||
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
||||
entity
|
||||
.setAuthor(m.getAuthorList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapAuthor)
|
||||
.collect(Collectors.toList()))
|
||||
.setResulttype(mapQualifier(m.getResulttype()))
|
||||
.setLanguage(ProtoUtils.mapQualifier(m.getLanguage()))
|
||||
.setCountry(m.getCountryList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapQualifier)
|
||||
.collect(Collectors.toList()))
|
||||
.setSubject(m.getSubjectList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStructuredProperty)
|
||||
.collect(Collectors.toList()))
|
||||
.setTitle(m.getTitleList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStructuredProperty)
|
||||
.collect(Collectors.toList()))
|
||||
.setRelevantdate(m.getRelevantdateList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStructuredProperty)
|
||||
.collect(Collectors.toList()))
|
||||
.setDescription(m.getDescriptionList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setDateofacceptance(ProtoUtils.mapStringField(m.getDateofacceptance()))
|
||||
.setPublisher(ProtoUtils.mapStringField(m.getPublisher()))
|
||||
.setEmbargoenddate(ProtoUtils.mapStringField(m.getEmbargoenddate()))
|
||||
.setSource(m.getSourceList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setFulltext(m.getFulltextList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setFormat(m.getFormatList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setContributor(m.getContributorList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setResourcetype(ProtoUtils.mapQualifier(m.getResourcetype()))
|
||||
.setCoverage(m.getCoverageList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()))
|
||||
.setRefereed(mapStringField(m.getRefereed()))
|
||||
.setContext(m.getContextList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapContext)
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
return entity;
|
||||
}
|
||||
|
||||
private static Context mapContext(ResultProtos.Result.Context context) {
|
||||
|
||||
return new Context()
|
||||
.setId(context.getId())
|
||||
.setDataInfo(context.getDataInfoList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapDataInfo)
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
|
||||
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
||||
return new KeyValue()
|
||||
.setKey(kv.getKey())
|
||||
|
@ -64,7 +139,7 @@ public class ProtoUtils {
|
|||
.setClassname(q.getClassname())
|
||||
.setSchemeid(q.getSchemeid())
|
||||
.setSchemename(q.getSchemename());
|
||||
//.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null);
|
||||
//.setDataInfo(q.hasDataInfo() ? mapDataInfo(q.getDataInfo()) : null);
|
||||
}
|
||||
|
||||
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
|
||||
|
@ -134,4 +209,21 @@ public class ProtoUtils {
|
|||
.setDataInfo(mapDataInfo(j.getDataInfo()));
|
||||
}
|
||||
|
||||
public static Author mapAuthor(FieldTypeProtos.Author author) {
|
||||
return new Author()
|
||||
.setFullname(author.getFullname())
|
||||
.setName(author.getName())
|
||||
.setSurname(author.getSurname())
|
||||
.setRank(author.getRank())
|
||||
.setPid(author.getPidList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapKV)
|
||||
.collect(Collectors.toList()))
|
||||
.setAffiliation(author.getAffiliationList()
|
||||
.stream()
|
||||
.map(ProtoUtils::mapStringField)
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -30,26 +30,25 @@ public class SparkGraphImporterJob {
|
|||
|
||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||
|
||||
|
||||
final String path = "file:///home/sandro/part-m-00000";
|
||||
final String path = "file:///Users/miconis/Downloads/part-m-02236";
|
||||
final JavaRDD<Tuple2<String, String>> inputRDD = sc.sequenceFile(path, Text.class, Text.class)
|
||||
.map(item -> new Tuple2<>(item._1.toString(), item._2.toString()));
|
||||
|
||||
|
||||
|
||||
final String body = inputRDD.filter(s -> s._1().contains("20|") && s._1().split("@")[2].equalsIgnoreCase("body")).map(Tuple2::_2).first();
|
||||
|
||||
System.out.println(body);
|
||||
|
||||
|
||||
final JavaRDD<Organization> datasources = inputRDD
|
||||
final JavaRDD<Organization> organization = inputRDD
|
||||
.filter(s -> s._1().split("@")[2].equalsIgnoreCase("body"))
|
||||
.map(Tuple2::_2)
|
||||
.map(ProtoConverter::convert)
|
||||
.filter(s-> s instanceof Organization)
|
||||
.map(s->(Organization)s);
|
||||
final Encoder<Organization> encoder = Encoders.bean(Organization.class);
|
||||
final Dataset<Organization> mdstore = spark.createDataset(datasources.rdd(), encoder);
|
||||
|
||||
final Dataset<Organization> mdstore = spark.createDataset(organization.rdd(), encoder);
|
||||
|
||||
System.out.println(mdstore.count());
|
||||
|
||||
|
|
|
@ -4,11 +4,14 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.junit.Test;
|
||||
import scala.tools.nsc.doc.model.Public;
|
||||
|
||||
public class ProtoConverterTest {
|
||||
|
||||
|
@ -50,4 +53,19 @@ public class ProtoConverterTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void convertPublicationTest() throws Exception {
|
||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/graph/publication.json"));
|
||||
|
||||
Oaf result = ProtoConverter.convert(json);
|
||||
|
||||
assertNotNull(result);
|
||||
assertTrue(result instanceof Publication);
|
||||
Publication p = (Publication) result;
|
||||
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
System.out.println(mapper.writeValueAsString(result));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"title": [{"value": "SILK PRINTING WITH RECENT DEVELOPMENTS","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}},{"value": "Son Gelişmelerle İpek Baskıcılığı","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}}],"dateofacceptance": {"value": "1987-06-01"},"publisher": {"value": "Tekstil Mühendisleri Odası"},"resulttype": {"classid": "publication","classname": "publication","schemeid": "dnet:result_typologies","schemename": "dnet:result_typologies"},"language": {"classid": "tur","classname": "Turkish","schemeid": "dnet:languages","schemename": "dnet:languages"},"journal": {"name": "Tekstil ve Mühendis","issnPrinted": "1300-7599"},"format": [{"value": "application/pdf"},{"value": "application/pdf"}],"description": [{"value": " "},{"value": " "}],"source": [{"value": "Tekstil ve Mühendis; Yıl: 1987 Cilt: 1 Sayı: 4"},{"value": "2147-0510"},{"value": "1300-7599"}],"author": [{"fullname": "YAKARTEPE, Mehmet","name": "Mehmet","surname": "Yakartepe","rank": 1},{"fullname": "YAKARTEPE, Zerrin","name": "Zerrin","surname": "Yakartepe","rank": 2}]},"instance": [{"accessright": {"classid": "OPEN","classname": "Open Access","schemeid": "dnet:access_modes","schemename": "dnet:access_modes"},"instancetype": {"classid": "0001","classname": "Article","schemeid": "dnet:publication_resource","schemename": "dnet:publication_resource"},"hostedby": {"key": "10|tubitakulakb::34a91944da68f59ebc51994b4db64cda","value": "Tekstil ve Mühendis"},"url": ["http://dergi.tekstilvemuhendis.org.tr/article/view/5000000711"],"collectedfrom": {"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"},"dateofacceptance": {"value": "1987-06-01"}}]},"originalId": ["oai:dergipark.ulakbim.gov.tr:record/124507"],"collectedfrom": [{"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"}],"dateofcollection": "2019-07-29T15:35:19Z","id": "50|tubitakulakb::7fe767f5f1dfd5bbe0a3e5e9b2a10cc9","dateoftransformation": "","oaiprovenance": {"originDescription": {"harvestDate": "2018-10-13T09:48:19.806Z","altered": true,"baseURL": "http://dergipark.ulakbim.gov.tr/v2/harvester/index.php/oai","identifier": "oai:dergipark.ulakbim.gov.tr:record/124507","datestamp": "2018-10-13T09:48:19Z","metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/"}}},"dataInfo": {"inferred": true,"deletedbyinference": true,"trust": "0.9","inferenceprovenance": "dedup-similarity-result-levenstein","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"},"invisible": false}}
|
Loading…
Reference in New Issue