[personEntity] Create a Person entity for every ORCID profile, even those without works. Set validated=true on every relation derived from ORCID data.

This commit is contained in:
Miriam Baglioni 2024-10-21 14:38:15 +02:00
parent 4f0463d779
commit ce4ee1189f
2 changed files with 18 additions and 18 deletions

View File

@ -61,7 +61,8 @@ public class CoAuthorshipIterator implements Iterator<Relation> {
private Relation getRelation(String orcid1, String orcid2) {
String source = PERSON_PREFIX + IdentifierFactory.md5(orcid1);
String target = PERSON_PREFIX + IdentifierFactory.md5(orcid2);
return OafMapperUtils
Relation relation =
OafMapperUtils
.getRelation(
source, target, ModelConstants.PERSON_PERSON_RELTYPE,
ModelConstants.PERSON_PERSON_SUBRELTYPE,
@ -76,5 +77,7 @@ public class CoAuthorshipIterator implements Iterator<Relation> {
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
"0.91"),
null);
relation.setValidated(true);
return relation;
}
}

View File

@ -144,18 +144,13 @@ public class ExtractPerson implements Serializable {
.parquet(inputPath + "Employments")
.as(Encoders.bean(Employment.class));
Dataset<Author> peopleToMap = authors
.joinWith(works, authors.col("orcid").equalTo(works.col("orcid")))
.map((MapFunction<Tuple2<Author, Work>, Author>) t2 -> t2._1(), Encoders.bean(Author.class))
.groupByKey((MapFunction<Author, String>) a -> a.getOrcid(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Author, Author>) (k, it) -> it.next(), Encoders.bean(Author.class));
Dataset<Employment> employment = employmentDataset
.joinWith(peopleToMap, employmentDataset.col("orcid").equalTo(peopleToMap.col("orcid")))
.joinWith(authors, employmentDataset.col("orcid").equalTo(authors.col("orcid")))
.map((MapFunction<Tuple2<Employment, Author>, Employment>) t2 -> t2._1(), Encoders.bean(Employment.class));
//Mapping all the orcid profiles even if the profile has no visible works
Dataset<Person> people;
peopleToMap.map((MapFunction<Author, Person>) op -> {
authors.map((MapFunction<Author, Person>) op -> {
Person person = new Person();
person.setId(DHPUtils.generateIdentifier(op.getOrcid(), PERSON_PREFIX));
person
@ -325,6 +320,7 @@ public class ExtractPerson implements Serializable {
Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)),
DATAINFO,
null);
relation.setValidated(true);
if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) {
KeyValue kv = new KeyValue();
@ -412,14 +408,15 @@ public class ExtractPerson implements Serializable {
default:
return null;
}
return OafMapperUtils
.getRelation(
source, target, ModelConstants.RESULT_PERSON_RELTYPE,
ModelConstants.RESULT_PERSON_SUBRELTYPE,
ModelConstants.RESULT_PERSON_HASAUTHORED,
Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)),
DATAINFO,
null);
Relation relation = OafMapperUtils
.getRelation(
source, target, ModelConstants.RESULT_PERSON_RELTYPE,
ModelConstants.RESULT_PERSON_SUBRELTYPE,
ModelConstants.RESULT_PERSON_HASAUTHORED,
Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)),
DATAINFO,
null);
relation.setValidated(true);
return relation;
}
}