[SKG-IF] denormalization fixing issue and new properties

This commit is contained in:
Miriam Baglioni 2024-03-12 14:51:14 +01:00
parent d6a8db5202
commit cb9a081236
657 changed files with 71 additions and 35 deletions

View File

@ -32,6 +32,11 @@ public class Grant implements Serializable {
// class
// isParticipant produces the list of organization internal identifiers
private List<Contributor> contributors;//
private String grantCode;
/**
 * Returns the grant code held by this instance.
 *
 * @return the current value of the {@code grantCode} field
 */
public String getGrantCode() {
    return this.grantCode;
}
public String getLocal_identifier() {
return local_identifier;
@ -152,4 +157,8 @@ public class Grant implements Serializable {
/**
 * Replaces the contributor list held by this instance.
 *
 * @param contributors the list to store; the reference is kept as-is, no copy is made
 */
public void setContributors(List<Contributor> contributors) {
this.contributors = contributors;
}
/**
 * Stores the given grant code on this instance.
 *
 * @param value the new value of the {@code grantCode} field
 */
public void setGrantCode(String value) {
    this.grantCode = value;
}
}

View File

@ -22,6 +22,16 @@ public class Persons implements Serializable {
@JsonProperty("declared_affiliations")
private List<Affiliation> declared_affiliations;
private String fullname;
/**
 * Returns the full name held by this instance.
 *
 * @return the current value of the {@code fullname} field
 */
public String getFullname() {
    return this.fullname;
}
/**
 * Stores the given full name on this instance.
 *
 * @param fullname the new value of the {@code fullname} field
 */
public void setFullname(String fullname) {
this.fullname = fullname;
}
/**
 * Returns the local identifier held by this instance.
 *
 * @return the current value of the {@code local_identifier} field
 */
public String getLocal_identifier() {
    return this.local_identifier;
}

View File

@ -19,7 +19,7 @@ public enum Prefixes implements Serializable {
TEMPORARY_PERSON("temp_person_::"),
DATASOURCE("datasource__::"), TOPIC("topic_______::"), VENUE("venue_______::");
DATASOURCE("datasource__::"), TOPIC("temp_topic__::"), VENUE("temp_venue__::");
public final String label;

View File

@ -112,7 +112,8 @@ public class DumpDatasource implements Serializable {
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
Datasource d = first._1();
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
//datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
datasource.setLocal_identifier(d.getId());
datasource
.setIdentifiers(
d

View File

@ -92,7 +92,7 @@ public class DumpGrant implements Serializable {
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
Dataset<EncloseMinElement> eme = Utils
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
.filter((FilterFunction<EncloseMinElement>) e -> e.getMinOrganization() != null);
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
@ -110,7 +110,8 @@ public class DumpGrant implements Serializable {
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
Grant g = new Grant();
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
g.setLocal_identifier( k);
g.setGrantCode(first._1().getCode().getValue());
g.setIdentifiers(getProjectIdentifier(first._1()));
g.setTitle(first._1().getTitle().getValue());
g

View File

@ -74,7 +74,8 @@ public class DumpOrganization implements Serializable {
&& !o.getDataInfo().getInvisible())
.map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> {
eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization();
organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
//organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
organization.setLocal_identifier(o.getId());
organization
.setCountry(
Optional

View File

@ -289,7 +289,8 @@ public class DumpResult implements Serializable {
.setHosting_datasource(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
//Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
epm.getInstance().getHostedby().getKey(),
epm.getInstance().getHostedby().getValue()));
return manifestation;

View File

@ -123,7 +123,8 @@ public class EmitFromEntities implements Serializable {
.setMinDatsource(
MinVenue
.newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
//Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
d.getId(), d.getOfficialname().getValue()));
eme.setEnclosedEntityId(d.getId());
return eme;
}, Encoders.bean(EncloseMinElement.class))
@ -221,7 +222,8 @@ public class EmitFromEntities implements Serializable {
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
//|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
)
.map(s -> {
Topic t = new Topic();
t
@ -279,6 +281,7 @@ public class EmitFromEntities implements Serializable {
Persons p = new Persons();
p.setFamily_name(a.getSurname());
p.setGiven_name(a.getName());
p.setFullname(a.getFullname());
String identifier = new String();
if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils

View File

@ -27,7 +27,8 @@ public class ResultMapper implements Serializable {
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) {
try {
out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId()));
//out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId()));
out.setLocal_identifier(input.getId());
mapPid(out, input);
mapTitle(out, input);
mapAbstract(out, input);
@ -72,7 +73,6 @@ public class ResultMapper implements Serializable {
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
contribution.setPerson(minPerson);
}
}
if (Optional.ofNullable(a.getRank()).isPresent()) {
contribution.setRank(a.getRank());
@ -93,8 +93,9 @@ public class ResultMapper implements Serializable {
.getSubject()
.stream()
.filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") ||
s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") )
// ||
// s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
.map(s -> {
ResultTopic topic = new ResultTopic();
MinTopic minTopic = new MinTopic();

View File

@ -87,48 +87,57 @@ public class Utils implements Serializable {
/**
 * Builds a {@code MinOrganization} from the given {@code Organization}.
 *
 * Sets the local identifier, copies the legal name value into the name when a
 * legal name is present, and, when a pid list is present, copies the value of
 * each pid whose lower-cased qualifier classid is "ror", "isni", "fundref",
 * "ringgold" or "wikidata" into the matching setter. Other classids are ignored.
 *
 * NOTE(review): this span reads like a rendered diff hunk in which pre-change and
 * post-change lines appear together (two setLocal_identifier calls, two switch
 * statements over the same classid) — confirm against the actual file which
 * lines are live before relying on the details below.
 *
 * @param o the organization to summarise
 * @return the populated {@code MinOrganization}
 */
public static MinOrganization getMinOrganization(Organization o) {
MinOrganization mo = new MinOrganization();
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
//mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
// The raw organization id is used as the local identifier (no prefix applied).
mo.setLocal_identifier(o.getId());
if (Optional.ofNullable(o.getLegalname()).isPresent())
mo.setName(o.getLegalname().getValue());
if (Optional.ofNullable(o.getPid()).isPresent())
for (StructuredProperty pid : o.getPid()) {
// This first switch dereferences the qualifier and its classid without a null check.
switch (pid.getQualifier().getClassid().toLowerCase()) {
case "ror":
mo.setRor(pid.getValue());
break;
case "isni":
mo.setIsni(pid.getValue());
break;
case "fundref":
mo.setFundRef(pid.getValue());
break;
case "ringgold":
mo.setRinGold(pid.getValue());
break;
case "wikidata":
mo.setWikidata(pid.getValue());
break;
// This second form only switches when both the qualifier and its classid are non-null.
if(Optional.ofNullable(pid.getQualifier()).isPresent() &&
Optional.ofNullable(pid.getQualifier().getClassid()).isPresent())
switch (pid.getQualifier().getClassid().toLowerCase()) {
case "ror":
mo.setRor(pid.getValue());
break;
case "isni":
mo.setIsni(pid.getValue());
break;
case "fundref":
mo.setFundRef(pid.getValue());
break;
case "ringgold":
mo.setRinGold(pid.getValue());
break;
case "wikidata":
mo.setWikidata(pid.getValue());
break;
}
}
}
return mo;
}
/**
 * Builds a {@code MinGrant} from the given {@code Project}.
 *
 * Sets the local identifier, the code when the project has one, the funder
 * obtained by passing the value of the first funding-tree entry to
 * {@code getFunderName}, and a title taken from the acronym or the title.
 *
 * NOTE(review): this span reads like a rendered diff hunk in which pre-change and
 * post-change lines appear together (two setLocal_identifier calls, a title check
 * followed by an acronym/title chain) — confirm against the actual file which
 * lines are live.
 *
 * @param p the project to summarise
 * @return the populated {@code MinGrant}
 * @throws DocumentException presumably raised by {@code getFunderName} while parsing the funding tree — TODO confirm
 */
public static MinGrant getMinGrant(Project p) throws DocumentException {
MinGrant mg = new MinGrant();
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
//mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
// The raw project id is used as the local identifier (no prefix applied).
mg.setLocal_identifier(p.getId());
if (Optional.ofNullable(p.getCode()).isPresent())
mg.setCode(p.getCode().getValue());
// NOTE(review): only null is checked; presumably get(0) fails on an empty funding tree — confirm whether empty lists occur.
if (Optional.ofNullable(p.getFundingtree()).isPresent())
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
// NOTE(review): read literally, the else below binds to the acronym check, so nothing is set when the title is absent — verify intent.
if (Optional.ofNullable(p.getTitle()).isPresent())
if (Optional.ofNullable(p.getAcronym()).isPresent())
mg.setTitle(p.getAcronym().getValue());
else if (Optional.ofNullable(p.getTitle()).isPresent()) {
mg.setTitle(p.getTitle().getValue());
}
return mg;
}
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
MinProduct mp = new MinProduct();
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
//mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
mp.setLocal_identifier(r.getId());
for (StructuredProperty title : r.getTitle()) {
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
mp.setTitle(title.getValue());

View File

@ -134,7 +134,7 @@ public class DumpGrantTest implements Serializable {
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-outputPath", workingDir,
"-outputPath", "/tmp/",
"-workingDir", workingDir
});

View File

@ -251,7 +251,7 @@ public class EmitFromEntitiesJobTest {
Dataset<EmitPerManifestation> manifestationDataset = spark
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
manifestationDataset.show(false);
manifestation.foreach(m-> System.out.println(OBJECT_MAPPER.writeValueAsString(m)));
}
}

Some files were not shown because too many files have changed in this diff Show More