[SKG-IF] Fix denormalization issue and add new properties

This commit is contained in:
Miriam Baglioni 2024-03-12 14:51:14 +01:00
parent d6a8db5202
commit cb9a081236
657 changed files with 71 additions and 35 deletions

View File

@ -32,6 +32,11 @@ public class Grant implements Serializable {
// class // class
// isParticipant produces the list of organization internal identifiers // isParticipant produces the list of organization internal identifiers
private List<Contributor> contributors;// private List<Contributor> contributors;//
// Code of the grant; written through setGrantCode and read through getGrantCode.
private String grantCode;
/**
 * Returns the grant code currently stored on this grant.
 *
 * @return the value of the {@code grantCode} field; {@code null} if it was never set
 */
public String getGrantCode() {
	return this.grantCode;
}
public String getLocal_identifier() { public String getLocal_identifier() {
return local_identifier; return local_identifier;
@ -152,4 +157,8 @@ public class Grant implements Serializable {
public void setContributors(List<Contributor> contributors) { public void setContributors(List<Contributor> contributors) {
this.contributors = contributors; this.contributors = contributors;
} }
/**
 * Stores the given grant code on this grant, replacing any previous value.
 *
 * @param value the grant code to store
 */
public void setGrantCode(String value) {
	this.grantCode = value;
}
} }

View File

@ -22,6 +22,16 @@ public class Persons implements Serializable {
@JsonProperty("declared_affiliations") @JsonProperty("declared_affiliations")
private List<Affiliation> declared_affiliations; private List<Affiliation> declared_affiliations;
// Full name of the person, kept alongside the separate given/family name properties.
private String fullname;
/**
 * Returns the full name currently stored on this person.
 *
 * @return the value of the {@code fullname} field; {@code null} if it was never set
 */
public String getFullname() {
	return this.fullname;
}
/**
 * Stores the given full name on this person, replacing any previous value.
 *
 * @param fullname the full name to store
 */
public void setFullname(final String fullname) {
	this.fullname = fullname;
}
public String getLocal_identifier() { public String getLocal_identifier() {
return local_identifier; return local_identifier;
} }

View File

@ -19,7 +19,7 @@ public enum Prefixes implements Serializable {
TEMPORARY_PERSON("temp_person_::"), TEMPORARY_PERSON("temp_person_::"),
DATASOURCE("datasource__::"), TOPIC("topic_______::"), VENUE("venue_______::"); DATASOURCE("datasource__::"), TOPIC("temp_topic__::"), VENUE("temp_venue__::");
public final String label; public final String label;

View File

@ -112,7 +112,8 @@ public class DumpDatasource implements Serializable {
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource(); eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next(); Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
Datasource d = first._1(); Datasource d = first._1();
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId())); //datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
datasource.setLocal_identifier(d.getId());
datasource datasource
.setIdentifiers( .setIdentifiers(
d d

View File

@ -92,7 +92,7 @@ public class DumpGrant implements Serializable {
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label)); r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
Dataset<EncloseMinElement> eme = Utils Dataset<EncloseMinElement> eme = Utils
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class) .readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent()); .filter((FilterFunction<EncloseMinElement>) e -> e.getMinOrganization() != null);
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId"))) .joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
@ -110,7 +110,8 @@ public class DumpGrant implements Serializable {
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> { (MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
Grant g = new Grant(); Grant g = new Grant();
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next(); Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k)); g.setLocal_identifier( k);
g.setGrantCode(first._1().getCode().getValue());
g.setIdentifiers(getProjectIdentifier(first._1())); g.setIdentifiers(getProjectIdentifier(first._1()));
g.setTitle(first._1().getTitle().getValue()); g.setTitle(first._1().getTitle().getValue());
g g

View File

@ -74,7 +74,8 @@ public class DumpOrganization implements Serializable {
&& !o.getDataInfo().getInvisible()) && !o.getDataInfo().getInvisible())
.map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> { .map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> {
eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization(); eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization();
organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId())); //organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
organization.setLocal_identifier(o.getId());
organization organization
.setCountry( .setCountry(
Optional Optional

View File

@ -289,7 +289,8 @@ public class DumpResult implements Serializable {
.setHosting_datasource( .setHosting_datasource(
MinVenue MinVenue
.newInstance( .newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()), //Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
epm.getInstance().getHostedby().getKey(),
epm.getInstance().getHostedby().getValue())); epm.getInstance().getHostedby().getValue()));
return manifestation; return manifestation;

View File

@ -123,7 +123,8 @@ public class EmitFromEntities implements Serializable {
.setMinDatsource( .setMinDatsource(
MinVenue MinVenue
.newInstance( .newInstance(
Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue())); //Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
d.getId(), d.getOfficialname().getValue()));
eme.setEnclosedEntityId(d.getId()); eme.setEnclosedEntityId(d.getId());
return eme; return eme;
}, Encoders.bean(EncloseMinElement.class)) }, Encoders.bean(EncloseMinElement.class))
@ -221,7 +222,8 @@ public class EmitFromEntities implements Serializable {
.stream() .stream()
.filter( .filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
|| s.getQualifier().getClassid().equalsIgnoreCase("sdg")) //|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
)
.map(s -> { .map(s -> {
Topic t = new Topic(); Topic t = new Topic();
t t
@ -279,6 +281,7 @@ public class EmitFromEntities implements Serializable {
Persons p = new Persons(); Persons p = new Persons();
p.setFamily_name(a.getSurname()); p.setFamily_name(a.getSurname());
p.setGiven_name(a.getName()); p.setGiven_name(a.getName());
p.setFullname(a.getFullname());
String identifier = new String(); String identifier = new String();
if (Optional.ofNullable(a.getPid()).isPresent()) { if (Optional.ofNullable(a.getPid()).isPresent()) {
Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils

View File

@ -27,7 +27,8 @@ public class ResultMapper implements Serializable {
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype()); Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
if (ort.isPresent()) { if (ort.isPresent()) {
try { try {
out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId())); //out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId()));
out.setLocal_identifier(input.getId());
mapPid(out, input); mapPid(out, input);
mapTitle(out, input); mapTitle(out, input);
mapAbstract(out, input); mapAbstract(out, input);
@ -72,7 +73,6 @@ public class ResultMapper implements Serializable {
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count)); .setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
contribution.setPerson(minPerson); contribution.setPerson(minPerson);
} }
} }
if (Optional.ofNullable(a.getRank()).isPresent()) { if (Optional.ofNullable(a.getRank()).isPresent()) {
contribution.setRank(a.getRank()); contribution.setRank(a.getRank());
@ -93,8 +93,9 @@ public class ResultMapper implements Serializable {
.getSubject() .getSubject()
.stream() .stream()
.filter( .filter(
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") || s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") )
s.getQualifier().getClassid().equalsIgnoreCase("sdg")) // ||
// s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
.map(s -> { .map(s -> {
ResultTopic topic = new ResultTopic(); ResultTopic topic = new ResultTopic();
MinTopic minTopic = new MinTopic(); MinTopic minTopic = new MinTopic();

View File

@ -87,48 +87,57 @@ public class Utils implements Serializable {
public static MinOrganization getMinOrganization(Organization o) { public static MinOrganization getMinOrganization(Organization o) {
MinOrganization mo = new MinOrganization(); MinOrganization mo = new MinOrganization();
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId())); //mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
mo.setLocal_identifier(o.getId());
if (Optional.ofNullable(o.getLegalname()).isPresent()) if (Optional.ofNullable(o.getLegalname()).isPresent())
mo.setName(o.getLegalname().getValue()); mo.setName(o.getLegalname().getValue());
if (Optional.ofNullable(o.getPid()).isPresent()) if (Optional.ofNullable(o.getPid()).isPresent())
for (StructuredProperty pid : o.getPid()) { for (StructuredProperty pid : o.getPid()) {
switch (pid.getQualifier().getClassid().toLowerCase()) { if(Optional.ofNullable(pid.getQualifier()).isPresent() &&
case "ror": Optional.ofNullable(pid.getQualifier().getClassid()).isPresent())
mo.setRor(pid.getValue()); switch (pid.getQualifier().getClassid().toLowerCase()) {
break; case "ror":
case "isni": mo.setRor(pid.getValue());
mo.setIsni(pid.getValue()); break;
break; case "isni":
case "fundref": mo.setIsni(pid.getValue());
mo.setFundRef(pid.getValue()); break;
break; case "fundref":
case "ringgold": mo.setFundRef(pid.getValue());
mo.setRinGold(pid.getValue()); break;
break; case "ringgold":
case "wikidata": mo.setRinGold(pid.getValue());
mo.setWikidata(pid.getValue()); break;
break; case "wikidata":
mo.setWikidata(pid.getValue());
break;
} }
} }
return mo; return mo;
} }
public static MinGrant getMinGrant(Project p) throws DocumentException { public static MinGrant getMinGrant(Project p) throws DocumentException {
MinGrant mg = new MinGrant(); MinGrant mg = new MinGrant();
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId())); //mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
mg.setLocal_identifier(p.getId());
if (Optional.ofNullable(p.getCode()).isPresent()) if (Optional.ofNullable(p.getCode()).isPresent())
mg.setCode(p.getCode().getValue()); mg.setCode(p.getCode().getValue());
if (Optional.ofNullable(p.getFundingtree()).isPresent()) if (Optional.ofNullable(p.getFundingtree()).isPresent())
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue())); mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
if (Optional.ofNullable(p.getTitle()).isPresent()) if (Optional.ofNullable(p.getAcronym()).isPresent())
mg.setTitle(p.getAcronym().getValue());
else if (Optional.ofNullable(p.getTitle()).isPresent()) {
mg.setTitle(p.getTitle().getValue()); mg.setTitle(p.getTitle().getValue());
}
return mg; return mg;
} }
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException { public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
MinProduct mp = new MinProduct(); MinProduct mp = new MinProduct();
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId())); //mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
mp.setLocal_identifier(r.getId());
for (StructuredProperty title : r.getTitle()) { for (StructuredProperty title : r.getTitle()) {
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) { if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
mp.setTitle(title.getValue()); mp.setTitle(title.getValue());

View File

@ -134,7 +134,7 @@ public class DumpGrantTest implements Serializable {
new String[] { new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath, "-sourcePath", sourcePath,
"-outputPath", workingDir, "-outputPath", "/tmp/",
"-workingDir", workingDir "-workingDir", workingDir
}); });

View File

@ -251,7 +251,7 @@ public class EmitFromEntitiesJobTest {
Dataset<EmitPerManifestation> manifestationDataset = spark Dataset<EmitPerManifestation> manifestationDataset = spark
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class)); .createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
manifestationDataset.show(false); manifestation.foreach(m-> System.out.println(OBJECT_MAPPER.writeValueAsString(m)));
} }
} }

Some files were not shown because too many files have changed in this diff Show More