[SKG-IF] Fix denormalization issue and add new properties
This commit is contained in:
parent
d6a8db5202
commit
cb9a081236
|
@ -32,6 +32,11 @@ public class Grant implements Serializable {
|
||||||
// class
|
// class
|
||||||
// isParticipant produces the list of organization internal identifiers
|
// isParticipant produces the list of organization internal identifiers
|
||||||
private List<Contributor> contributors;//
|
private List<Contributor> contributors;//
|
||||||
|
private String grantCode;
|
||||||
|
|
||||||
|
public String getGrantCode() {
|
||||||
|
return grantCode;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLocal_identifier() {
|
public String getLocal_identifier() {
|
||||||
return local_identifier;
|
return local_identifier;
|
||||||
|
@ -152,4 +157,8 @@ public class Grant implements Serializable {
|
||||||
public void setContributors(List<Contributor> contributors) {
|
public void setContributors(List<Contributor> contributors) {
|
||||||
this.contributors = contributors;
|
this.contributors = contributors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setGrantCode(String value) {
|
||||||
|
grantCode = value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,16 @@ public class Persons implements Serializable {
|
||||||
@JsonProperty("declared_affiliations")
|
@JsonProperty("declared_affiliations")
|
||||||
private List<Affiliation> declared_affiliations;
|
private List<Affiliation> declared_affiliations;
|
||||||
|
|
||||||
|
private String fullname;
|
||||||
|
|
||||||
|
public String getFullname() {
|
||||||
|
return fullname;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setFullname(String fullname) {
|
||||||
|
this.fullname = fullname;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLocal_identifier() {
|
public String getLocal_identifier() {
|
||||||
return local_identifier;
|
return local_identifier;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ public enum Prefixes implements Serializable {
|
||||||
|
|
||||||
TEMPORARY_PERSON("temp_person_::"),
|
TEMPORARY_PERSON("temp_person_::"),
|
||||||
|
|
||||||
DATASOURCE("datasource__::"), TOPIC("topic_______::"), VENUE("venue_______::");
|
DATASOURCE("datasource__::"), TOPIC("temp_topic__::"), VENUE("temp_venue__::");
|
||||||
|
|
||||||
public final String label;
|
public final String label;
|
||||||
|
|
||||||
|
|
|
@ -112,7 +112,8 @@ public class DumpDatasource implements Serializable {
|
||||||
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
eu.dnetlib.dhp.skgif.model.Datasource datasource = new eu.dnetlib.dhp.skgif.model.Datasource();
|
||||||
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
|
Tuple2<Datasource, Tuple2<String, EncloseMinElement>> first = vs.next();
|
||||||
Datasource d = first._1();
|
Datasource d = first._1();
|
||||||
datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
//datasource.setLocal_identifier(Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()));
|
||||||
|
datasource.setLocal_identifier(d.getId());
|
||||||
datasource
|
datasource
|
||||||
.setIdentifiers(
|
.setIdentifiers(
|
||||||
d
|
d
|
||||||
|
|
|
@ -92,7 +92,7 @@ public class DumpGrant implements Serializable {
|
||||||
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
r.getRelClass().equalsIgnoreCase(RelationType.ORGANIZATION_PARTICIPANT_IN_PROJECT.label));
|
||||||
Dataset<EncloseMinElement> eme = Utils
|
Dataset<EncloseMinElement> eme = Utils
|
||||||
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
.readPath(spark, workingDir + "minEntity", EncloseMinElement.class)
|
||||||
.filter((FilterFunction<EncloseMinElement>) e -> Optional.ofNullable(e.getMinOrganization()).isPresent());
|
.filter((FilterFunction<EncloseMinElement>) e -> e.getMinOrganization() != null);
|
||||||
|
|
||||||
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
|
Dataset<Tuple2<String, EncloseMinElement>> partecipantOrganization = relations
|
||||||
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
.joinWith(eme, relations.col("source").equalTo(eme.col("enclosedEntityId")))
|
||||||
|
@ -110,7 +110,8 @@ public class DumpGrant implements Serializable {
|
||||||
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
|
(MapGroupsFunction<String, Tuple2<Project, Tuple2<String, EncloseMinElement>>, Grant>) (k, v) -> {
|
||||||
Grant g = new Grant();
|
Grant g = new Grant();
|
||||||
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
|
Tuple2<Project, Tuple2<String, EncloseMinElement>> first = v.next();
|
||||||
g.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, k));
|
g.setLocal_identifier( k);
|
||||||
|
g.setGrantCode(first._1().getCode().getValue());
|
||||||
g.setIdentifiers(getProjectIdentifier(first._1()));
|
g.setIdentifiers(getProjectIdentifier(first._1()));
|
||||||
g.setTitle(first._1().getTitle().getValue());
|
g.setTitle(first._1().getTitle().getValue());
|
||||||
g
|
g
|
||||||
|
|
|
@ -74,7 +74,8 @@ public class DumpOrganization implements Serializable {
|
||||||
&& !o.getDataInfo().getInvisible())
|
&& !o.getDataInfo().getInvisible())
|
||||||
.map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> {
|
.map((MapFunction<Organization, eu.dnetlib.dhp.skgif.model.Organization>) o -> {
|
||||||
eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization();
|
eu.dnetlib.dhp.skgif.model.Organization organization = new eu.dnetlib.dhp.skgif.model.Organization();
|
||||||
organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
//organization.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
||||||
|
organization.setLocal_identifier(o.getId());
|
||||||
organization
|
organization
|
||||||
.setCountry(
|
.setCountry(
|
||||||
Optional
|
Optional
|
||||||
|
|
|
@ -289,7 +289,8 @@ public class DumpResult implements Serializable {
|
||||||
.setHosting_datasource(
|
.setHosting_datasource(
|
||||||
MinVenue
|
MinVenue
|
||||||
.newInstance(
|
.newInstance(
|
||||||
Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
|
//Utils.getIdentifier(Prefixes.DATASOURCE, epm.getInstance().getHostedby().getKey()),
|
||||||
|
epm.getInstance().getHostedby().getKey(),
|
||||||
epm.getInstance().getHostedby().getValue()));
|
epm.getInstance().getHostedby().getValue()));
|
||||||
|
|
||||||
return manifestation;
|
return manifestation;
|
||||||
|
|
|
@ -123,7 +123,8 @@ public class EmitFromEntities implements Serializable {
|
||||||
.setMinDatsource(
|
.setMinDatsource(
|
||||||
MinVenue
|
MinVenue
|
||||||
.newInstance(
|
.newInstance(
|
||||||
Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
//Utils.getIdentifier(Prefixes.DATASOURCE, d.getId()), d.getOfficialname().getValue()));
|
||||||
|
d.getId(), d.getOfficialname().getValue()));
|
||||||
eme.setEnclosedEntityId(d.getId());
|
eme.setEnclosedEntityId(d.getId());
|
||||||
return eme;
|
return eme;
|
||||||
}, Encoders.bean(EncloseMinElement.class))
|
}, Encoders.bean(EncloseMinElement.class))
|
||||||
|
@ -221,7 +222,8 @@ public class EmitFromEntities implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
|
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos")
|
||||||
|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
//|| s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
||||||
|
)
|
||||||
.map(s -> {
|
.map(s -> {
|
||||||
Topic t = new Topic();
|
Topic t = new Topic();
|
||||||
t
|
t
|
||||||
|
@ -279,6 +281,7 @@ public class EmitFromEntities implements Serializable {
|
||||||
Persons p = new Persons();
|
Persons p = new Persons();
|
||||||
p.setFamily_name(a.getSurname());
|
p.setFamily_name(a.getSurname());
|
||||||
p.setGiven_name(a.getName());
|
p.setGiven_name(a.getName());
|
||||||
|
p.setFullname(a.getFullname());
|
||||||
String identifier = new String();
|
String identifier = new String();
|
||||||
if (Optional.ofNullable(a.getPid()).isPresent()) {
|
if (Optional.ofNullable(a.getPid()).isPresent()) {
|
||||||
Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils
|
Tuple2<String, Boolean> orcid = eu.dnetlib.dhp.oa.graph.dump.skgif.Utils
|
||||||
|
|
|
@ -27,7 +27,8 @@ public class ResultMapper implements Serializable {
|
||||||
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
|
Optional<Qualifier> ort = Optional.ofNullable(input.getResulttype());
|
||||||
if (ort.isPresent()) {
|
if (ort.isPresent()) {
|
||||||
try {
|
try {
|
||||||
out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId()));
|
//out.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, input.getId()));
|
||||||
|
out.setLocal_identifier(input.getId());
|
||||||
mapPid(out, input);
|
mapPid(out, input);
|
||||||
mapTitle(out, input);
|
mapTitle(out, input);
|
||||||
mapAbstract(out, input);
|
mapAbstract(out, input);
|
||||||
|
@ -72,7 +73,6 @@ public class ResultMapper implements Serializable {
|
||||||
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
.setLocal_identifier(Utils.getIdentifier(Prefixes.TEMPORARY_PERSON, input.getId() + count));
|
||||||
contribution.setPerson(minPerson);
|
contribution.setPerson(minPerson);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
if (Optional.ofNullable(a.getRank()).isPresent()) {
|
||||||
contribution.setRank(a.getRank());
|
contribution.setRank(a.getRank());
|
||||||
|
@ -93,8 +93,9 @@ public class ResultMapper implements Serializable {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") ||
|
s -> s.getQualifier().getClassid().equalsIgnoreCase("fos") )
|
||||||
s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
// ||
|
||||||
|
// s.getQualifier().getClassid().equalsIgnoreCase("sdg"))
|
||||||
.map(s -> {
|
.map(s -> {
|
||||||
ResultTopic topic = new ResultTopic();
|
ResultTopic topic = new ResultTopic();
|
||||||
MinTopic minTopic = new MinTopic();
|
MinTopic minTopic = new MinTopic();
|
||||||
|
|
|
@ -87,48 +87,57 @@ public class Utils implements Serializable {
|
||||||
|
|
||||||
public static MinOrganization getMinOrganization(Organization o) {
|
public static MinOrganization getMinOrganization(Organization o) {
|
||||||
MinOrganization mo = new MinOrganization();
|
MinOrganization mo = new MinOrganization();
|
||||||
mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
//mo.setLocal_identifier(Utils.getIdentifier(Prefixes.ORGANIZATION, o.getId()));
|
||||||
|
mo.setLocal_identifier(o.getId());
|
||||||
if (Optional.ofNullable(o.getLegalname()).isPresent())
|
if (Optional.ofNullable(o.getLegalname()).isPresent())
|
||||||
mo.setName(o.getLegalname().getValue());
|
mo.setName(o.getLegalname().getValue());
|
||||||
if (Optional.ofNullable(o.getPid()).isPresent())
|
if (Optional.ofNullable(o.getPid()).isPresent())
|
||||||
for (StructuredProperty pid : o.getPid()) {
|
for (StructuredProperty pid : o.getPid()) {
|
||||||
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
if(Optional.ofNullable(pid.getQualifier()).isPresent() &&
|
||||||
case "ror":
|
Optional.ofNullable(pid.getQualifier().getClassid()).isPresent())
|
||||||
mo.setRor(pid.getValue());
|
switch (pid.getQualifier().getClassid().toLowerCase()) {
|
||||||
break;
|
case "ror":
|
||||||
case "isni":
|
mo.setRor(pid.getValue());
|
||||||
mo.setIsni(pid.getValue());
|
break;
|
||||||
break;
|
case "isni":
|
||||||
case "fundref":
|
mo.setIsni(pid.getValue());
|
||||||
mo.setFundRef(pid.getValue());
|
break;
|
||||||
break;
|
case "fundref":
|
||||||
case "ringgold":
|
mo.setFundRef(pid.getValue());
|
||||||
mo.setRinGold(pid.getValue());
|
break;
|
||||||
break;
|
case "ringgold":
|
||||||
case "wikidata":
|
mo.setRinGold(pid.getValue());
|
||||||
mo.setWikidata(pid.getValue());
|
break;
|
||||||
break;
|
case "wikidata":
|
||||||
|
mo.setWikidata(pid.getValue());
|
||||||
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return mo;
|
return mo;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MinGrant getMinGrant(Project p) throws DocumentException {
|
public static MinGrant getMinGrant(Project p) throws DocumentException {
|
||||||
MinGrant mg = new MinGrant();
|
MinGrant mg = new MinGrant();
|
||||||
mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
//mg.setLocal_identifier(Utils.getIdentifier(Prefixes.GRANT, p.getId()));
|
||||||
|
mg.setLocal_identifier(p.getId());
|
||||||
if (Optional.ofNullable(p.getCode()).isPresent())
|
if (Optional.ofNullable(p.getCode()).isPresent())
|
||||||
mg.setCode(p.getCode().getValue());
|
mg.setCode(p.getCode().getValue());
|
||||||
if (Optional.ofNullable(p.getFundingtree()).isPresent())
|
if (Optional.ofNullable(p.getFundingtree()).isPresent())
|
||||||
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
mg.setFunder(getFunderName(p.getFundingtree().get(0).getValue()));
|
||||||
if (Optional.ofNullable(p.getTitle()).isPresent())
|
if (Optional.ofNullable(p.getAcronym()).isPresent())
|
||||||
|
mg.setTitle(p.getAcronym().getValue());
|
||||||
|
else if (Optional.ofNullable(p.getTitle()).isPresent()) {
|
||||||
mg.setTitle(p.getTitle().getValue());
|
mg.setTitle(p.getTitle().getValue());
|
||||||
|
|
||||||
|
}
|
||||||
return mg;
|
return mg;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
|
public static <R extends Result> MinProduct getMinProduct(R r) throws JsonProcessingException {
|
||||||
MinProduct mp = new MinProduct();
|
MinProduct mp = new MinProduct();
|
||||||
mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
//mp.setLocal_identifier(Utils.getIdentifier(Prefixes.RESEARCH_PRODUCT, r.getId()));
|
||||||
|
mp.setLocal_identifier(r.getId());
|
||||||
for (StructuredProperty title : r.getTitle()) {
|
for (StructuredProperty title : r.getTitle()) {
|
||||||
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
|
if (title.getQualifier().getClassid().equalsIgnoreCase("main title")) {
|
||||||
mp.setTitle(title.getValue());
|
mp.setTitle(title.getValue());
|
||||||
|
|
|
@ -134,7 +134,7 @@ public class DumpGrantTest implements Serializable {
|
||||||
new String[] {
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-sourcePath", sourcePath,
|
"-sourcePath", sourcePath,
|
||||||
"-outputPath", workingDir,
|
"-outputPath", "/tmp/",
|
||||||
"-workingDir", workingDir
|
"-workingDir", workingDir
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
|
@ -251,7 +251,7 @@ public class EmitFromEntitiesJobTest {
|
||||||
|
|
||||||
Dataset<EmitPerManifestation> manifestationDataset = spark
|
Dataset<EmitPerManifestation> manifestationDataset = spark
|
||||||
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
|
.createDataset(manifestation.rdd(), Encoders.bean(EmitPerManifestation.class));
|
||||||
manifestationDataset.show(false);
|
manifestation.foreach(m-> System.out.println(OBJECT_MAPPER.writeValueAsString(m)));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue