forked from D-Net/dnet-hadoop

Split the classes related to the communities dump and to the whole graph dump.

parent 355d7e426e
commit 332258d199
eu/dnetlib/dhp/schema/dump/oaf/Oaf.java (deleted)
@@ -1,41 +0,0 @@

package eu.dnetlib.dhp.schema.dump.oaf;

import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public abstract class Oaf implements Serializable {

    /**
     * The list of datasource id/name pairs providing this relationship.
     */
    protected List<KeyValue> collectedfrom;

    private Long lastupdatetimestamp;

    public List<KeyValue> getCollectedfrom() {
        return collectedfrom;
    }

    public void setCollectedfrom(List<KeyValue> collectedfrom) {
        this.collectedfrom = collectedfrom;
    }

    public Long getLastupdatetimestamp() {
        return lastupdatetimestamp;
    }

    public void setLastupdatetimestamp(Long lastupdatetimestamp) {
        this.lastupdatetimestamp = lastupdatetimestamp;
    }

    // public void setAllowedValues(eu.dnetlib.dhp.schema.oaf.Oaf o){
    // collectedfrom = o.getCollectedfrom().stream().map(cf -> KeyValue.newInstance(cf)).collect(Collectors.toList());
    //
    // lastupdatetimestamp = o.getLastupdatetimestamp();
    //
    // }

}
eu/dnetlib/dhp/schema/dump/oaf/OafEntity.java (deleted)
@@ -1,61 +0,0 @@

package eu.dnetlib.dhp.schema.dump.oaf;

import eu.dnetlib.dhp.schema.dump.oaf.community.Project;

import java.io.Serializable;
import java.util.List;

public abstract class OafEntity extends Oaf implements Serializable {

    private String id;
    private List<String> originalId;
    private List<ControlledField> pid;
    private String dateofcollection;
    private List<Project> projects;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public List<String> getOriginalId() {
        return originalId;
    }

    public void setOriginalId(List<String> originalId) {
        this.originalId = originalId;
    }

    public List<ControlledField> getPid() {
        return pid;
    }

    public void setPid(List<ControlledField> pid) {
        this.pid = pid;
    }

    public String getDateofcollection() {
        return dateofcollection;
    }

    public void setDateofcollection(String dateofcollection) {
        this.dateofcollection = dateofcollection;
    }

    public List<Project> getProjects() {
        return projects;
    }

    public void setProjects(List<Project> projects) {
        this.projects = projects;
    }

}
eu/dnetlib/dhp/schema/dump/oaf/community/CommunityResult.java (new file)
@@ -0,0 +1,41 @@

package eu.dnetlib.dhp.schema.dump.oaf.community;

import java.util.List;

import eu.dnetlib.dhp.schema.dump.oaf.Context;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Result;

public class CommunityResult extends Result {

    private List<Project> projects;
    private List<Context> context;
    protected List<KeyValue> collectedfrom;

    public List<KeyValue> getCollectedfrom() {
        return collectedfrom;
    }

    public void setCollectedfrom(List<KeyValue> collectedfrom) {
        this.collectedfrom = collectedfrom;
    }

    public List<Project> getProjects() {
        return projects;
    }

    public void setProjects(List<Project> projects) {
        this.projects = projects;
    }

    public List<Context> getContext() {
        return context;
    }

    public void setContext(List<Context> context) {
        this.context = context;
    }
}
eu/dnetlib/dhp/schema/dump/oaf/community/Project.java
@@ -1,11 +1,11 @@

 package eu.dnetlib.dhp.schema.dump.oaf.community;

+import java.io.Serializable;
+
 import eu.dnetlib.dhp.schema.dump.oaf.Funder;
 import eu.dnetlib.dhp.schema.dump.oaf.Provenance;

-import java.io.Serializable;
-
 public class Project implements Serializable {

     private String id; // OpenAIRE id
eu/dnetlib/dhp/schema/dump/oaf/graph/Constants.java (new file)
@@ -0,0 +1,21 @@

package eu.dnetlib.dhp.schema.dump.oaf.graph;

import java.io.Serializable;

public class Constants implements Serializable {
    // collectedFrom goes with isProvidedBy -> take it from ModelSupport

    public static final String HOSTED_BY = "isHostedBy";
    public static final String HOSTS = "hosts";

    // for community results use isRelatedTo

    public static final String RESULT_ENTITY = "result";
    public static final String DATASOURCE_ENTITY = "datasource";
    public static final String CONTEXT_ENTITY = "context";

    public static final String CONTEXT_ID = "60";
    public static final String CONTEXT_NS_PREFIX = "context____";

}
eu/dnetlib/dhp/schema/dump/oaf/graph/Funder.java
@@ -1,27 +1,28 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;
-import java.util.List;

 public class Funder implements Serializable {

-    private String id;
+    // private String id ;

     private String shortName;

     private String name;

-    private List<Levels> funding_levels;
+    // private List<Levels> funding_levels;
+    private Fundings funding_stream;

     private String jurisdiction;

-    public String getId() {
-        return id;
-    }
-
-    public void setId(String id) {
-        this.id = id;
-    }
+    // public String getId() {
+    //     return id;
+    // }
+    //
+    // public void setId(String id) {
+    //     this.id = id;
+    // }

     public String getShortName() {
         return shortName;

@@ -39,13 +40,13 @@ public class Funder implements Serializable {

         this.name = name;
     }

-    public List<Levels> getFunding_levels() {
-        return funding_levels;
-    }
-
-    public void setFunding_levels(List<Levels> funding_levels) {
-        this.funding_levels = funding_levels;
-    }
+    // public List<Levels> getFunding_levels() {
+    //     return funding_levels;
+    // }
+    //
+    // public void setFunding_levels(List<Levels> funding_levels) {
+    //     this.funding_levels = funding_levels;
+    // }

     public String getJurisdiction() {
         return jurisdiction;

@@ -54,4 +55,12 @@ public class Funder implements Serializable {

     public void setJurisdiction(String jurisdiction) {
         this.jurisdiction = jurisdiction;
     }
+
+    public Fundings getFunding_stream() {
+        return funding_stream;
+    }
+
+    public void setFunding_stream(Fundings funding_stream) {
+        this.funding_stream = funding_stream;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Fundings.java (new file)
@@ -0,0 +1,26 @@

package eu.dnetlib.dhp.schema.dump.oaf.graph;

import java.io.Serializable;

public class Fundings implements Serializable {

    private String id;
    private String description;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }
}
eu/dnetlib/dhp/schema/dump/oaf/graph/Granted.java
@@ -1,11 +1,13 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;
+import java.util.Optional;

 public class Granted implements Serializable {
     private String currency;
-    private String totalcost;
-    private String fundedamount;
+    private float totalcost;
+    private float fundedamount;

     public String getCurrency() {
         return currency;

@@ -15,19 +17,34 @@ public class Granted implements Serializable {

         this.currency = currency;
     }

-    public String getTotalcost() {
+    public float getTotalcost() {
         return totalcost;
     }

-    public void setTotalcost(String totalcost) {
+    public void setTotalcost(float totalcost) {
         this.totalcost = totalcost;
     }

-    public String getFundedamount() {
+    public float getFundedamount() {
         return fundedamount;
     }

-    public void setFundedamount(String fundedamount) {
+    public void setFundedamount(float fundedamount) {
         this.fundedamount = fundedamount;
     }
+
+    public static Granted newInstance(String currency, float totalcost, float fundedamount) {
+        Granted granted = new Granted();
+        granted.currency = currency;
+        granted.totalcost = totalcost;
+        granted.fundedamount = fundedamount;
+        return granted;
+    }
+
+    public static Granted newInstance(String currency, float fundedamount) {
+        Granted granted = new Granted();
+        granted.currency = currency;
+        granted.fundedamount = fundedamount;
+        return granted;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Levels.java
@@ -1,10 +1,11 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;

 public class Levels implements Serializable {
     private String level;
-    private String il;
+    private String id;
     private String description;
     private String name;

@@ -16,12 +17,12 @@ public class Levels implements Serializable {

         this.level = level;
     }

-    public String getIl() {
-        return il;
-    }
+    public String getId() {
+        return id;
+    }

-    public void setIl(String il) {
-        this.il = il;
-    }
+    public void setId(String il) {
+        this.id = il;
+    }

     public String getDescription() {
eu/dnetlib/dhp/schema/dump/oaf/graph/Node.java
@@ -1,3 +1,4 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;

@@ -21,4 +22,11 @@ public class Node implements Serializable {

     public void setType(String type) {
         this.type = type;
     }
+
+    public static Node newInstance(String id, String type) {
+        Node node = new Node();
+        node.id = id;
+        node.type = type;
+        return node;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Organization.java
@@ -1,21 +1,24 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

-import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
-import eu.dnetlib.dhp.schema.dump.oaf.Country;
-import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
-
 import java.io.Serializable;
 import java.util.List;

+import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
+import eu.dnetlib.dhp.schema.dump.oaf.Country;
+import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
+
 public class Organization implements Serializable {
     private String legalshortname;
     private String legalname;
     private String websiteurl;
     private List<String> alternativenames;
-    private Country country;
+    private Qualifier country;
     private String id;
     private List<ControlledField> pid;
-    private String collectedfrom;
+    private List<KeyValue> collectedfrom;

     public String getLegalshortname() {
         return legalshortname;

@@ -49,11 +52,11 @@ public class Organization implements Serializable {

         this.alternativenames = alternativenames;
     }

-    public Country getCountry() {
+    public Qualifier getCountry() {
         return country;
     }

-    public void setCountry(Country country) {
+    public void setCountry(Qualifier country) {
         this.country = country;
     }

@@ -73,11 +76,11 @@ public class Organization implements Serializable {

         this.pid = pid;
     }

-    public String getCollectedfrom() {
+    public List<KeyValue> getCollectedfrom() {
         return collectedfrom;
     }

-    public void setCollectedfrom(String collectedfrom) {
+    public void setCollectedfrom(List<KeyValue> collectedfrom) {
         this.collectedfrom = collectedfrom;
     }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Programme.java
@@ -1,3 +1,4 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;

@@ -22,5 +23,10 @@ public class Programme implements Serializable {

         this.description = description;
     }
+
+    public static Programme newInstance(String code, String description) {
+        Programme p = new Programme();
+        p.code = code;
+        p.description = description;
+        return p;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Project.java
@@ -1,14 +1,14 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;
 import java.util.List;

+import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
+
 public class Project implements Serializable {
     private String id;
+    private List<KeyValue> collectedfrom;
     private String websiteurl;
     private String code;
     private String acronym;

@@ -27,13 +27,14 @@ public class Project implements Serializable {

     private boolean openaccessmandatefordataset;
     private List<String> subject;
-    private Funder funding;
+
+    private List<Funder> funding;

     private String summary;

     private Granted granted;

-    private Programme programme;
+    private List<Programme> programme;

     public String getId() {
         return id;

@@ -139,11 +140,11 @@ public class Project implements Serializable {

         this.subject = subject;
     }

-    public Funder getFunding() {
+    public List<Funder> getFunding() {
         return funding;
     }

-    public void setFunding(Funder funding) {
+    public void setFunding(List<Funder> funding) {
         this.funding = funding;
     }

@@ -163,11 +164,19 @@ public class Project implements Serializable {

         this.granted = granted;
     }

-    public Programme getProgramme() {
+    public List<Programme> getProgramme() {
         return programme;
     }

-    public void setProgramme(Programme programme) {
+    public void setProgramme(List<Programme> programme) {
         this.programme = programme;
     }
+
+    public List<KeyValue> getCollectedfrom() {
+        return collectedfrom;
+    }
+
+    public void setCollectedfrom(List<KeyValue> collectedfrom) {
+        this.collectedfrom = collectedfrom;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/RelType.java
@@ -1,3 +1,4 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

 import java.io.Serializable;

@@ -21,4 +22,11 @@ public class RelType implements Serializable {

     public void setType(String type) {
         this.type = type;
     }
+
+    public static RelType newInstance(String name, String type) {
+        RelType rel = new RelType();
+        rel.name = name;
+        rel.type = type;
+        return rel;
+    }
 }
eu/dnetlib/dhp/schema/dump/oaf/graph/Relation.java
@@ -1,9 +1,10 @@

 package eu.dnetlib.dhp.schema.dump.oaf.graph;

-import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
-
 import java.io.Serializable;

+import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
+
 public class Relation implements Serializable {
     private Node source;
     private Node target;
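Note: the Node and RelType factories added above, together with the new graph Constants, hint at how relations involving context entities are meant to be assembled. The sketch below is not code from this commit: only Node.newInstance, RelType.newInstance and the Constants are taken from the diff, while the Relation setters it uses (setSource, setTarget, setReltype) and the "isRelatedTo"/"relationship" values are assumptions based on the comments in Constants.

// Hypothetical sketch: building an isRelatedTo relation from a context to a datasource.
import eu.dnetlib.dhp.schema.dump.oaf.graph.Constants;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Node;
import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;

public class RelationSketch {
    public static Relation contextToDatasource(String contextId, String datasourceId) {
        Relation r = new Relation();
        // source/target are the only members visible in the hunk above; the setters are assumed bean setters
        r.setSource(Node.newInstance(contextId, Constants.CONTEXT_ENTITY));
        r.setTarget(Node.newInstance(datasourceId, Constants.DATASOURCE_ENTITY));
        // relation semantics assumed from the "community result uses isRelatedTo" comment in Constants
        r.setReltype(RelType.newInstance("isRelatedTo", "relationship"));
        return r;
    }
}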
eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchCommunity.java (new file)
@@ -0,0 +1,16 @@

package eu.dnetlib.dhp.schema.dump.oaf.graph;

import java.util.List;

public class ResearchCommunity extends ResearchInitiative {
    private List<String> subject;

    public List<String> getSubject() {
        return subject;
    }

    public void setSubject(List<String> subject) {
        this.subject = subject;
    }
}
eu/dnetlib/dhp/schema/dump/oaf/graph/ResearchInitiative.java (new file)
@@ -0,0 +1,61 @@

package eu.dnetlib.dhp.schema.dump.oaf.graph;

import java.io.Serializable;

public class ResearchInitiative implements Serializable {
    private String id; // openaireId
    private String originalId; // context id
    private String name; // context name
    private String type; // context type: research initiative or research community
    private String description;
    private String zenodo_community;

    public String getZenodo_community() {
        return zenodo_community;
    }

    public void setZenodo_community(String zenodo_community) {
        this.zenodo_community = zenodo_community;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String label) {
        this.name = label;
    }

    public String getOriginalId() {
        return originalId;
    }

    public void setOriginalId(String originalId) {
        this.originalId = originalId;
    }

    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }
}
eu/dnetlib/dhp/oa/graph/dump/graph/ContextInfo.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class ContextInfo {
}
eu/dnetlib/dhp/oa/graph/dump/graph/SparkCreateContextEntities.java (new file)
@@ -0,0 +1,77 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.Optional;

public class SparkCreateContextEntities implements Serializable {

    // read the contexts from the IS and build the map id -> ContextInfo

    // create the entities with the general information

    // create the relations from the information in projectList and datasourceList; the relations are of type isRelatedTo in both directions

    // take a community_organization parameter to create relations between communities and organizations.
    // N.B. the organization id is not deduplicated => it has to be resolved, taking the distinct dedup ids

    private static final Logger log = LoggerFactory.getLogger(SparkCreateContextEntities.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkCreateContextEntities.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
        queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
        CommunityMap communityMap = queryInformationSystem.getCommunityMap();

        createEntities(communityMap, outputPath + "/context");
    }

    private static void createEntities(CommunityMap communityMap, String s) {
        communityMap.keySet().stream()
            .map(key -> {
                ResearchInitiative community;
                ContextInfo cinfo = communityMap.get(key);
                if (cinfo.getType().equals("community")) {
                    community = new ResearchCommunity();
                } else {
                    community = new ResearchInitiative();
                }
                return community;
            })
    }

}
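Note: as committed, createEntities builds a stream with no terminal operation (and a missing semicolon), and ContextInfo is still an empty stub, so no context entity is actually materialised yet. A minimal sketch of how it could be completed follows; the ContextInfo getters, the stand-in ContextInfoSketch type and the line-per-record JSON output are assumptions, not part of this commit — only ResearchInitiative/ResearchCommunity and their setters come from the diff.

// Hypothetical completion sketch, under the assumptions stated above.
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative;

public class CreateContextEntitiesSketch {

    public static void writeEntities(Map<String, ContextInfoSketch> contextMap, String outputPath) throws IOException {
        ObjectMapper mapper = new ObjectMapper();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            for (ContextInfoSketch cinfo : contextMap.values()) {
                // research communities get the subclass carrying subjects; everything else stays a ResearchInitiative
                ResearchInitiative entity = "community".equals(cinfo.getType())
                    ? new ResearchCommunity()
                    : new ResearchInitiative();
                entity.setOriginalId(cinfo.getId());          // assumed getter on the future ContextInfo
                entity.setName(cinfo.getName());              // assumed getter on the future ContextInfo
                entity.setDescription(cinfo.getDescription()); // assumed getter on the future ContextInfo
                entity.setType(cinfo.getType());
                writer.write(mapper.writeValueAsString(entity)); // one JSON record per line
                writer.newLine();
            }
        }
    }

    // stand-in for the fields ContextInfo will eventually carry (hypothetical)
    public static class ContextInfoSketch {
        private String id, type, name, description;
        public String getId() { return id; }
        public String getType() { return type; }
        public String getName() { return name; }
        public String getDescription() { return description; }
    }
}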
eu/dnetlib/dhp/oa/graph/dump/graph/CreateContextRelation.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class CreateContextRelation {
}
eu/dnetlib/dhp/oa/graph/dump/graph/DumpGraph.java (new file)
@@ -0,0 +1,253 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.KeyValue;
import eu.dnetlib.dhp.schema.dump.oaf.Qualifier;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

public class DumpGraph implements Serializable {

    public void run(Boolean isSparkSessionManaged,
                    String inputPath,
                    String outputPath,
                    Class<? extends OafEntity> inputClazz,
                    CommunityMap communityMap) {

        SparkConf conf = new SparkConf();
        switch (ModelSupport.idPrefixMap.get(inputClazz)) {
            case "50":
                DumpProducts d = new DumpProducts();
                d.run(isSparkSessionManaged, inputPath, outputPath, communityMap, inputClazz, true);
                break;
            case "40":
                runWithSparkSession(
                    conf,
                    isSparkSessionManaged,
                    spark -> {
                        Utils.removeOutputDir(spark, outputPath);
                        projectMap(spark, inputPath, outputPath, inputClazz);
                    });
                break;
            case "20":
                runWithSparkSession(
                    conf,
                    isSparkSessionManaged,
                    spark -> {
                        Utils.removeOutputDir(spark, outputPath);
                        organizationMap(spark, inputPath, outputPath, inputClazz);
                    });
                break;
        }
    }

    private static <E extends OafEntity> void projectMap(SparkSession spark, String inputPath, String outputPath, Class<E> inputClazz) {
        Utils.readPath(spark, inputPath, inputClazz)
            .map(p -> mapProject((eu.dnetlib.dhp.schema.oaf.Project) p), Encoders.bean(Project.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) {
        Project project = new Project();

        project.setCollectedfrom(Optional.ofNullable(p.getCollectedfrom())
            .map(cf -> cf.stream().map(coll -> coll.getValue()).collect(Collectors.toList()))
            .orElse(new ArrayList<>()));

        Optional.ofNullable(p.getId())
            .ifPresent(id -> project.setId(id));

        Optional.ofNullable(p.getWebsiteurl())
            .ifPresent(w -> project.setWebsiteurl(w.getValue()));

        Optional.ofNullable(p.getCode())
            .ifPresent(code -> project.setCode(code.getValue()));

        Optional.ofNullable(p.getAcronym())
            .ifPresent(acronynim -> project.setAcronym(acronynim.getValue()));

        Optional.ofNullable(p.getTitle())
            .ifPresent(title -> project.setTitle(title.getValue()));

        Optional.ofNullable(p.getStartdate())
            .ifPresent(sdate -> project.setStartdate(sdate.getValue()));

        Optional.ofNullable(p.getEnddate())
            .ifPresent(edate -> project.setEnddate(edate.getValue()));

        Optional.ofNullable(p.getCallidentifier())
            .ifPresent(cide -> project.setCallidentifier(cide.getValue()));

        Optional.ofNullable(p.getKeywords())
            .ifPresent(key -> project.setKeywords(key.getValue()));

        Optional.ofNullable(p.getDuration())
            .ifPresent(duration -> project.setDuration(duration.getValue()));

        Optional<Field<String>> omandate = Optional.ofNullable(p.getOamandatepublications());
        Optional<Field<String>> oecsc39 = Optional.ofNullable(p.getEcsc39());
        boolean mandate = false;
        if (omandate.isPresent()) {
            if (!omandate.get().getValue().equals("N")) {
                mandate = true;
            }
        }
        if (oecsc39.isPresent()) {
            if (!oecsc39.get().getValue().equals("N")) {
                mandate = true;
            }
        }

        project.setOpenaccessmandateforpublications(mandate);
        project.setOpenaccessmandatefordataset(false);

        Optional.ofNullable(p.getEcarticle29_3())
            .ifPresent(oamandate -> project.setOpenaccessmandatefordataset(oamandate.getValue().equals("Y")));

        project.setSubject(Optional.ofNullable(p.getSubjects())
            .map(subjs -> subjs.stream().map(s -> s.getValue()).collect(Collectors.toList()))
            .orElse(new ArrayList<>()));

        Optional.ofNullable(p.getSummary())
            .ifPresent(summary -> project.setSummary(summary.getValue()));

        Optional<Float> ofundedamount = Optional.ofNullable(p.getFundedamount());
        Optional<Field<String>> ocurrency = Optional.ofNullable(p.getCurrency());
        Optional<Float> ototalcost = Optional.ofNullable(p.getTotalcost());

        if (ocurrency.isPresent()) {
            if (ofundedamount.isPresent()) {
                if (ototalcost.isPresent()) {
                    project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ototalcost.get(), ofundedamount.get()));
                } else {
                    project.setGranted(Granted.newInstance(ocurrency.get().getValue(), ofundedamount.get()));
                }
            }
        }

        project.setProgramme(Optional.ofNullable(p.getProgramme())
            .map(programme -> programme.stream().map(pg -> Programme.newInstance(pg.getCode(), pg.getDescription()))
                .collect(Collectors.toList()))
            .orElse(new ArrayList<>()));

        project.setFunding(Optional.ofNullable(p.getFundingtree())
            .map(value -> value.stream()
                .map(fundingtree -> getFunder(fundingtree.getValue())).collect(Collectors.toList()))
            .orElse(new ArrayList<>()));
        return project;
    }

    public static Funder getFunder(String fundingtree) {

        Funder f = new Funder();
        final Document doc;
        try {
            doc = new SAXReader().read(new StringReader(fundingtree));
            f.setShortName(((org.dom4j.Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
            f.setName(((org.dom4j.Node) (doc.selectNodes("//funder/name").get(0))).getText());
            f.setJurisdiction(((org.dom4j.Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
            f.setId(((org.dom4j.Node) (doc.selectNodes("//funder/id").get(0))).getText());
            List<Levels> fundings = new ArrayList<>();
            int level = 0;
            List<org.dom4j.Node> nodes = doc.selectNodes("//funding_level_" + level);
            while (nodes.size() > 0) {
                for (org.dom4j.Node n : nodes) {
                    Levels funding_stream = new Levels();
                    funding_stream.setLevel(String.valueOf(level));
                    List node = n.selectNodes("./name");
                    funding_stream.setName(((org.dom4j.Node) node.get(0)).getText());
                    node = n.selectNodes("./id");
                    funding_stream.setId(((org.dom4j.Node) node.get(0)).getText());
                    node = n.selectNodes("./description");
                    funding_stream.setDescription(((Node) node.get(0)).getText());
                    fundings.add(funding_stream);
                }
                level += 1;
                nodes = doc.selectNodes("//funding_level_" + level);
            }
            if (fundings.size() > 0) {
                f.setFunding_levels(fundings);
            }

            return f;
        } catch (DocumentException e) {
            e.printStackTrace();
        }
        return f;
    }

    private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath, Class<E> inputClazz) {
        Utils.readPath(spark, inputPath, inputClazz)
            .map(o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o), Encoders.bean(Organization.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
        Organization organization = new Organization();

        Optional.ofNullable(org.getLegalshortname())
            .ifPresent(value -> organization.setLegalshortname(value.getValue()));

        Optional.ofNullable(org.getLegalname())
            .ifPresent(value -> organization.setLegalname(value.getValue()));

        Optional.ofNullable(org.getWebsiteurl())
            .ifPresent(value -> organization.setWebsiteurl(value.getValue()));

        Optional.ofNullable(org.getAlternativeNames())
            .ifPresent(value -> organization.setAlternativenames(value.stream()
                .map(v -> v.getValue()).collect(Collectors.toList())));

        Optional.ofNullable(org.getCountry())
            .ifPresent(value -> organization.setCountry(Qualifier.newInstance(value.getClassid(), value.getClassname())));

        Optional.ofNullable(org.getId())
            .ifPresent(value -> organization.setId(value));

        Optional.ofNullable(org.getPid())
            .ifPresent(value -> organization.setPid(
                value.stream().map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList())));

        organization.setCollectedfrom(Optional.ofNullable(org.getCollectedfrom())
            .map(value -> value.stream()
                .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue())).collect(Collectors.toList()))
            .orElse(new ArrayList<>()));

        return organization;
    }
}
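Note: getFunder infers the funder and funding-stream structure from the fundingtree XML via the XPath expressions //funder/shortname|name|jurisdiction|id and //funding_level_N/name|id|description. The sketch below is only an illustration of that assumed shape (the sample funder and ids are invented); real fundingtree payloads may differ. It also assumes a Funder that still exposes setId and setFunding_levels, which this same commit comments out in the revised Funder class, so as committed getFunder and Funder are out of sync.

// Hypothetical check of DumpGraph.getFunder against a hand-written fundingtree, under the assumptions above.
import eu.dnetlib.dhp.oa.graph.dump.graph.DumpGraph;
import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;

public class FunderParsingSketch {
    public static void main(String[] args) {
        String fundingtree =
            "<fundingtree>" +
            "  <funder>" +
            "    <id>ec__________::EC</id>" +                      // example id, not real data
            "    <shortname>EC</shortname>" +
            "    <name>European Commission</name>" +
            "    <jurisdiction>EU</jurisdiction>" +
            "  </funder>" +
            "  <funding_level_0>" +
            "    <id>ec__________::EC::H2020</id>" +               // example id, not real data
            "    <name>H2020</name>" +
            "    <description>Horizon 2020 Framework Programme</description>" +
            "  </funding_level_0>" +
            "</fundingtree>";

        Funder f = DumpGraph.getFunder(fundingtree);
        System.out.println(f.getShortName() + " / " + f.getJurisdiction()); // expected: EC / EU
    }
}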
eu/dnetlib/dhp/resulttocommunityfromorganization/OrganizationMap.java (new file)
@@ -0,0 +1,21 @@

package eu.dnetlib.dhp.resulttocommunityfromorganization;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

public class OrganizationMap extends HashMap<String, List<String>> {

    public OrganizationMap() {
        super();
    }

    public List<String> get(String key) {

        if (super.get(key) == null) {
            return new ArrayList<>();
        }
        return super.get(key);
    }
}
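Note: the get(String) variant added here returns an empty list for missing keys instead of null, so callers can read a community's organization list without a containsKey guard. A tiny hypothetical usage (the community key and organization id are invented example values):

import java.util.Arrays;

import eu.dnetlib.dhp.resulttocommunityfromorganization.OrganizationMap;

public class OrganizationMapSketch {
    public static void main(String[] args) {
        OrganizationMap map = new OrganizationMap();
        System.out.println(map.get("covid-19").size()); // 0, not a NullPointerException
        map.put("covid-19", Arrays.asList("20|openorgs____::abc")); // hypothetical organization id
        System.out.println(map.get("covid-19"));
    }
}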
eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java (new file)
@@ -0,0 +1,63 @@

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.StringReader;
import java.util.List;

import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class QueryInformationSystem {

    private ISLookUpService isLookUp;

    private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
        +
        " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
        " and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
        +
        " return " +
        "<community> " +
        "{$x//CONFIGURATION/context/@id}" +
        "{$x//CONFIGURATION/context/@label}" +
        "</community>";

    public CommunityMap getCommunityMap()
        throws ISLookUpException {
        return getMap(isLookUp.quickSearchProfile(XQUERY));
    }

    public ISLookUpService getIsLookUp() {
        return isLookUp;
    }

    public void setIsLookUp(ISLookUpService isLookUpService) {
        this.isLookUp = isLookUpService;
    }

    private CommunityMap getMap(List<String> communityMap) {
        final CommunityMap map = new CommunityMap();

        communityMap.stream().forEach(xml -> {
            final Document doc;
            try {
                doc = new SAXReader().read(new StringReader(xml));
                Element root = doc.getRootElement();
                map.put(root.attribute("id").getValue(), root.attribute("label").getValue());
            } catch (DocumentException e) {
                e.printStackTrace();
            }
        });

        return map;
    }

}
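Note: the XQuery above returns one <community> element per community or research-initiative context, carrying the @id and @label attributes that getMap copies into the CommunityMap. The snippet below only illustrates that assumed record shape (the id/label values are invented for the example) and mirrors the dom4j extraction getMap performs:

import java.io.StringReader;

import org.dom4j.Element;
import org.dom4j.io.SAXReader;

public class CommunityProfileSketch {
    public static void main(String[] args) throws Exception {
        String xml = "<community id=\"egi\" label=\"EGI Federation\"/>"; // shape assumed from the XQuery
        Element root = new SAXReader().read(new StringReader(xml)).getRootElement();
        // this is exactly the pair getMap puts into the CommunityMap
        System.out.println(root.attribute("id").getValue() + " -> " + root.attribute("label").getValue());
    }
}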
eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpJob.java (new file)
@@ -0,0 +1,84 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.avro.generic.GenericData;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.swing.text.html.Option;
import javax.xml.bind.annotation.adapters.CollapsedStringAdapter;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

public class SparkDumpJob implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkDumpJob.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);

        QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
        queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
        CommunityMap communityMap = queryInformationSystem.getCommunityMap();

        DumpGraph dg = new DumpGraph();
        dg.run(isSparkSessionManaged, inputPath, outputPath, inputClazz, communityMap);
    }

}
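Note: the driver now only resolves the input class and the community map and delegates to DumpGraph, which dispatches on the entity id prefix. A minimal hypothetical call mirroring what main does for a project dump follows; the paths are example values, and the statement that ModelSupport.idPrefixMap maps Project to "40" (with "50" for results and "20" for organizations) is an assumption about dnet-hadoop's ModelSupport, not something visible in this diff.

// Hypothetical driver call for a project dump, under the assumptions above.
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.graph.DumpGraph;

public class DumpProjectsSketch {
    public static void main(String[] args) throws Exception {
        CommunityMap communityMap = new CommunityMap(); // normally filled from the Information System
        new DumpGraph().run(
            Boolean.TRUE,                            // isSparkSessionManaged
            "/tmp/graph/project",                    // example source path
            "/tmp/dump/project",                     // example output path
            eu.dnetlib.dhp.schema.oaf.Project.class, // selects the "40" (project) branch in DumpGraph.run
            communityMap);
    }
}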
eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpJob.java (deleted — previous version)
@@ -1,184 +0,0 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.QueryInformationSystem;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.community.SparkDumpCommunityProducts;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

public class SparkDumpJob implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkDumpJob.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_graphdump_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        Class<? extends OafEntity> inputClazz = (Class<? extends OafEntity>) Class.forName(resultClassName);

        QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
        queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl));
        CommunityMap communityMap = queryInformationSystem.getCommunityMap();
        SparkConf conf = new SparkConf();
        switch (ModelSupport.idPrefixMap.get(inputClazz)) {
            case "50":
                DumpProducts d = new DumpProducts();
                d.run(isSparkSessionManaged, inputPath, outputPath, communityMap, inputClazz, true);
                break;
            case "40":
                runWithSparkSession(
                    conf,
                    isSparkSessionManaged,
                    spark -> {
                        Utils.removeOutputDir(spark, outputPath);
                        projectMap(spark, inputPath, outputPath);
                    });
                break;
            case "20":
                runWithSparkSession(
                    conf,
                    isSparkSessionManaged,
                    spark -> {
                        Utils.removeOutputDir(spark, outputPath);
                        organizationMap(spark, inputPath, outputPath);
                    });
                break;
        }

    }

    private static void projectMap(SparkSession spark, String inputPath, String outputPath) {
        Utils.readPath(spark, inputPath, eu.dnetlib.dhp.schema.oaf.Project.class)
            .map(p -> mapProject(p), Encoders.bean(Project.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Project mapProject(eu.dnetlib.dhp.schema.oaf.Project p) {
        Project project = new Project();

        Optional.ofNullable(p.getId())
            .ifPresent(id -> project.setId(id));

        Optional.ofNullable(p.getWebsiteurl())
            .ifPresent(w -> project.setWebsiteurl(w.getValue()));

        Optional.ofNullable(p.getCode())
            .ifPresent(code -> project.setCode(code));

        // private String acronym;
        // private String title;
        // private String startdate;
        //
        // private String enddate;
        //
        // private String callidentifier;
        //
        // private String keywords;
        //
        // private String duration;
        //
        // private boolean openaccessmandateforpublications;
        //
        // private boolean openaccessmandatefordataset;
        // private List<String> subject;
        // private Funder funding;
        //
        // private String summary;
        //
        // private Granted granted;
        //
        // private Programme programme;
        return project;
    }

    private static void organizationMap(SparkSession spark, String inputPath, String outputPath) {
        Utils.readPath(spark, inputPath, eu.dnetlib.dhp.schema.oaf.Organization.class)
            .map(o -> mapOrganization(o), Encoders.bean(Organization.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
        Organization organization = new Organization();
        Optional.ofNullable(org.getLegalshortname())
            .ifPresent(value -> organization.setLegalshortname(value.getValue()));

        Optional.ofNullable(org.getLegalname())
            .ifPresent(value -> organization.setLegalname(value.getValue()));

        Optional.ofNullable(org.getWebsiteurl())
            .ifPresent(value -> organization.setWebsiteurl(value.getValue()));

        Optional.ofNullable(org.getAlternativeNames())
            .ifPresent(value -> organization.setAlternativenames(value.stream()
                .map(v -> v.getValue()).collect(Collectors.toList())));

        Optional.ofNullable(org.getCountry())
            .ifPresent(value -> organization.setCountry(Country.newInstance(value.getClassid(), value.getClassname(), null)));

        Optional.ofNullable(org.getId())
            .ifPresent(value -> organization.setId(value));

        Optional.ofNullable(org.getPid())
            .ifPresent(value -> organization.setPid(
                value.stream().map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue())).collect(Collectors.toList())));

        return organization;
    }

}
eu/dnetlib/dhp/oa/graph/dump/graph/SparkDumpRelationJob.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class SparkDumpRelationJob {
}
eu/dnetlib/dhp/oa/graph/dump/graph/SparkExtractRelationFromEntities.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class SparkExtractRelationFromEntities {
}
eu/dnetlib/dhp/oa/graph/dump/graph/SparkOrganizationRelation.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class SparkOrganizationRelation {
}
(new JSON parameter definition file)
@@ -0,0 +1,38 @@

[
  {
    "paramName":"is",
    "paramLongName":"isLookUpUrl",
    "paramDescription": "URL of the isLookUp Service",
    "paramRequired": false
  },
  {
    "paramName":"s",
    "paramLongName":"sourcePath",
    "paramDescription": "the path of the sequencial file to read",
    "paramRequired": true
  },
  {
    "paramName": "out",
    "paramLongName": "outputPath",
    "paramDescription": "the path used to store temporary output files",
    "paramRequired": true
  },
  {
    "paramName": "ssm",
    "paramLongName": "isSparkSessionManaged",
    "paramDescription": "true if the spark session is managed, false otherwise",
    "paramRequired": false
  },
  {
    "paramName":"tn",
    "paramLongName":"resultTableName",
    "paramDescription": "the name of the result table we are currently working on",
    "paramRequired": true
  }
]
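Note: each JSON entry defines one option that ArgumentApplicationParser reads in the Spark drivers above. A hypothetical argument vector matching these definitions is sketched below; the paths and URL are example values, and the --<paramLongName> syntax is assumed from how such jobs are usually invoked rather than shown in this diff.

// Hypothetical argument vector for the graph dump driver, under the assumptions above.
public class GraphDumpArgsSketch {
    public static void main(String[] unused) {
        String[] args = {
            "--isSparkSessionManaged", "false",
            "--isLookUpUrl", "http://example.org/is/services/isLookUp", // example URL
            "--sourcePath", "/tmp/graph/organization",                  // example input
            "--outputPath", "/tmp/dump/organization",                   // example output
            "--resultTableName", "eu.dnetlib.dhp.schema.oaf.Organization"
        };
        // SparkDumpJob.main(args) would then resolve the table class and dispatch via DumpGraph
        System.out.println(String.join(" ", args));
    }
}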
eu/dnetlib/dhp/oa/graph/dump/graph/DumpOrganizationProjectTest.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class DumpOrganizationProjectTest {
}
eu/dnetlib/dhp/oa/graph/dump/graph/DumpRelationTest.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class DumpRelationTest {
}
eu/dnetlib/dhp/oa/graph/dump/graph/FunderParsingTest.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class FunderParsingTest {
}
eu/dnetlib/dhp/oa/graph/dump/graph/QueryInformationSystemTest.java (new file)
@@ -0,0 +1,4 @@

package eu.dnetlib.dhp.oa.graph.dump.graph;

public class QueryInformationSystemTest {
}