Extended some types by adding provenance as a (provenance, trust) pair, and moved some classes so that they can also be used by the complete graph dump

This commit is contained in:
Miriam Baglioni 2020-07-20 17:46:27 +02:00
parent f9ad6f3255
commit e47ea9349c
28 changed files with 644 additions and 84 deletions

View File

@ -3,7 +3,6 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
public class Author implements Serializable {
@ -15,7 +14,7 @@ public class Author implements Serializable {
private Integer rank;
private List<ControlledField> pid;
private List<Pid> pids;
private List<String> affiliation;
@ -51,12 +50,12 @@ public class Author implements Serializable {
this.rank = rank;
}
public List<ControlledField> getPid() {
return pid;
public List<Pid> getPids() {
return pids;
}
public void setPid(List<ControlledField> pid) {
this.pid = pid;
public void setPids(List<Pid> pids) {
this.pids = pids;
}
public List<String> getAffiliation() {

View File

@ -4,13 +4,13 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.util.List;
public class Context extends Qualifier {
private List<String> provenance;
private List<Provenance> provenance;
public List<String> getProvenance() {
public List<Provenance> getProvenance() {
return provenance;
}
public void setProvenance(List<String> provenance) {
public void setProvenance(List<Provenance> provenance) {
this.provenance = provenance;
}
}

View File

@ -3,17 +3,17 @@ package eu.dnetlib.dhp.schema.dump.oaf;
public class Country extends Qualifier {
private String provenance;
private Provenance provenance;
public String getProvenance() {
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public static Country newInstance(String code, String label, String provenance) {
public static Country newInstance(String code, String label, Provenance provenance) {
Country c = new Country();
c.setProvenance(provenance);
c.setCode(code);
@ -21,4 +21,8 @@ public class Country extends Qualifier {
return c;
}
public static Country newInstance(String code, String label, String provenance, String trust){
return newInstance(code, label, Provenance.newInstance(provenance, trust));
}
}

View File

@ -6,15 +6,13 @@ import java.util.Objects;
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
//ExtraInfo
//ExtraInfo renamed ExternalReference do not confuse with ExternalReference in oaf schema
public class ExternalReference implements Serializable {
private String name;
private String typology;
private String provenance;
private String trust;
private Provenance provenance;
// json containing a Citation or Statistics
private String value;
@ -35,22 +33,14 @@ public class ExternalReference implements Serializable {
this.typology = typology;
}
public String getProvenance() {
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(String provenance) {
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public String getTrust() {
return trust;
}
public void setTrust(String trust) {
this.trust = trust;
}
public String getValue() {
return value;
}
@ -64,8 +54,7 @@ public class ExternalReference implements Serializable {
er.name = ei.getName();
er.typology = ei.getTypology();
er.provenance = ei.getProvenance();
er.trust = ei.getTrust();
er.provenance = Provenance.newInstance(ei.getProvenance(), ei.getTrust());
er.value = ei.getValue();
return er;
}

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
import java.io.Serializable;
import java.util.List;
@ -14,7 +16,7 @@ public abstract class OafEntity extends Oaf implements Serializable {
private String dateofcollection;
private List<Projects> projects;
private List<Project> projects;
public String getId() {
return id;
@ -48,11 +50,11 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.dateofcollection = dateofcollection;
}
public List<Projects> getProjects() {
public List<Project> getProjects() {
return projects;
}
public void setProjects(List<Projects> projects) {
public void setProjects(List<Project> projects) {
this.projects = projects;
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
 * Serializable bean pairing a persistent identifier with the provenance
 * information attached to it.
 */
public class Pid implements Serializable {

    /** The identifier itself. */
    private ControlledField pid;

    /** Provenance associated with the identifier; may be unset. */
    private Provenance provenance;

    /** @return the identifier, or {@code null} when it has not been set */
    public ControlledField getPid() {
        return this.pid;
    }

    /** @param pid the identifier to store */
    public void setPid(ControlledField pid) {
        this.pid = pid;
    }

    /** @return the provenance, or {@code null} when it has not been set */
    public Provenance getProvenance() {
        return this.provenance;
    }

    /** @param provenance the provenance to store */
    public void setProvenance(Provenance provenance) {
        this.provenance = provenance;
    }
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
 * Serializable bean holding a provenance label together with the trust value
 * associated with it. Both values are kept as plain strings.
 */
public class Provenance implements Serializable {

    private String provenance;
    private String trust;

    /**
     * Builds a new instance populated with the given values.
     *
     * @param provenance the provenance label (stored as-is, may be {@code null})
     * @param trust the trust value (stored as-is, may be {@code null})
     * @return a freshly created {@code Provenance}
     */
    public static Provenance newInstance(String provenance, String trust) {
        final Provenance created = new Provenance();
        created.setProvenance(provenance);
        created.setTrust(trust);
        return created;
    }

    /** @return the provenance label, or {@code null} when unset */
    public String getProvenance() {
        return this.provenance;
    }

    /** @param provenance the provenance label to store */
    public void setProvenance(String provenance) {
        this.provenance = provenance;
    }

    /** @return the trust value, or {@code null} when unset */
    public String getTrust() {
        return this.trust;
    }

    /** @param trust the trust value to store */
    public void setTrust(String trust) {
        this.trust = trust;
    }
}

View File

@ -16,7 +16,7 @@ public class Result extends OafEntity implements Serializable {
private List<Country> country;
private List<ControlledField> subject;
private List<Subject> subjects;
private String maintitle;
@ -106,12 +106,12 @@ public class Result extends OafEntity implements Serializable {
this.country = country;
}
public List<ControlledField> getSubject() {
return subject;
public List<Subject> getSubjects() {
return subjects;
}
public void setSubject(List<ControlledField> subject) {
this.subject = subject;
public void setSubjects(List<Subject> subjects) {
this.subjects = subjects;
}
public String getMaintitle() {

View File

@ -0,0 +1,25 @@
package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;
/**
 * Serializable bean pairing a subject with the provenance information
 * attached to it.
 */
public class Subject implements Serializable {

    /** The subject itself. */
    private ControlledField subject;

    /** Provenance associated with the subject; may be unset. */
    private Provenance provenance;

    /** @return the subject, or {@code null} when it has not been set */
    public ControlledField getSubject() {
        return this.subject;
    }

    /** @param subject the subject to store */
    public void setSubject(ControlledField subject) {
        this.subject = subject;
    }

    /** @return the provenance, or {@code null} when it has not been set */
    public Provenance getProvenance() {
        return this.provenance;
    }

    /** @param provenance the provenance to store */
    public void setProvenance(Provenance provenance) {
        this.provenance = provenance;
    }
}

View File

@ -1,11 +1,12 @@
package eu.dnetlib.dhp.schema.dump.oaf;
package eu.dnetlib.dhp.schema.dump.oaf.community;
import java.util.List;
import eu.dnetlib.dhp.schema.dump.oaf.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.oaf.Project;
import java.io.Serializable;
public class Projects {
public class Project implements Serializable {
private String id;// OpenAIRE id
private String code;
@ -16,6 +17,16 @@ public class Projects {
private Funder funder;
private Provenance provenance;
public Provenance getProvenance() {
return provenance;
}
public void setProvenance(Provenance provenance) {
this.provenance = provenance;
}
public String getId() {
return id;
}
@ -56,13 +67,13 @@ public class Projects {
this.funder = funders;
}
public static Projects newInstance(String id, String code, String acronym, String title, Funder funder) {
Projects projects = new Projects();
projects.setAcronym(acronym);
projects.setCode(code);
projects.setFunder(funder);
projects.setId(id);
projects.setTitle(title);
return projects;
public static Project newInstance(String id, String code, String acronym, String title, Funder funder) {
Project project = new Project();
project.setAcronym(acronym);
project.setCode(code);
project.setFunder(funder);
project.setId(id);
project.setTitle(title);
return project;
}
}

View File

@ -0,0 +1,57 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean describing a funder in the graph dump model: an id, a
 * short name, a full name, a list of funding levels and a jurisdiction.
 */
public class Funder implements Serializable {

    private String id;
    private String shortName;
    private String name;
    private List<Levels> funding_levels;
    private String jurisdiction;

    /** @return the funder id, or {@code null} when unset */
    public String getId() {
        return this.id;
    }

    /** @param id the funder id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the short name, or {@code null} when unset */
    public String getShortName() {
        return this.shortName;
    }

    /** @param shortName the short name to store */
    public void setShortName(String shortName) {
        this.shortName = shortName;
    }

    /** @return the full name, or {@code null} when unset */
    public String getName() {
        return this.name;
    }

    /** @param name the full name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the funding levels (the stored list itself, not a copy), or {@code null} when unset */
    public List<Levels> getFunding_levels() {
        return this.funding_levels;
    }

    /** @param funding_levels the funding levels to store (kept by reference) */
    public void setFunding_levels(List<Levels> funding_levels) {
        this.funding_levels = funding_levels;
    }

    /** @return the jurisdiction, or {@code null} when unset */
    public String getJurisdiction() {
        return this.jurisdiction;
    }

    /** @param jurisdiction the jurisdiction to store */
    public void setJurisdiction(String jurisdiction) {
        this.jurisdiction = jurisdiction;
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
 * Serializable bean carrying grant figures as strings: the currency, the
 * total cost and the funded amount.
 */
public class Granted implements Serializable {

    private String currency;
    private String totalcost;
    private String fundedamount;

    /** @return the currency, or {@code null} when unset */
    public String getCurrency() {
        return this.currency;
    }

    /** @param currency the currency to store */
    public void setCurrency(String currency) {
        this.currency = currency;
    }

    /** @return the total cost, or {@code null} when unset */
    public String getTotalcost() {
        return this.totalcost;
    }

    /** @param totalcost the total cost to store */
    public void setTotalcost(String totalcost) {
        this.totalcost = totalcost;
    }

    /** @return the funded amount, or {@code null} when unset */
    public String getFundedamount() {
        return this.fundedamount;
    }

    /** @param fundedamount the funded amount to store */
    public void setFundedamount(String fundedamount) {
        this.fundedamount = fundedamount;
    }
}

View File

@ -0,0 +1,42 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
 * Serializable bean describing one funding level through four string
 * properties: {@code level}, {@code il}, {@code description} and {@code name}.
 */
public class Levels implements Serializable {

    private String level;
    private String il;
    private String description;
    private String name;

    /** @return the level, or {@code null} when unset */
    public String getLevel() {
        return this.level;
    }

    /** @param level the level to store */
    public void setLevel(String level) {
        this.level = level;
    }

    /** @return the {@code il} value, or {@code null} when unset */
    public String getIl() {
        return this.il;
    }

    /** @param il the {@code il} value to store */
    public void setIl(String il) {
        this.il = il;
    }

    /** @return the description, or {@code null} when unset */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description to store */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the name, or {@code null} when unset */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
 * Serializable bean identifying a graph node by an id and a type, both kept
 * as strings.
 */
public class Node implements Serializable {

    private String id;
    private String type;

    /** @return the node id, or {@code null} when unset */
    public String getId() {
        return this.id;
    }

    /** @param id the node id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the node type, or {@code null} when unset */
    public String getType() {
        return this.type;
    }

    /** @param type the node type to store */
    public void setType(String type) {
        this.type = type;
    }
}

View File

@ -0,0 +1,83 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import eu.dnetlib.dhp.schema.dump.oaf.ControlledField;
import eu.dnetlib.dhp.schema.dump.oaf.Country;
import eu.dnetlib.dhp.schema.dump.oaf.community.Project;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean describing an organization in the graph dump model. It
 * is a plain property holder: every accessor reads or writes the matching
 * field without any validation or copying.
 */
public class Organization implements Serializable {

    private String legalshortname;
    private String legalname;
    private String websiteurl;
    private List<String> alternativenames;
    private Country country;
    private String id;
    private List<ControlledField> pid;
    private String collectedfrom;

    /** @return the legal short name, or {@code null} when unset */
    public String getLegalshortname() {
        return this.legalshortname;
    }

    /** @param legalshortname the legal short name to store */
    public void setLegalshortname(String legalshortname) {
        this.legalshortname = legalshortname;
    }

    /** @return the legal name, or {@code null} when unset */
    public String getLegalname() {
        return this.legalname;
    }

    /** @param legalname the legal name to store */
    public void setLegalname(String legalname) {
        this.legalname = legalname;
    }

    /** @return the website URL, or {@code null} when unset */
    public String getWebsiteurl() {
        return this.websiteurl;
    }

    /** @param websiteurl the website URL to store */
    public void setWebsiteurl(String websiteurl) {
        this.websiteurl = websiteurl;
    }

    /** @return the alternative names (the stored list itself), or {@code null} when unset */
    public List<String> getAlternativenames() {
        return this.alternativenames;
    }

    /** @param alternativenames the alternative names to store (kept by reference) */
    public void setAlternativenames(List<String> alternativenames) {
        this.alternativenames = alternativenames;
    }

    /** @return the country, or {@code null} when unset */
    public Country getCountry() {
        return this.country;
    }

    /** @param country the country to store */
    public void setCountry(Country country) {
        this.country = country;
    }

    /** @return the organization id, or {@code null} when unset */
    public String getId() {
        return this.id;
    }

    /** @param id the organization id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the identifiers (the stored list itself), or {@code null} when unset */
    public List<ControlledField> getPid() {
        return this.pid;
    }

    /** @param pid the identifiers to store (kept by reference) */
    public void setPid(List<ControlledField> pid) {
        this.pid = pid;
    }

    /** @return the {@code collectedfrom} value, or {@code null} when unset */
    public String getCollectedfrom() {
        return this.collectedfrom;
    }

    /** @param collectedfrom the {@code collectedfrom} value to store */
    public void setCollectedfrom(String collectedfrom) {
        this.collectedfrom = collectedfrom;
    }
}

View File

@ -0,0 +1,26 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
 * Serializable bean describing a programme by a code and a description, both
 * kept as strings.
 */
public class Programme implements Serializable {

    private String code;
    private String description;

    /** @return the programme code, or {@code null} when unset */
    public String getCode() {
        return this.code;
    }

    /** @param code the programme code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the description, or {@code null} when unset */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description to store */
    public void setDescription(String description) {
        this.description = description;
    }
}

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable bean describing a project in the graph dump model.
 *
 * <p>The class originally declared only private fields, which made its state
 * unreachable from any other code (including bean-based encoders and
 * serializers). Standard JavaBean accessors are provided for every field so
 * that it can be populated and read like the other beans of this package.
 */
public class Project implements Serializable {

    private String id;
    private String websiteurl;
    private String code;
    private String acronym;
    private String title;
    private String startdate;
    private String enddate;
    private String callidentifier;
    private String keywords;
    private String duration;
    private boolean openaccessmandateforpublications;
    private boolean openaccessmandatefordataset;
    private List<String> subject;
    private Funder funding;
    private String summary;
    private Granted granted;
    private Programme programme;

    /** @return the project id, or {@code null} when unset */
    public String getId() {
        return id;
    }

    /** @param id the project id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the website URL, or {@code null} when unset */
    public String getWebsiteurl() {
        return websiteurl;
    }

    /** @param websiteurl the website URL to store */
    public void setWebsiteurl(String websiteurl) {
        this.websiteurl = websiteurl;
    }

    /** @return the project code, or {@code null} when unset */
    public String getCode() {
        return code;
    }

    /** @param code the project code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the acronym, or {@code null} when unset */
    public String getAcronym() {
        return acronym;
    }

    /** @param acronym the acronym to store */
    public void setAcronym(String acronym) {
        this.acronym = acronym;
    }

    /** @return the title, or {@code null} when unset */
    public String getTitle() {
        return title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the start date as a string, or {@code null} when unset */
    public String getStartdate() {
        return startdate;
    }

    /** @param startdate the start date to store */
    public void setStartdate(String startdate) {
        this.startdate = startdate;
    }

    /** @return the end date as a string, or {@code null} when unset */
    public String getEnddate() {
        return enddate;
    }

    /** @param enddate the end date to store */
    public void setEnddate(String enddate) {
        this.enddate = enddate;
    }

    /** @return the call identifier, or {@code null} when unset */
    public String getCallidentifier() {
        return callidentifier;
    }

    /** @param callidentifier the call identifier to store */
    public void setCallidentifier(String callidentifier) {
        this.callidentifier = callidentifier;
    }

    /** @return the keywords, or {@code null} when unset */
    public String getKeywords() {
        return keywords;
    }

    /** @param keywords the keywords to store */
    public void setKeywords(String keywords) {
        this.keywords = keywords;
    }

    /** @return the duration as a string, or {@code null} when unset */
    public String getDuration() {
        return duration;
    }

    /** @param duration the duration to store */
    public void setDuration(String duration) {
        this.duration = duration;
    }

    /** @return the open-access-mandate-for-publications flag ({@code false} by default) */
    public boolean isOpenaccessmandateforpublications() {
        return openaccessmandateforpublications;
    }

    /** @param openaccessmandateforpublications the flag value to store */
    public void setOpenaccessmandateforpublications(boolean openaccessmandateforpublications) {
        this.openaccessmandateforpublications = openaccessmandateforpublications;
    }

    /** @return the open-access-mandate-for-dataset flag ({@code false} by default) */
    public boolean isOpenaccessmandatefordataset() {
        return openaccessmandatefordataset;
    }

    /** @param openaccessmandatefordataset the flag value to store */
    public void setOpenaccessmandatefordataset(boolean openaccessmandatefordataset) {
        this.openaccessmandatefordataset = openaccessmandatefordataset;
    }

    /** @return the subjects (the stored list itself), or {@code null} when unset */
    public List<String> getSubject() {
        return subject;
    }

    /** @param subject the subjects to store (kept by reference) */
    public void setSubject(List<String> subject) {
        this.subject = subject;
    }

    /** @return the funding information, or {@code null} when unset */
    public Funder getFunding() {
        return funding;
    }

    /** @param funding the funding information to store */
    public void setFunding(Funder funding) {
        this.funding = funding;
    }

    /** @return the summary, or {@code null} when unset */
    public String getSummary() {
        return summary;
    }

    /** @param summary the summary to store */
    public void setSummary(String summary) {
        this.summary = summary;
    }

    /** @return the grant figures, or {@code null} when unset */
    public Granted getGranted() {
        return granted;
    }

    /** @param granted the grant figures to store */
    public void setGranted(Granted granted) {
        this.granted = granted;
    }

    /** @return the programme, or {@code null} when unset */
    public Programme getProgramme() {
        return programme;
    }

    /** @param programme the programme to store */
    public void setProgramme(Programme programme) {
        this.programme = programme;
    }
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import java.io.Serializable;
/**
 * Serializable bean naming the type of a relation through two strings:
 * {@code name} (the relclass) and {@code type} (the subreltype).
 */
public class RelType implements Serializable {

    /** The relclass. */
    private String name;

    /** The subreltype. */
    private String type;

    /** @return the relclass, or {@code null} when unset */
    public String getName() {
        return this.name;
    }

    /** @param name the relclass to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the subreltype, or {@code null} when unset */
    public String getType() {
        return this.type;
    }

    /** @param type the subreltype to store */
    public void setType(String type) {
        this.type = type;
    }
}

View File

@ -0,0 +1,44 @@
package eu.dnetlib.dhp.schema.dump.oaf.graph;
import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import java.io.Serializable;
/**
 * Serializable bean describing a relation in the graph dump model: a source
 * node, a target node, the relation type and the provenance of the relation.
 */
public class Relation implements Serializable {

    private Node source;
    private Node target;
    private RelType reltype;
    private Provenance provenance;

    /** @return the source node, or {@code null} when unset */
    public Node getSource() {
        return this.source;
    }

    /** @param source the source node to store */
    public void setSource(Node source) {
        this.source = source;
    }

    /** @return the target node, or {@code null} when unset */
    public Node getTarget() {
        return this.target;
    }

    /** @param target the target node to store */
    public void setTarget(Node target) {
        this.target = target;
    }

    /** @return the relation type, or {@code null} when unset */
    public RelType getReltype() {
        return this.reltype;
    }

    /** @param reltype the relation type to store */
    public void setReltype(RelType reltype) {
        this.reltype = reltype;
    }

    /** @return the provenance, or {@code null} when unset */
    public Provenance getProvenance() {
        return this.provenance;
    }

    /** @param provenance the provenance to store */
    public void setProvenance(Provenance provenance) {
        this.provenance = provenance;
    }
}

View File

@ -0,0 +1,84 @@
package eu.dnetlib.dhp.oa.graph.dump.community;
import eu.dnetlib.dhp.oa.graph.dump.ResultMapper;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * Reads results of a given type from {@code inputPath}, maps them through
 * {@link ResultMapper} and writes the outcome as gzip-compressed JSON to
 * {@code outputPath}.
 *
 * <p>When {@code graph} is {@code false}, only results having at least one
 * context that belongs to a community listed in the {@link CommunityMap} are
 * kept; when it is {@code true}, every result is mapped.
 */
public class DumpProducts implements Serializable {

    /** Separator after which a context id carries a sub-community suffix. */
    private static final String CONTEXT_SEPARATOR = "::";

    /**
     * Runs the dump inside a Spark session, removing the output directory first.
     *
     * @param isSparkSessionManaged passed through to {@code runWithSparkSession}
     * @param inputPath path the results are read from
     * @param outputPath path the dump is written to (removed before writing)
     * @param communityMap community ids (keys) used for filtering and mapping
     * @param inputClazz concrete result class to read
     * @param graph {@code true} to skip the community filter
     */
    public void run(Boolean isSparkSessionManaged, String inputPath, String outputPath, CommunityMap communityMap,
        Class<? extends Result> inputClazz, boolean graph) {
        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                execDump(spark, inputPath, outputPath, communityMap, inputClazz, graph);
            });
    }

    /**
     * Reads the input dataset, maps each element with {@link #execMap}, drops
     * the elements mapped to {@code null} and writes the rest as gzip JSON,
     * overwriting any existing output.
     *
     * @param spark the active Spark session
     * @param inputPath path the results are read from
     * @param outputPath path the dump is written to
     * @param communityMap community ids (keys) used for filtering and mapping
     * @param inputClazz concrete result class to read
     * @param graph {@code true} to skip the community filter
     */
    public static <I extends Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(
        SparkSession spark,
        String inputPath,
        String outputPath,
        CommunityMap communityMap,
        Class<I> inputClazz,
        boolean graph) {
        Dataset<I> tmp = Utils.readPath(spark, inputPath, inputClazz);
        tmp
            .map(value -> execMap(value, communityMap, graph), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.Result.class))
            .filter(Objects::nonNull)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    /**
     * Maps a single result, or returns {@code null} when it must be discarded.
     *
     * @return the mapped result; {@code null} when {@code graph} is
     *         {@code false} and the result has no context related to a
     *         community of {@code communityMap}
     */
    private static <I extends Result> eu.dnetlib.dhp.schema.dump.oaf.Result execMap(I value,
        CommunityMap communityMap,
        boolean graph) {
        if (!graph) {
            List<Context> contexts = value.getContext();
            if (contexts == null) {
                return null;
            }
            Set<String> communities = communityMap.keySet();
            // Only the existence of a matching context matters, so no list of
            // matching ids needs to be materialized.
            boolean relevant = contexts
                .stream()
                .filter(Objects::nonNull)
                .map(Context::getId)
                .anyMatch(id -> isCommunityId(communities, id));
            if (!relevant) {
                return null;
            }
        }
        return ResultMapper.map(value, communityMap);
    }

    /**
     * Tells whether a context id designates one of the given communities,
     * either directly or through the prefix preceding the first "::".
     * The prefix is cut at the separator position rather than at a fixed
     * length, so ids of any length are handled; a {@code null} id never matches.
     */
    private static boolean isCommunityId(Set<String> communities, String contextId) {
        if (contextId == null) {
            return false;
        }
        if (communities.contains(contextId)) {
            return true;
        }
        int separatorAt = contextId.indexOf(CONTEXT_SEPARATOR);
        return separatorAt >= 0 && communities.contains(contextId.substring(0, separatorAt));
    }
}

View File

@ -1,15 +1,15 @@
package eu.dnetlib.dhp.oa.graph.dump;
package eu.dnetlib.dhp.oa.graph.dump.community;
import java.io.StringReader;
import java.util.List;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

View File

@ -5,10 +5,6 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import javax.swing.text.html.Option;
import org.apache.avro.generic.GenericData;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
@ -166,17 +162,9 @@ public class Mapper implements Serializable {
community_id = community_id.substring(0, community_id.indexOf("::"));
}
if (communities.contains(community_id)) {
// || communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
Context context = new Context();
context.setCode(community_id);
context.setLabel(communityMap.get(community_id));
// if (!communityMap.containsKey(c.getId())) {
// context.setCode(c.getId().substring(0, c.getId().indexOf("::")));
// context.setLabel(communityMap.get(context.getCode()));
// } else {
// context.setCode(c.getId());
// context.setLabel(communityMap.get(c.getId()));
// }
Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
if (dataInfo.isPresent()) {
List<String> provenance = new ArrayList<>();
@ -368,16 +356,18 @@ public class Mapper implements Serializable {
.ofNullable(input.getSource())
.ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
// out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
List<ControlledField> subjectList = new ArrayList<>();
Optional
.ofNullable(input.getSubject())
.ifPresent(
value -> value
.stream()
.forEach(
s -> subjectList
.add(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()))));
out.setSubject(subjectList);
List<Subject> subjectList = new ArrayList<>();
Optional.ofNullable(input.getSubject())
.ifPresent(value -> value
.forEach(s->subjectList.add(getSubject(s))));
List<ExternalReference> erList = new ArrayList<>();
Optional.ofNullable(input.getExtraInfo())
.ifPresent(value -> value.forEach(
er -> erList.add(ExternalReference.newInstance(er))
));
out.setSubjects(subjectList);
out.setType(input.getResulttype().getClassid());
}
@ -385,6 +375,19 @@ public class Mapper implements Serializable {
return out;
}
/**
 * Converts a subject of the input model into a dump {@link Subject}.
 *
 * <p>The provenance is filled in only when the input carries a data info;
 * {@code Optional.ofNullable} is used so that a missing data info leaves the
 * provenance unset instead of raising a {@code NullPointerException}.
 *
 * @param s the input subject; its qualifier must not be {@code null}
 * @return the converted subject, with provenance set when available
 */
private static Subject getSubject(StructuredProperty s) {
    Subject subject = new Subject();
    subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
    Optional<DataInfo> di = Optional.ofNullable(s.getDataInfo());
    if (di.isPresent()) {
        // NOTE(review): assumes provenanceaction is always set when a data info exists — confirm.
        Provenance p = new Provenance();
        p.setProvenance(di.get().getProvenanceaction().getClassname());
        p.setTrust(di.get().getTrust());
        subject.setProvenance(p);
    }
    return subject;
}
private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
Author a = new Author();
Optional
@ -404,12 +407,26 @@ public class Mapper implements Serializable {
.ofNullable(oa.getPid())
.ifPresent(
value -> a
.setPid(
.setPids(
value
.stream()
.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
.map(p -> getPid(p))
.collect(Collectors.toList())));
return a;
}
/**
 * Converts an identifier of the input model into a dump {@link Pid}.
 *
 * <p>The provenance is filled in only when the input carries a data info;
 * {@code Optional.ofNullable} is used so that a missing data info leaves the
 * provenance unset instead of raising a {@code NullPointerException}.
 *
 * @param p the input identifier; its qualifier must not be {@code null}
 * @return the converted identifier, with provenance set when available
 */
private static Pid getPid(StructuredProperty p) {
    Pid pid = new Pid();
    pid.setPid(ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()));
    Optional<DataInfo> di = Optional.ofNullable(p.getDataInfo());
    if (di.isPresent()) {
        // NOTE(review): assumes provenanceaction is always set when a data info exists — confirm.
        Provenance provenance = new Provenance();
        provenance.setProvenance(di.get().getProvenanceaction().getClassname());
        provenance.setTrust(di.get().getTrust());
        pid.setProvenance(provenance);
    }
    return pid;
}
}

View File

@ -57,21 +57,12 @@ public class SparkDumpCommunityProducts implements Serializable {
final String resultClassName = parser.get("resultTableName");
log.info("resultTableName: {}", resultClassName);
// final String dumpClassName = parser.get("dumpTableName");
// log.info("dumpClassName: {}", dumpClassName);
final String isLookUpUrl = parser.get("isLookUpUrl");
log.info("isLookUpUrl: {}", isLookUpUrl);
// final String resultType = parser.get("resultType");
// log.info("resultType: {}", resultType);
final Optional<String> cm = Optional.ofNullable(parser.get("communityMap"));
Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
// Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> dumpClazz = (Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result>) Class
// .forName(dumpClassName);
SparkConf conf = new SparkConf();
CommunityMap communityMap;

View File

@ -0,0 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.graph;
/**
 * Empty placeholder: the class declares no members yet.
 * NOTE(review): presumably meant to host the organization step of the graph dump — confirm.
 */
public class DumpOrganization {
}

View File

@ -0,0 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.graph;
/**
 * Empty placeholder: the class declares no members yet.
 * NOTE(review): presumably meant to map organizations to the graph dump model — confirm.
 */
public class OrganizationMapper {
}

View File

@ -0,0 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.graph;
/**
 * Empty placeholder: the class declares no members yet.
 * NOTE(review): presumably meant to become the Spark entry point of the graph dump — confirm.
 */
public class SparkDumpJob {
}