From 1069cf243a1368ccc118b614ddb634915c8a2fc1 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Tue, 22 Sep 2020 14:38:00 +0200
Subject: [PATCH] modification to the schema to consider the
 H2020classification of the programme. The field Programme has been moved
 inside the H2020classification that is now associated to the Project.
 Programme is no longer associated directly to the Project but via
 H2020classification

---
Review notes (ignored by git am):
- H2020classification.setLevels() quotes the " $ " separator (String.split takes a regex) and
  tolerates an unset classification; equals() is null-safe and hashCode() has been added.
- Project.java drops the programme accessors instead of leaving them commented out.
- The blob ids on the index lines of H2020classification.java and Project.java are those of the
  original patch.

 .../{Programme.java => H2020Programme.java}   |   6 +-
 .../dhp/schema/oaf/H2020classification.java   | 109 ++++++++++++++++++
 .../eu/dnetlib/dhp/schema/oaf/Project.java    |  31 +++--
 ...st.java => PrepareH2020ProgrammeTest.java} |  26 ++++-
 4 files changed, 158 insertions(+), 14 deletions(-)
 rename dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/{Programme.java => H2020Programme.java} (77%)
 create mode 100644 dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020classification.java
 rename dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/{PrepareProgrammeTest.java => PrepareH2020ProgrammeTest.java} (68%)

diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java
similarity index 77%
rename from dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java
rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java
index 00dc32fbc..7478aa16e 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Programme.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020Programme.java
@@ -4,7 +4,7 @@ package eu.dnetlib.dhp.schema.oaf;
 import java.io.Serializable;
 import java.util.Objects;
 
-public class Programme implements Serializable {
+public class H2020Programme implements Serializable {
 	private String code;
 	private String description;
 
@@ -31,8 +31,8 @@ public class Programme implements Serializable {
 		if (o == null || getClass() != o.getClass())
 			return false;
 
-		Programme programme = (Programme) o;
-		return Objects.equals(code, programme.code);
+		H2020Programme h2020Programme = (H2020Programme) o;
+		return Objects.equals(code, h2020Programme.code);
 	}
 
 }
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020classification.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020classification.java
new file mode 100644
index 000000000..cebe3f427
--- /dev/null
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/H2020classification.java
@@ -0,0 +1,109 @@
+package eu.dnetlib.dhp.schema.oaf;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.regex.Pattern;
+
+/**
+ * Classification associated to a {@link Project}: an {@link H2020Programme} together with its
+ * classification string and the (up to three) levels extracted from that string.
+ */
+public class H2020classification implements Serializable {
+
+	/** Literal separator between the levels inside the classification string. */
+	private static final String LEVEL_SEPARATOR = " $ ";
+
+	private H2020Programme h2020Programme;
+	private String level1;
+	private String level2;
+	private String level3;
+
+	private String classification;
+
+	public H2020Programme getH2020Programme() {
+		return h2020Programme;
+	}
+
+	public void setH2020Programme(H2020Programme h2020Programme) {
+		this.h2020Programme = h2020Programme;
+	}
+
+	public String getLevel1() {
+		return level1;
+	}
+
+	public void setLevel1(String level1) {
+		this.level1 = level1;
+	}
+
+	public String getLevel2() {
+		return level2;
+	}
+
+	public void setLevel2(String level2) {
+		this.level2 = level2;
+	}
+
+	public String getLevel3() {
+		return level3;
+	}
+
+	public void setLevel3(String level3) {
+		this.level3 = level3;
+	}
+
+	public String getClassification() {
+		return classification;
+	}
+
+	public void setClassification(String classification) {
+		this.classification = classification;
+	}
+
+	/**
+	 * Fills level1, level2 and level3 with the first three segments of the classification, where
+	 * segments are separated by " $ ". Further segments are ignored and a level without a matching
+	 * segment keeps its current value. Does nothing when the classification is not set.
+	 */
+	public void setLevels() {
+		if (classification == null) {
+			return;
+		}
+		// String.split takes a regular expression, in which a bare '$' is the end-of-input anchor:
+		// the separator has to be quoted to be matched literally
+		String[] tmp = classification.split(Pattern.quote(LEVEL_SEPARATOR));
+		if (tmp.length > 0) {
+			level1 = tmp[0];
+		}
+		if (tmp.length > 1) {
+			level2 = tmp[1];
+		}
+		if (tmp.length > 2) {
+			level3 = tmp[2];
+		}
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		if (this == o)
+			return true;
+		if (o == null || getClass() != o.getClass())
+			return false;
+
+		H2020classification h2020classification = (H2020classification) o;
+
+		// Objects.equals also on the programme, so that an unset programme does not throw
+		return Objects.equals(level1, h2020classification.level1) &&
+			Objects.equals(level2, h2020classification.level2) &&
+			Objects.equals(level3, h2020classification.level3) &&
+			Objects.equals(classification, h2020classification.classification) &&
+			Objects.equals(h2020Programme, h2020classification.h2020Programme);
+	}
+
+	@Override
+	public int hashCode() {
+		// the programme is deliberately left out: equal instances still share the hash code, and
+		// nothing is assumed about H2020Programme overriding hashCode
+		return Objects.hash(level1, level2, level3, classification);
+	}
+}
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
index 1fcfb305e..ce0baa074 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/oaf/Project.java
@@ -58,7 +58,26 @@ public class Project extends OafEntity implements Serializable {
 
 	private Float fundedamount;
 
-	private List programme;
+	private String topic;
+
+	private List<H2020classification> h2020classification;
+
+
+	public String getTopic() {
+		return topic;
+	}
+
+	public void setTopic(String topic) {
+		this.topic = topic;
+	}
+
+	public List<H2020classification> getH2020classification() {
+		return h2020classification;
+	}
+
+	public void setH2020classification(List<H2020classification> h2020classification) {
+		this.h2020classification = h2020classification;
+	}
 
 	public Field getWebsiteurl() {
 		return websiteurl;
@@ -268,13 +287,5 @@ public class Project extends OafEntity implements Serializable {
 		this.fundedamount = fundedamount;
 	}
 
-	public List getProgramme() {
-		return programme;
-	}
-
-	public void setProgramme(List programme) {
-		this.programme = programme;
-	}
-
 	@Override
 	public void mergeFrom(OafEntity e) {
@@ -331,7 +342,7 @@ public class Project extends OafEntity implements Serializable {
 			? p.getFundedamount()
 			: fundedamount;
 
-		programme = mergeLists(programme, p.getProgramme());
+		h2020classification = mergeLists(h2020classification, p.getH2020classification());
 
 		mergeOAFDataInfo(e);
 	}
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
similarity index 68%
rename from dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java
rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
index 7f890a8a3..34e5c9729 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgrammeTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
@@ -5,10 +5,12 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
+import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
 import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.ForeachFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
@@ -88,7 +90,29 @@ public class PrepareProgrammeTest {
 
 		Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));
 
-		Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count());
+		Assertions.assertEquals(0, verificationDataset.filter("title =''").count());
+
+		Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count());
+
+		Assertions.assertEquals("Societal challenges $ Smart, Green And Integrated Transport $ CLEANSKY2 $ IADP Fast Rotorcraft",
+			verificationDataset.filter("code = 'H2020-EU.3.4.5.3.'").select("classification").collectAsList()
+				.get(0).getString(0));
+
+
+		Assertions.assertEquals("Euratom $ Indirect actions $ European Fusion Development Agreement",
+			verificationDataset.filter("code = 'H2020-Euratom-1.9.'").select("classification").collectAsList()
+				.get(0).getString(0));
+
+
+		Assertions.assertEquals("Industrial leadership $ Leadership in enabling and industrial technologies $ Advanced manufacturing and processing $ New sustainable business models",
+			verificationDataset.filter("code = 'H2020-EU.2.1.5.4.'").select("classification").collectAsList()
+				.get(0).getString(0));
+
+		Assertions.assertEquals("Excellent science $ Future and Emerging Technologies (FET) $ FET Open",
+			verificationDataset.filter("code = 'H2020-EU.1.2.1.'").select("classification").collectAsList()
+				.get(0).getString(0));
+
+
 	}
 
 }