[EOSC TAG] refactoring after compilation

This commit is contained in:
Miriam Baglioni 2022-07-21 14:45:43 +02:00
parent e61b8e6b03
commit 3be036f290
2 changed files with 178 additions and 131 deletions

View File

@ -29,7 +29,6 @@ public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class); private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
@ -72,6 +71,7 @@ public class SparkEoscTag {
return eig; return eig;
} }
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) { private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class) readPath(spark, inputPath + "/software", Software.class)
@ -80,14 +80,17 @@ public class SparkEoscTag {
if (containsCriteriaNotebook(s)) { if (containsCriteriaNotebook(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent()) if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>()); s.setEoscifguidelines(new ArrayList<>());
addEIG(s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith"); addEIG(
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
"compliesWith");
} }
if (containsCriteriaGalaxy(s)) { if (containsCriteriaGalaxy(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent()) if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>()); s.setEoscifguidelines(new ArrayList<>());
addEIG(s.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith"); addEIG(
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
} }
return s; return s;
}, Encoders.bean(Software.class)) }, Encoders.bean(Software.class))
@ -109,7 +112,9 @@ public class SparkEoscTag {
orp.setEoscifguidelines(new ArrayList<>()); orp.setEoscifguidelines(new ArrayList<>());
if (containsCriteriaGalaxy(orp)) { if (containsCriteriaGalaxy(orp)) {
addEIG(orp.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith"); addEIG(
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
"compliesWith");
} }
if (containscriteriaTwitter(orp)) { if (containscriteriaTwitter(orp)) {
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith"); addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
@ -149,12 +154,12 @@ public class SparkEoscTag {
.json(inputPath + "/dataset"); .json(inputPath + "/dataset");
} }
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url, String sem) { private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
String sem) {
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code))) if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
eoscifguidelines.add(newInstance(code, label, url, sem)); eoscifguidelines.add(newInstance(code, label, url, sem));
} }
private static boolean containscriteriaTwitter(Result r) { private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle()); Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription())); words.addAll(getWordsF(r.getDescription()));

View File

@ -128,7 +128,10 @@ public class EOSCTagJobTest {
tmp tmp
.filter(s -> s.getEoscifguidelines() != null) .filter(s -> s.getEoscifguidelines() != null)
.filter( .filter(
s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook"))) s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count()); .count());
Assertions Assertions
@ -186,12 +189,13 @@ public class EOSCTagJobTest {
.stream() .stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook"))); .anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue(tmp Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11")) .filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect() .collect()
.get(0) .get(0)
.getEoscifguidelines() == null .getEoscifguidelines() == null);
);
Assertions Assertions
.assertEquals( .assertEquals(
@ -245,13 +249,13 @@ public class EOSCTagJobTest {
.getSubject() .getSubject()
.stream() .stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook"))); .anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue( Assertions
.assertTrue(
tmp tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589")) .filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect() .collect()
.get(0) .get(0)
.getEoscifguidelines() == null .getEoscifguidelines() == null);
);
Assertions Assertions
.assertEquals( .assertEquals(
@ -270,14 +274,18 @@ public class EOSCTagJobTest {
.getSubject() .getSubject()
.stream() .stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook"))); .anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertEquals(1, Assertions
.assertEquals(
1,
tmp tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0")) .filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect() .collect()
.get(0) .get(0)
.getEoscifguidelines() .getEoscifguidelines()
.size()); .size());
Assertions.assertTrue(tmp Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0")) .filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect() .collect()
.get(0) .get(0)
@ -320,7 +328,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/dataset") .textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)) .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter( .filter(
ds -> ds.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook"))) ds -> ds
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count()); .count());
Assertions Assertions
@ -336,7 +347,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/otherresearchproduct") .textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)) .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter( .filter(
orp -> orp.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook"))) orp -> orp
.getSubject()
.stream()
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count()); .count());
Assertions Assertions
@ -345,7 +359,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/otherresearchproduct") .textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)) .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter( .filter(
orp -> orp.getSubject().stream().anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook"))) orp -> orp
.getSubject()
.stream()
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
.count()); .count());
// spark.stop(); // spark.stop();
@ -425,7 +442,11 @@ public class EOSCTagJobTest {
tmp tmp
.filter( .filter(
s -> s.getEoscifguidelines() != null) s -> s.getEoscifguidelines() != null)
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))) .filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count()); .count());
Assertions Assertions
@ -446,18 +467,23 @@ public class EOSCTagJobTest {
.stream() .stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow"))); .anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions.assertEquals(1, tmp Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")) .filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect() .collect()
.get(0) .get(0)
.getEoscifguidelines() .getEoscifguidelines()
.size()); .size());
Assertions.assertTrue(tmp Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4")) .filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect() .collect()
.get(0) .get(0)
.getEoscifguidelines() .getEoscifguidelines()
.stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))); .stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
Assertions Assertions
.assertEquals( .assertEquals(
@ -501,8 +527,16 @@ public class EOSCTagJobTest {
.count()); .count());
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o))); orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions.assertEquals(1, orp.filter(o -> o.getEoscifguidelines() != null) Assertions
.filter(o -> o.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))).count()); .assertEquals(
1, orp
.filter(o -> o.getEoscifguidelines() != null)
.filter(
o -> o
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions Assertions
.assertEquals( .assertEquals(
@ -654,7 +688,11 @@ public class EOSCTagJobTest {
.assertEquals( .assertEquals(
3, 3,
orp orp
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data"))) .filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count()); .count());
JavaRDD<Dataset> dats = sc JavaRDD<Dataset> dats = sc
@ -667,7 +705,11 @@ public class EOSCTagJobTest {
.assertEquals( .assertEquals(
3, 3,
dats dats
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data"))) .filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count()); .count());
} }