[EOSC TAG] refactoring after compilation

This commit is contained in:
Miriam Baglioni 2022-07-21 14:45:43 +02:00
parent e61b8e6b03
commit 3be036f290
2 changed files with 178 additions and 131 deletions

View File

@ -29,7 +29,6 @@ public class SparkEoscTag {
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils
.toString(
@ -63,15 +62,16 @@ public class SparkEoscTag {
});
}
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics){
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
EoscIfGuidelines eig = new EoscIfGuidelines();
eig.setCode( code);
eig.setCode(code);
eig.setLabel(label);
eig.setUrl(url);
eig.setSemanticRelation(semantics);
return eig;
}
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
readPath(spark, inputPath + "/software", Software.class)
@ -80,14 +80,17 @@ public class SparkEoscTag {
if (containsCriteriaNotebook(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
addEIG(s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith");
addEIG(
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
"compliesWith");
}
if (containsCriteriaGalaxy(s)) {
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
s.setEoscifguidelines(new ArrayList<>());
addEIG(s.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
addEIG(
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
}
return s;
}, Encoders.bean(Software.class))
@ -109,10 +112,12 @@ public class SparkEoscTag {
orp.setEoscifguidelines(new ArrayList<>());
if (containsCriteriaGalaxy(orp)) {
addEIG(orp.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
addEIG(
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
"compliesWith");
}
if (containscriteriaTwitter(orp)) {
addEIG(orp.getEoscifguidelines(),"EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
}
return orp;
}, Encoders.bean(OtherResearchProduct.class))
@ -133,7 +138,7 @@ public class SparkEoscTag {
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
d.setEoscifguidelines(new ArrayList<>());
if (containscriteriaTwitter(d)) {
addEIG(d.getEoscifguidelines(),"EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
addEIG(d.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
}
return d;
}, Encoders.bean(Dataset.class))
@ -149,12 +154,12 @@ public class SparkEoscTag {
.json(inputPath + "/dataset");
}
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url, String sem) {
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
String sem) {
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
eoscifguidelines.add(newInstance(code, label, url, sem));
}
private static boolean containscriteriaTwitter(Result r) {
Set<String> words = getWordsSP(r.getTitle());
words.addAll(getWordsF(r.getDescription()));

View File

@ -126,9 +126,12 @@ public class EOSCTagJobTest {
.assertEquals(
4,
tmp
.filter(s -> s.getEoscifguidelines()!= null)
.filter(s -> s.getEoscifguidelines() != null)
.filter(
s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
@ -186,12 +189,13 @@ public class EOSCTagJobTest {
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue(tmp
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
.collect()
.get(0)
.getEoscifguidelines() == null
);
.getEoscifguidelines() == null);
Assertions
.assertEquals(
@ -245,13 +249,13 @@ public class EOSCTagJobTest {
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertTrue(
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
.collect()
.get(0)
.getEoscifguidelines() == null
);
.getEoscifguidelines() == null);
Assertions
.assertEquals(
@ -270,14 +274,18 @@ public class EOSCTagJobTest {
.getSubject()
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
Assertions.assertEquals(1,
Assertions
.assertEquals(
1,
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
.getEoscifguidelines()
.size());
Assertions.assertTrue(tmp
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
.collect()
.get(0)
@ -320,7 +328,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
.filter(
ds -> ds.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
ds -> ds
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
@ -336,7 +347,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
orp -> orp
.getSubject()
.stream()
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
.count());
Assertions
@ -345,7 +359,10 @@ public class EOSCTagJobTest {
.textFile(workingDir.toString() + "/input/otherresearchproduct")
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
.filter(
orp -> orp.getSubject().stream().anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
orp -> orp
.getSubject()
.stream()
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
.count());
// spark.stop();
@ -417,15 +434,19 @@ public class EOSCTagJobTest {
1,
tmp
.filter(
s -> s.getEoscifguidelines()!=null)
s -> s.getEoscifguidelines() != null)
.count());
Assertions
.assertEquals(
1,
tmp
.filter(
s -> s.getEoscifguidelines()!=null)
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
s -> s.getEoscifguidelines() != null)
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
@ -446,18 +467,23 @@ public class EOSCTagJobTest {
.stream()
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
Assertions.assertEquals(1, tmp
Assertions
.assertEquals(
1, tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.size() );
Assertions.assertTrue(tmp
.size());
Assertions
.assertTrue(
tmp
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
.collect()
.get(0)
.getEoscifguidelines()
.stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
Assertions
.assertEquals(
@ -499,10 +525,18 @@ public class EOSCTagJobTest {
.filter(
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
.count());
orp.foreach(o-> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
Assertions.assertEquals(1, orp.filter(o -> o.getEoscifguidelines() != null)
.filter(o -> o.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))).count());
Assertions
.assertEquals(
1, orp
.filter(o -> o.getEoscifguidelines() != null)
.filter(
o -> o
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
.count());
Assertions
.assertEquals(
@ -654,7 +688,11 @@ public class EOSCTagJobTest {
.assertEquals(
3,
orp
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
JavaRDD<Dataset> dats = sc
@ -667,7 +705,11 @@ public class EOSCTagJobTest {
.assertEquals(
3,
dats
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.filter(
s -> s
.getEoscifguidelines()
.stream()
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
.count());
}