forked from D-Net/dnet-hadoop
[EOSC TAG] refactoring after compilation
This commit is contained in:
parent
e61b8e6b03
commit
3be036f290
|
@ -29,7 +29,6 @@ public class SparkEoscTag {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
|
@ -72,6 +71,7 @@ public class SparkEoscTag {
|
||||||
return eig;
|
return eig;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
||||||
|
|
||||||
readPath(spark, inputPath + "/software", Software.class)
|
readPath(spark, inputPath + "/software", Software.class)
|
||||||
|
@ -80,14 +80,17 @@ public class SparkEoscTag {
|
||||||
if (containsCriteriaNotebook(s)) {
|
if (containsCriteriaNotebook(s)) {
|
||||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||||
s.setEoscifguidelines(new ArrayList<>());
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
addEIG(s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith");
|
addEIG(
|
||||||
|
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
|
||||||
|
"compliesWith");
|
||||||
|
|
||||||
}
|
}
|
||||||
if (containsCriteriaGalaxy(s)) {
|
if (containsCriteriaGalaxy(s)) {
|
||||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||||
s.setEoscifguidelines(new ArrayList<>());
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
|
|
||||||
addEIG(s.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
addEIG(
|
||||||
|
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}, Encoders.bean(Software.class))
|
}, Encoders.bean(Software.class))
|
||||||
|
@ -109,7 +112,9 @@ public class SparkEoscTag {
|
||||||
orp.setEoscifguidelines(new ArrayList<>());
|
orp.setEoscifguidelines(new ArrayList<>());
|
||||||
|
|
||||||
if (containsCriteriaGalaxy(orp)) {
|
if (containsCriteriaGalaxy(orp)) {
|
||||||
addEIG(orp.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
addEIG(
|
||||||
|
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
|
||||||
|
"compliesWith");
|
||||||
}
|
}
|
||||||
if (containscriteriaTwitter(orp)) {
|
if (containscriteriaTwitter(orp)) {
|
||||||
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||||
|
@ -149,12 +154,12 @@ public class SparkEoscTag {
|
||||||
.json(inputPath + "/dataset");
|
.json(inputPath + "/dataset");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url, String sem) {
|
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
|
||||||
|
String sem) {
|
||||||
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
|
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
|
||||||
eoscifguidelines.add(newInstance(code, label, url, sem));
|
eoscifguidelines.add(newInstance(code, label, url, sem));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static boolean containscriteriaTwitter(Result r) {
|
private static boolean containscriteriaTwitter(Result r) {
|
||||||
Set<String> words = getWordsSP(r.getTitle());
|
Set<String> words = getWordsSP(r.getTitle());
|
||||||
words.addAll(getWordsF(r.getDescription()));
|
words.addAll(getWordsF(r.getDescription()));
|
||||||
|
|
|
@ -128,7 +128,10 @@ public class EOSCTagJobTest {
|
||||||
tmp
|
tmp
|
||||||
.filter(s -> s.getEoscifguidelines() != null)
|
.filter(s -> s.getEoscifguidelines() != null)
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
|
@ -186,12 +189,13 @@ public class EOSCTagJobTest {
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
Assertions.assertTrue(tmp
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getEoscifguidelines() == null
|
.getEoscifguidelines() == null);
|
||||||
);
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -245,13 +249,13 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
Assertions.assertTrue(
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getEoscifguidelines() == null
|
.getEoscifguidelines() == null);
|
||||||
);
|
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -270,14 +274,18 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
Assertions.assertEquals(1,
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getEoscifguidelines()
|
.getEoscifguidelines()
|
||||||
.size());
|
.size());
|
||||||
Assertions.assertTrue(tmp
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
|
@ -320,7 +328,10 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/dataset")
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||||
.filter(
|
.filter(
|
||||||
ds -> ds.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
ds -> ds
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
|
@ -336,7 +347,10 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||||
.filter(
|
.filter(
|
||||||
orp -> orp.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
orp -> orp
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
|
@ -345,7 +359,10 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||||
.filter(
|
.filter(
|
||||||
orp -> orp.getSubject().stream().anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
orp -> orp
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
// spark.stop();
|
// spark.stop();
|
||||||
|
@ -425,7 +442,11 @@ public class EOSCTagJobTest {
|
||||||
tmp
|
tmp
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getEoscifguidelines() != null)
|
s -> s.getEoscifguidelines() != null)
|
||||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
|
@ -446,18 +467,23 @@ public class EOSCTagJobTest {
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||||
|
|
||||||
Assertions.assertEquals(1, tmp
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getEoscifguidelines()
|
.getEoscifguidelines()
|
||||||
.size());
|
.size());
|
||||||
Assertions.assertTrue(tmp
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getEoscifguidelines()
|
.getEoscifguidelines()
|
||||||
.stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -501,8 +527,16 @@ public class EOSCTagJobTest {
|
||||||
.count());
|
.count());
|
||||||
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||||
|
|
||||||
Assertions.assertEquals(1, orp.filter(o -> o.getEoscifguidelines() != null)
|
Assertions
|
||||||
.filter(o -> o.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))).count());
|
.assertEquals(
|
||||||
|
1, orp
|
||||||
|
.filter(o -> o.getEoscifguidelines() != null)
|
||||||
|
.filter(
|
||||||
|
o -> o
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||||
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -654,7 +688,11 @@ public class EOSCTagJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3,
|
3,
|
||||||
orp
|
orp
|
||||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
JavaRDD<Dataset> dats = sc
|
JavaRDD<Dataset> dats = sc
|
||||||
|
@ -667,7 +705,11 @@ public class EOSCTagJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3,
|
3,
|
||||||
dats
|
dats
|
||||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue