forked from antonis.lempesis/dnet-hadoop
[EOSC TAG] refactoring after compilation
This commit is contained in:
parent
e61b8e6b03
commit
3be036f290
|
@ -29,7 +29,6 @@ public class SparkEoscTag {
|
|||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
|
@ -63,15 +62,16 @@ public class SparkEoscTag {
|
|||
});
|
||||
}
|
||||
|
||||
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics){
|
||||
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
|
||||
EoscIfGuidelines eig = new EoscIfGuidelines();
|
||||
eig.setCode( code);
|
||||
eig.setCode(code);
|
||||
eig.setLabel(label);
|
||||
eig.setUrl(url);
|
||||
eig.setSemanticRelation(semantics);
|
||||
return eig;
|
||||
|
||||
}
|
||||
|
||||
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
||||
|
||||
readPath(spark, inputPath + "/software", Software.class)
|
||||
|
@ -80,14 +80,17 @@ public class SparkEoscTag {
|
|||
if (containsCriteriaNotebook(s)) {
|
||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||
s.setEoscifguidelines(new ArrayList<>());
|
||||
addEIG(s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "", "compliesWith");
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
|
||||
"compliesWith");
|
||||
|
||||
}
|
||||
if (containsCriteriaGalaxy(s)) {
|
||||
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||
s.setEoscifguidelines(new ArrayList<>());
|
||||
|
||||
addEIG(s.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||
addEIG(
|
||||
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||
}
|
||||
return s;
|
||||
}, Encoders.bean(Software.class))
|
||||
|
@ -109,10 +112,12 @@ public class SparkEoscTag {
|
|||
orp.setEoscifguidelines(new ArrayList<>());
|
||||
|
||||
if (containsCriteriaGalaxy(orp)) {
|
||||
addEIG(orp.getEoscifguidelines(),"EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||
addEIG(
|
||||
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
|
||||
"compliesWith");
|
||||
}
|
||||
if (containscriteriaTwitter(orp)) {
|
||||
addEIG(orp.getEoscifguidelines(),"EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
}
|
||||
return orp;
|
||||
}, Encoders.bean(OtherResearchProduct.class))
|
||||
|
@ -133,7 +138,7 @@ public class SparkEoscTag {
|
|||
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
||||
d.setEoscifguidelines(new ArrayList<>());
|
||||
if (containscriteriaTwitter(d)) {
|
||||
addEIG(d.getEoscifguidelines(),"EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
addEIG(d.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||
}
|
||||
return d;
|
||||
}, Encoders.bean(Dataset.class))
|
||||
|
@ -149,12 +154,12 @@ public class SparkEoscTag {
|
|||
.json(inputPath + "/dataset");
|
||||
}
|
||||
|
||||
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url, String sem) {
|
||||
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
|
||||
String sem) {
|
||||
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
|
||||
eoscifguidelines.add(newInstance(code, label, url, sem));
|
||||
}
|
||||
|
||||
|
||||
private static boolean containscriteriaTwitter(Result r) {
|
||||
Set<String> words = getWordsSP(r.getTitle());
|
||||
words.addAll(getWordsF(r.getDescription()));
|
||||
|
|
|
@ -126,9 +126,12 @@ public class EOSCTagJobTest {
|
|||
.assertEquals(
|
||||
4,
|
||||
tmp
|
||||
.filter(s -> s.getEoscifguidelines()!= null)
|
||||
.filter(s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
|
@ -141,13 +144,13 @@ public class EOSCTagJobTest {
|
|||
.size());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
|
@ -159,14 +162,14 @@ public class EOSCTagJobTest {
|
|||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
.assertFalse(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -186,12 +189,13 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions.assertTrue(tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null
|
||||
);
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -211,22 +215,22 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -245,13 +249,13 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions.assertTrue(
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null
|
||||
);
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines() == null);
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -270,20 +274,24 @@ public class EOSCTagJobTest {
|
|||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||
Assertions.assertEquals(1,
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions.assertTrue(tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||
|
||||
List<StructuredProperty> subjects = tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
||||
|
@ -315,13 +323,16 @@ public class EOSCTagJobTest {
|
|||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.filter(
|
||||
ds -> ds.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||
.filter(
|
||||
ds -> ds
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -336,17 +347,23 @@ public class EOSCTagJobTest {
|
|||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp.getSubject().stream().anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
.assertEquals(
|
||||
0, sc
|
||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||
.filter(
|
||||
orp -> orp
|
||||
.getSubject()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||
.count());
|
||||
|
||||
// spark.stop();
|
||||
}
|
||||
|
@ -413,20 +430,24 @@ public class EOSCTagJobTest {
|
|||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines()!=null)
|
||||
.count());
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines()!=null)
|
||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
.assertEquals(
|
||||
1,
|
||||
tmp
|
||||
.filter(
|
||||
s -> s.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -446,18 +467,23 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions.assertEquals(1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size() );
|
||||
Assertions.assertTrue(tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
tmp
|
||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -499,10 +525,18 @@ public class EOSCTagJobTest {
|
|||
.filter(
|
||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
orp.foreach(o-> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||
|
||||
Assertions.assertEquals(1, orp.filter(o -> o.getEoscifguidelines() != null)
|
||||
.filter(o -> o.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow"))).count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, orp
|
||||
.filter(o -> o.getEoscifguidelines() != null)
|
||||
.filter(
|
||||
o -> o
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||
.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -522,22 +556,22 @@ public class EOSCTagJobTest {
|
|||
.stream()
|
||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||
Assertions
|
||||
.assertEquals(
|
||||
1, orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
.assertEquals(
|
||||
1, orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
.assertTrue(
|
||||
orp
|
||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||
.collect()
|
||||
.get(0)
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
@ -651,11 +685,15 @@ public class EOSCTagJobTest {
|
|||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
3,
|
||||
orp
|
||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
.assertEquals(
|
||||
3,
|
||||
orp
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
|
||||
JavaRDD<Dataset> dats = sc
|
||||
.textFile(workingDir.toString() + "/input/dataset")
|
||||
|
@ -667,7 +705,11 @@ public class EOSCTagJobTest {
|
|||
.assertEquals(
|
||||
3,
|
||||
dats
|
||||
.filter(s -> s.getEoscifguidelines().stream().anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.filter(
|
||||
s -> s
|
||||
.getEoscifguidelines()
|
||||
.stream()
|
||||
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||
.count());
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue