forked from antonis.lempesis/dnet-hadoop
[EOSCTag] Fixed an issue that occurred when the description is null. Modified the test resources and classes accordingly.
This commit is contained in:
parent
a21fe310e5
commit
3aeedd931a
|
@ -179,12 +179,11 @@ public class SparkEoscTag {
|
||||||
Set<String> words = getWordsSP(r.getTitle());
|
Set<String> words = getWordsSP(r.getTitle());
|
||||||
words.addAll(getWordsF(r.getDescription()));
|
words.addAll(getWordsF(r.getDescription()));
|
||||||
if (words.contains("galaxy") &&
|
if (words.contains("galaxy") &&
|
||||||
(words.contains("workflow") || words.contains("software")))
|
words.contains("workflow"))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||||
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
|
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
|
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -214,17 +213,30 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Set<String> getWordsF(List<Field<String>> elem) {
|
private static Set<String> getWordsF(List<Field<String>> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
|
// elem
|
||||||
|
// .forEach(
|
||||||
|
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||||
return words;
|
return words;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="eosc_tag"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -226,7 +226,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
|
@ -322,7 +322,7 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/software")
|
.textFile(workingDir.toString() + "/input/software")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, tmp.count());
|
Assertions.assertEquals(11, tmp.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/dataset")
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, dats.count());
|
Assertions.assertEquals(11, dats.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
2,
|
3,
|
||||||
dats
|
dats
|
||||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue