Master branch updates from beta September 2023 #337
|
@ -179,12 +179,11 @@ public class SparkEoscTag {
|
||||||
Set<String> words = getWordsSP(r.getTitle());
|
Set<String> words = getWordsSP(r.getTitle());
|
||||||
words.addAll(getWordsF(r.getDescription()));
|
words.addAll(getWordsF(r.getDescription()));
|
||||||
if (words.contains("galaxy") &&
|
if (words.contains("galaxy") &&
|
||||||
(words.contains("workflow") || words.contains("software")))
|
words.contains("workflow"))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
if (r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("galaxy")) &&
|
||||||
(r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow"))) ||
|
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("workflow")))
|
||||||
r.getSubject().stream().anyMatch(sbj -> sbj.getValue().toLowerCase().contains("software")))
|
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -214,17 +213,30 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Set<String> getWordsF(List<Field<String>> elem) {
|
private static Set<String> getWordsF(List<Field<String>> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
elem
|
Optional
|
||||||
.forEach(
|
.ofNullable(elem)
|
||||||
t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
.ifPresent(
|
||||||
|
e -> e
|
||||||
|
.forEach(
|
||||||
|
t -> words
|
||||||
|
.addAll(
|
||||||
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
|
// elem
|
||||||
|
// .forEach(
|
||||||
|
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
||||||
return words;
|
return words;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="reset_outputpath"/>
|
<start to="eosc_tag"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
@ -226,7 +226,7 @@
|
||||||
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
<arg>--sourcePath</arg><arg>${outputPath}</arg>
|
||||||
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
<arg>--workingPath</arg><arg>${workingDir}/eoscTag</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
|
@ -322,7 +322,7 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/software")
|
.textFile(workingDir.toString() + "/input/software")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Software.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, tmp.count());
|
Assertions.assertEquals(11, tmp.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -534,11 +534,11 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/dataset")
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||||
|
|
||||||
Assertions.assertEquals(10, dats.count());
|
Assertions.assertEquals(11, dats.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
2,
|
3,
|
||||||
dats
|
dats
|
||||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue