forked from D-Net/dnet-hadoop
Merge pull request 'EOSC IF' (#230) from tagEosc into beta
Reviewed-on: D-Net/dnet-hadoop#230
This commit is contained in:
commit
3c23d634eb
|
@ -28,28 +28,6 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
public class SparkEoscTag {
|
public class SparkEoscTag {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkEoscTag.class);
|
||||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
|
|
||||||
.qualifier(
|
|
||||||
"EOSC",
|
|
||||||
"European Open Science Cloud",
|
|
||||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
|
|
||||||
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
|
|
||||||
.dataInfo(
|
|
||||||
false, "propagation", true, false,
|
|
||||||
OafMapperUtils
|
|
||||||
.qualifier(
|
|
||||||
"propagation:subject", "Inferred by OpenAIRE",
|
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
|
|
||||||
"0.9");
|
|
||||||
public final static StructuredProperty EOSC_NOTEBOOK = OafMapperUtils
|
|
||||||
.structuredProperty(
|
|
||||||
"EOSC::Jupyter Notebook", EOSC_QUALIFIER, EOSC_DATAINFO);
|
|
||||||
public final static StructuredProperty EOSC_GALAXY = OafMapperUtils
|
|
||||||
.structuredProperty(
|
|
||||||
"EOSC::Galaxy Workflow", EOSC_QUALIFIER, EOSC_DATAINFO);
|
|
||||||
public final static StructuredProperty EOSC_TWITTER = OafMapperUtils
|
|
||||||
.structuredProperty(
|
|
||||||
"EOSC::Twitter Data", EOSC_QUALIFIER, EOSC_DATAINFO);
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
|
@ -84,29 +62,35 @@ public class SparkEoscTag {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static EoscIfGuidelines newInstance(String code, String label, String url, String semantics) {
|
||||||
|
EoscIfGuidelines eig = new EoscIfGuidelines();
|
||||||
|
eig.setCode(code);
|
||||||
|
eig.setLabel(label);
|
||||||
|
eig.setUrl(url);
|
||||||
|
eig.setSemanticRelation(semantics);
|
||||||
|
return eig;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
private static void execEoscTag(SparkSession spark, String inputPath, String workingPath) {
|
||||||
|
|
||||||
readPath(spark, inputPath + "/software", Software.class)
|
readPath(spark, inputPath + "/software", Software.class)
|
||||||
.map((MapFunction<Software, Software>) s -> {
|
.map((MapFunction<Software, Software>) s -> {
|
||||||
List<StructuredProperty> sbject;
|
|
||||||
if (!Optional.ofNullable(s.getSubject()).isPresent())
|
|
||||||
s.setSubject(new ArrayList<>());
|
|
||||||
sbject = s.getSubject();
|
|
||||||
|
|
||||||
if (containsCriteriaNotebook(s)) {
|
if (containsCriteriaNotebook(s)) {
|
||||||
sbject.add(EOSC_NOTEBOOK);
|
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||||
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))) {
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
sbject = sbject.stream().map(sb -> {
|
addEIG(
|
||||||
if (sb.getValue().equals("EOSC Jupyter Notebook")) {
|
s.getEoscifguidelines(), "EOSC::Jupyter Notebook", "EOSC::Jupyter Notebook", "",
|
||||||
return null;
|
"compliesWith");
|
||||||
}
|
|
||||||
return sb;
|
|
||||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
|
||||||
s.setSubject(sbject);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (containsCriteriaGalaxy(s)) {
|
if (containsCriteriaGalaxy(s)) {
|
||||||
sbject.add(EOSC_GALAXY);
|
if (!Optional.ofNullable(s.getEoscifguidelines()).isPresent())
|
||||||
|
s.setEoscifguidelines(new ArrayList<>());
|
||||||
|
|
||||||
|
addEIG(
|
||||||
|
s.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "", "compliesWith");
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}, Encoders.bean(Software.class))
|
}, Encoders.bean(Software.class))
|
||||||
|
@ -123,15 +107,17 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
|
readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class)
|
||||||
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
|
.map((MapFunction<OtherResearchProduct, OtherResearchProduct>) orp -> {
|
||||||
List<StructuredProperty> sbject;
|
|
||||||
if (!Optional.ofNullable(orp.getSubject()).isPresent())
|
if (!Optional.ofNullable(orp.getEoscifguidelines()).isPresent())
|
||||||
orp.setSubject(new ArrayList<>());
|
orp.setEoscifguidelines(new ArrayList<>());
|
||||||
sbject = orp.getSubject();
|
|
||||||
if (containsCriteriaGalaxy(orp)) {
|
if (containsCriteriaGalaxy(orp)) {
|
||||||
sbject.add(EOSC_GALAXY);
|
addEIG(
|
||||||
|
orp.getEoscifguidelines(), "EOSC::Galaxy Workflow", "EOSC::Galaxy Workflow", "",
|
||||||
|
"compliesWith");
|
||||||
}
|
}
|
||||||
if (containscriteriaTwitter(orp)) {
|
if (containscriteriaTwitter(orp)) {
|
||||||
sbject.add(EOSC_TWITTER);
|
addEIG(orp.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||||
}
|
}
|
||||||
return orp;
|
return orp;
|
||||||
}, Encoders.bean(OtherResearchProduct.class))
|
}, Encoders.bean(OtherResearchProduct.class))
|
||||||
|
@ -148,12 +134,11 @@ public class SparkEoscTag {
|
||||||
|
|
||||||
readPath(spark, inputPath + "/dataset", Dataset.class)
|
readPath(spark, inputPath + "/dataset", Dataset.class)
|
||||||
.map((MapFunction<Dataset, Dataset>) d -> {
|
.map((MapFunction<Dataset, Dataset>) d -> {
|
||||||
List<StructuredProperty> sbject;
|
|
||||||
if (!Optional.ofNullable(d.getSubject()).isPresent())
|
if (!Optional.ofNullable(d.getEoscifguidelines()).isPresent())
|
||||||
d.setSubject(new ArrayList<>());
|
d.setEoscifguidelines(new ArrayList<>());
|
||||||
sbject = d.getSubject();
|
|
||||||
if (containscriteriaTwitter(d)) {
|
if (containscriteriaTwitter(d)) {
|
||||||
sbject.add(EOSC_TWITTER);
|
addEIG(d.getEoscifguidelines(), "EOSC::Twitter Data", "EOSC::Twitter Data", "", "compliesWith");
|
||||||
}
|
}
|
||||||
return d;
|
return d;
|
||||||
}, Encoders.bean(Dataset.class))
|
}, Encoders.bean(Dataset.class))
|
||||||
|
@ -169,6 +154,12 @@ public class SparkEoscTag {
|
||||||
.json(inputPath + "/dataset");
|
.json(inputPath + "/dataset");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void addEIG(List<EoscIfGuidelines> eoscifguidelines, String code, String label, String url,
|
||||||
|
String sem) {
|
||||||
|
if (!eoscifguidelines.stream().anyMatch(eig -> eig.getCode().equals(code)))
|
||||||
|
eoscifguidelines.add(newInstance(code, label, url, sem));
|
||||||
|
}
|
||||||
|
|
||||||
private static boolean containscriteriaTwitter(Result r) {
|
private static boolean containscriteriaTwitter(Result r) {
|
||||||
Set<String> words = getWordsSP(r.getTitle());
|
Set<String> words = getWordsSP(r.getTitle());
|
||||||
words.addAll(getWordsF(r.getDescription()));
|
words.addAll(getWordsF(r.getDescription()));
|
||||||
|
@ -212,13 +203,6 @@ public class SparkEoscTag {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Set<String> getSubjects(List<StructuredProperty> s) {
|
|
||||||
Set<String> subjects = new HashSet<>();
|
|
||||||
s.stream().forEach(sbj -> subjects.addAll(Arrays.asList(sbj.getValue().toLowerCase().split(" "))));
|
|
||||||
s.stream().forEach(sbj -> subjects.add(sbj.getValue().toLowerCase()));
|
|
||||||
return subjects;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
private static Set<String> getWordsSP(List<StructuredProperty> elem) {
|
||||||
Set<String> words = new HashSet<>();
|
Set<String> words = new HashSet<>();
|
||||||
Optional
|
Optional
|
||||||
|
@ -242,9 +226,7 @@ public class SparkEoscTag {
|
||||||
t -> words
|
t -> words
|
||||||
.addAll(
|
.addAll(
|
||||||
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" ")))));
|
||||||
// elem
|
|
||||||
// .forEach(
|
|
||||||
// t -> words.addAll(Arrays.asList(t.getValue().toLowerCase().replaceAll("[^a-zA-Z ]", "").split(" "))));
|
|
||||||
return words;
|
return words;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,10 +126,23 @@ public class EOSCTagJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
4,
|
4,
|
||||||
tmp
|
tmp
|
||||||
|
.filter(s -> s.getEoscifguidelines() != null)
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.size());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
1, tmp
|
1, tmp
|
||||||
|
@ -140,6 +153,16 @@ public class EOSCTagJobTest {
|
||||||
.size());
|
.size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertFalse(
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -166,16 +189,24 @@ public class EOSCTagJobTest {
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::501b25d420f808c8eddcd9b16e917f11"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines() == null);
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
9, tmp
|
8, tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertFalse(
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -183,6 +214,23 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::581621232a561b7e8b4952b18b8b0e56"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -201,17 +249,24 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::5aec1186054301b66c0c5dc35972a589"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines() == null);
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
9, tmp
|
8, tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertFalse(
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -219,14 +274,31 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::639909adfad9d708308f2aedb733e4a0"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
List<StructuredProperty> subjects = tmp
|
List<StructuredProperty> subjects = tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject();
|
.getSubject();
|
||||||
Assertions.assertEquals(8, subjects.size());
|
Assertions.assertEquals(7, subjects.size());
|
||||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("EOSC::Jupyter Notebook")));
|
|
||||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
|
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("jupyter")));
|
||||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
|
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("Modeling and Simulation")));
|
||||||
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
|
Assertions.assertTrue(subjects.stream().anyMatch(s -> s.getValue().equals("structure granulaire")));
|
||||||
|
@ -250,6 +322,17 @@ public class EOSCTagJobTest {
|
||||||
.filter(
|
.filter(
|
||||||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
0, sc
|
||||||
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class))
|
||||||
|
.filter(
|
||||||
|
ds -> ds
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Jupyter Notebook")))
|
||||||
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -264,7 +347,22 @@ public class EOSCTagJobTest {
|
||||||
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||||
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||||
.filter(
|
.filter(
|
||||||
ds -> ds.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
orp -> orp
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(sbj -> sbj.getValue().equals("EOSC::Jupyter Notebook")))
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
0, sc
|
||||||
|
.textFile(workingDir.toString() + "/input/otherresearchproduct")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class))
|
||||||
|
.filter(
|
||||||
|
orp -> orp
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getValue().equals("EOSC::Jupyter Notebook")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
// spark.stop();
|
// spark.stop();
|
||||||
|
@ -326,22 +424,41 @@ public class EOSCTagJobTest {
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
1,
|
0,
|
||||||
tmp
|
tmp
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||||
.count());
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
tmp
|
||||||
|
.filter(
|
||||||
|
s -> s.getEoscifguidelines() != null)
|
||||||
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1,
|
||||||
|
tmp
|
||||||
|
.filter(
|
||||||
|
s -> s.getEoscifguidelines() != null)
|
||||||
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||||
|
.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
2, tmp
|
1, tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertFalse(
|
||||||
tmp
|
tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -350,6 +467,24 @@ public class EOSCTagJobTest {
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
tmp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
5, tmp
|
5, tmp
|
||||||
|
@ -385,22 +520,34 @@ public class EOSCTagJobTest {
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
1,
|
0,
|
||||||
orp
|
orp
|
||||||
.filter(
|
.filter(
|
||||||
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Galaxy Workflow")))
|
||||||
.count());
|
.count());
|
||||||
|
orp.foreach(o -> System.out.println(OBJECT_MAPPER.writeValueAsString(o)));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3, orp
|
1, orp
|
||||||
|
.filter(o -> o.getEoscifguidelines() != null)
|
||||||
|
.filter(
|
||||||
|
o -> o
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Galaxy Workflow")))
|
||||||
|
.count());
|
||||||
|
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
2, orp
|
||||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
Assertions
|
Assertions
|
||||||
.assertTrue(
|
.assertFalse(
|
||||||
orp
|
orp
|
||||||
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||||
.collect()
|
.collect()
|
||||||
|
@ -408,6 +555,23 @@ public class EOSCTagJobTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
.anyMatch(s -> s.getValue().equals("EOSC::Galaxy Workflow")));
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
1, orp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.size());
|
||||||
|
Assertions
|
||||||
|
.assertTrue(
|
||||||
|
orp
|
||||||
|
.filter(sw -> sw.getId().equals("50|od______2017::0750a4d0782265873d669520f5e33c07"))
|
||||||
|
.collect()
|
||||||
|
.get(0)
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(s -> s.getCode().equals("EOSC::Galaxy Workflow")));
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
@ -516,10 +680,20 @@ public class EOSCTagJobTest {
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3,
|
0,
|
||||||
orp
|
orp
|
||||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
Assertions
|
||||||
|
.assertEquals(
|
||||||
|
3,
|
||||||
|
orp
|
||||||
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||||
|
.count());
|
||||||
|
|
||||||
JavaRDD<Dataset> dats = sc
|
JavaRDD<Dataset> dats = sc
|
||||||
.textFile(workingDir.toString() + "/input/dataset")
|
.textFile(workingDir.toString() + "/input/dataset")
|
||||||
|
@ -531,7 +705,11 @@ public class EOSCTagJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3,
|
3,
|
||||||
dats
|
dats
|
||||||
.filter(s -> s.getSubject().stream().anyMatch(sbj -> sbj.getValue().equals("EOSC::Twitter Data")))
|
.filter(
|
||||||
|
s -> s
|
||||||
|
.getEoscifguidelines()
|
||||||
|
.stream()
|
||||||
|
.anyMatch(eig -> eig.getCode().equals("EOSC::Twitter Data")))
|
||||||
.count());
|
.count());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.12.0]</dhp-schemas.version>
|
<dhp-schemas.version>[2.12.2-SNAPSHOT]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue