Oozie workflow for cleancontext #216

Merged
claudio.atzori merged 7 commits from cleancontext into beta 2022-04-22 15:46:40 +02:00
3 changed files with 367 additions and 209 deletions
Showing only changes of commit 9a961a0092 - Show all commits

View File

@ -1,16 +1,13 @@
claudio.atzori marked this conversation as resolved
Review

At first glance, this class doesn't seem to include any significant change. If it was not changed, please revert to its original formatting. Otherwise the diff just creates noise.

At first glance, this class doesn't seem to include any significant change. If it was not changed, please revert to its original formatting. Otherwise the diff just creates noise.
Review

This comment is outdated. It seems you did not issue git pull before introducing these further changes thus you did not get the reformatted file CleanContextSparkJob.java.

This comment is outdated. It seems you did not issue `git pull` before introducing these further changes thus you did not get the reformatted file CleanContextSparkJob.java.
package eu.dnetlib.dhp.oa.graph.clean; package eu.dnetlib.dhp.oa.graph.clean;
import com.fasterxml.jackson.databind.ObjectMapper; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import java.io.Serializable;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import java.util.List;
import eu.dnetlib.dhp.schema.common.ModelConstants; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.Context; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -21,12 +18,18 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.Serializable; import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class CleanContextSparkJob implements Serializable { public class CleanContextSparkJob implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class); private static final Logger log = LoggerFactory.getLogger(CleanContextSparkJob.class);
@ -61,13 +64,11 @@ public class CleanContextSparkJob implements Serializable {
String verifyParam = parser.get("verifyParam"); String verifyParam = parser.get("verifyParam");
log.info("verifyParam: {}", verifyParam); log.info("verifyParam: {}", verifyParam);
String graphTableClassName = parser.get("graphTableClassName"); String graphTableClassName = parser.get("graphTableClassName");
log.info("graphTableClassName: {}", graphTableClassName); log.info("graphTableClassName: {}", graphTableClassName);
Class<? extends Result> entityClazz = (Class<? extends Result>) Class.forName(graphTableClassName); Class<? extends Result> entityClazz = (Class<? extends Result>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
runWithSparkSession( runWithSparkSession(
conf, conf,
@ -78,7 +79,8 @@ public class CleanContextSparkJob implements Serializable {
}); });
} }
private static <T extends Result> void cleanContext(SparkSession spark, String contextId, String verifyParam, String inputPath, Class<T> entityClazz, String workingPath) { private static <T extends Result> void cleanContext(SparkSession spark, String contextId, String verifyParam,
String inputPath, Class<T> entityClazz, String workingPath) {
Dataset<T> res = spark Dataset<T> res = spark
.read() .read()
.textFile(inputPath) .textFile(inputPath)
@ -87,27 +89,42 @@ public class CleanContextSparkJob implements Serializable {
Encoders.bean(entityClazz)); Encoders.bean(entityClazz));
res.map((MapFunction<T, T>) r -> { res.map((MapFunction<T, T>) r -> {
if(!r.getTitle() if (!r
.getTitle()
.stream() .stream()
.filter(t -> t.getQualifier().getClassid() .filter(
t -> t
.getQualifier()
.getClassid()
.equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())) .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid()))
.anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))){ .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()))) {
return r; return r;
} }
r.setContext(r.getContext().stream().filter(c -> !c.getId().split("::")[0] r
.equalsIgnoreCase(contextId)).collect(Collectors.toList())); .setContext(
r
.getContext()
.stream()
.filter(
c -> !c.getId().split("::")[0]
.equalsIgnoreCase(contextId))
.collect(Collectors.toList()));
return r; return r;
} ,Encoders.bean(entityClazz)) }, Encoders.bean(entityClazz))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression","gzip") .option("compression", "gzip")
.json(workingPath); .json(workingPath);
spark.read().textFile(workingPath).map((MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, entityClazz), spark
.read()
.textFile(workingPath)
.map(
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, entityClazz),
Encoders.bean(entityClazz)) Encoders.bean(entityClazz))
.write() .write()
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.option("compression","gzip") .option("compression", "gzip")
.json(inputPath); .json(inputPath);
} }
} }

View File

@ -15,7 +15,7 @@
</property> </property>
<property> <property>
<name>shouldCleanContext</name> <name>shouldCleanContext</name>
<value>false</value> <description>true if the context have to be cleaned</description>
</property> </property>
<property> <property>
Review

It should be better to include a description for this parameter, to explain its purpose and if it possible to include multiple contentIds, how they should be formatted.

It should be better to include a description for this parameter, to explain its purpose and if it possible to include multiple contentIds, how they should be formatted.
Review

This is just the first naive implementation of the context cleaning. I have no idea how it will be once done properly

This is just the first naive implementation of the context cleaning. I have no idea how it will be once done properly
Review

It might be the 1st naive implementation, but looking at the oozie workflow, it is not obvious what a parameter plays when it is not accompanied by any description.

It might be the 1st naive implementation, but looking at the oozie workflow, it is not obvious what a parameter plays when it is not accompanied by any description.
Review

extended

extended
@ -312,9 +312,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${inputPath}/publication</arg> <arg>--inputPath</arg><arg>${graphOutputPath}/publication</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--workingPath</arg><arg>${workingDir}/working</arg> <arg>--workingPath</arg><arg>${workingDir}/working/publication</arg>
<arg>--contextId</arg><arg>${contextId}</arg> <arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg> <arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark> </spark>
@ -339,9 +339,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/dataset</arg> <arg>--inputPath</arg><arg>${graphOutputPath}/dataset</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--workingPath</arg><arg>${workingDir}/working</arg> <arg>--workingPath</arg><arg>${workingDir}/working/dataset</arg>
<arg>--contextId</arg><arg>${contextId}</arg> <arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg> <arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark> </spark>
@ -366,9 +366,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/otherresearchproduct</arg> <arg>--inputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--workingPath</arg><arg>${workingDir}/working</arg> <arg>--workingPath</arg><arg>${workingDir}/working/otherresearchproduct</arg>
<arg>--contextId</arg><arg>${contextId}</arg> <arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg> <arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark> </spark>
@ -393,9 +393,9 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680 --conf spark.sql.shuffle.partitions=7680
</spark-opts> </spark-opts>
<arg>--inputPath</arg><arg>${graphInputPath}/software</arg> <arg>--inputPath</arg><arg>${graphOutputPath}/software</arg>
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--workingPath</arg><arg>${workingDir}/working</arg> <arg>--workingPath</arg><arg>${workingDir}/working/software</arg>
<arg>--contextId</arg><arg>${contextId}</arg> <arg>--contextId</arg><arg>${contextId}</arg>
<arg>--verifyParam</arg><arg>${verifyParam}</arg> <arg>--verifyParam</arg><arg>${verifyParam}</arg>
</spark> </spark>

View File

@ -1,18 +1,12 @@
package eu.dnetlib.dhp.oa.graph.clean; package eu.dnetlib.dhp.oa.graph.clean;
import com.fasterxml.jackson.core.JsonProcessingException; import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper; import java.nio.file.Files;
import eu.dnetlib.dhp.oa.graph.dump.Constants; import java.nio.file.Path;
import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest; import java.util.List;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; import java.util.Locale;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
@ -27,11 +21,20 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import com.fasterxml.jackson.core.JsonProcessingException;
import java.nio.file.Files; import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.file.Path;
import java.util.List; import eu.dnetlib.dhp.oa.graph.dump.Constants;
import java.util.Locale; import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
import eu.dnetlib.dhp.oa.graph.dump.DumpProducts;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.dump.oaf.Instance;
import eu.dnetlib.dhp.schema.dump.oaf.OpenAccessRoute;
import eu.dnetlib.dhp.schema.dump.oaf.graph.GraphResult;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class CleanContextTest { public class CleanContextTest {
@ -78,18 +81,22 @@ public class CleanContextTest {
.getPath(); .getPath();
final String prefix = "gcube "; final String prefix = "gcube ";
spark
spark.read().textFile(sourcePath).map((MapFunction<String, Publication>) r -> OBJECT_MAPPER.readValue(r, Publication.class), Encoders.bean(Publication.class)) .read()
.write().json(workingDir.toString() + "/publication"); .textFile(sourcePath)
.map(
(MapFunction<String, Publication>) r -> OBJECT_MAPPER.readValue(r, Publication.class),
Encoders.bean(Publication.class))
.write()
.json(workingDir.toString() + "/publication");
CleanContextSparkJob.main(new String[] { CleanContextSparkJob.main(new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(), "--isSparkSessionManaged", Boolean.FALSE.toString(),
"--inputPath", workingDir.toString() + "/publication", "--inputPath", workingDir.toString() + "/publication",
"-graphTableClassName", Publication.class.getCanonicalName(), "-graphTableClassName", Publication.class.getCanonicalName(),
"-workingPath", workingDir.toString() + "/working", "-workingPath", workingDir.toString() + "/working",
"-contextId","sobigdata", "-contextId", "sobigdata",
"-verifyParam","gCube " "-verifyParam", "gCube "
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -99,61 +106,195 @@ public class CleanContextTest {
Assertions.assertEquals(7, tmp.count()); Assertions.assertEquals(7, tmp.count());
//original result with sobigdata context and gcube as starting string in the main title for the publication // original result with sobigdata context and gcube as starting string in the main title for the publication
Assertions.assertEquals(0, Assertions
tmp.filter(p->p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95")).collect().get(0).getContext().size()); .assertEquals(
0,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::0224aae28af558f21768dbc6439c7a95"))
.collect()
.get(0)
.getContext()
.size());
//original result with sobigdata context without gcube as starting string in the main title for the publication // original result with sobigdata context without gcube as starting string in the main title for the publication
Assertions.assertEquals(1, Assertions
tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().size()); .assertEquals(
Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9")).collect().get(0).getContext().get(0).getId() ); 1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::2",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::20c414a3b1c742d5dd3851f1b67df2d9"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
//original result with sobigdata context with gcube as starting string in the subtitle // original result with sobigdata context with gcube as starting string in the subtitle
Assertions.assertEquals(1, Assertions
tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().size()); .assertEquals(
Assertions.assertEquals("sobigdata::projects::2",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getContext().get(0).getId() ); 1,
List<StructuredProperty> titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af")).collect().get(0).getTitle(); tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::2",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
List<StructuredProperty> titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c81248c335f0aa07e06817ece6fa6af"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size()); Assertions.assertEquals(1, titles.size());
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid()); Assertions.assertEquals("subtitle", titles.get(0).getQualifier().getClassid());
//original result with sobigdata context with gcube not as starting string in the main title // original result with sobigdata context with gcube not as starting string in the main title
Assertions.assertEquals(1, Assertions
tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().size()); .assertEquals(
Assertions.assertEquals("sobigdata::projects::1",tmp.filter(p->p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getContext().get(0).getId() ); 1,
titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f")).collect().get(0).getTitle(); tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"sobigdata::projects::1",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::3c9f068ddc930360bec6925488a9a97f"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size()); Assertions.assertEquals(1, titles.size());
Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); Assertions.assertFalse(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()) ); Assertions.assertTrue(titles.get(0).getValue().toLowerCase().contains(prefix.trim()));
Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
//original result with sobigdata in context and also other contexts with gcube as starting string for the main title // original result with sobigdata in context and also other contexts with gcube as starting string for the main
Assertions.assertEquals(1, // title
tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().size()); Assertions
Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getContext().get(0).getId() ); .assertEquals(
titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53")).collect().get(0).getTitle(); 1,
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4669a378a73661417182c208e6fdab53"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(1, titles.size()); Assertions.assertEquals(1, titles.size());
Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix) ); Assertions.assertTrue(titles.get(0).getValue().toLowerCase().startsWith(prefix));
Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid()); Assertions.assertEquals("main title", titles.get(0).getQualifier().getClassid());
//original result with multiple main title one of which whith gcube as starting string and with 2 contextes // original result with multiple main title one of which whith gcube as starting string and with 2 contextes
Assertions.assertEquals(1, Assertions
tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().size()); .assertEquals(
Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getContext().get(0).getId() ); 1,
titles = tmp.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff")).collect().get(0).getTitle(); tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|DansKnawCris::4a9152e80f860eab99072e921d74a0ff"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(2, titles.size()); Assertions.assertEquals(2, titles.size());
Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix)) ); Assertions
.assertTrue(
titles
.stream()
.anyMatch(
t -> t.getQualifier().getClassid().equals("main title")
&& t.getValue().toLowerCase().startsWith(prefix)));
// original result without sobigdata in context with gcube as starting string for the main title
//original result without sobigdata in context with gcube as starting string for the main title Assertions
Assertions.assertEquals(1, .assertEquals(
tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().size()); 1,
Assertions.assertEquals("dh-ch",tmp.filter(p->p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getContext().get(0).getId() ); tmp
titles = tmp.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8")).collect().get(0).getTitle(); .filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getContext()
.size());
Assertions
.assertEquals(
"dh-ch",
tmp
.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getContext()
.get(0)
.getId());
titles = tmp
.filter(p -> p.getId().equals("50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8"))
.collect()
.get(0)
.getTitle();
Assertions.assertEquals(2, titles.size()); Assertions.assertEquals(2, titles.size());
Assertions.assertTrue(titles.stream().anyMatch(t -> t.getQualifier().getClassid().equals("main title") && t.getValue().toLowerCase().startsWith(prefix))); Assertions
.assertTrue(
titles
.stream()
.anyMatch(
t -> t.getQualifier().getClassid().equals("main title")
&& t.getValue().toLowerCase().startsWith(prefix)));
} }
} }