diff --git a/dhp-workflows/dhp-graph-provision/pom.xml b/dhp-workflows/dhp-graph-provision/pom.xml index e62fcdf19..4b4e6c1c4 100644 --- a/dhp-workflows/dhp-graph-provision/pom.xml +++ b/dhp-workflows/dhp-graph-provision/pom.xml @@ -18,7 +18,7 @@ scala-compile-first - initialize + process-resources add-source compile @@ -59,12 +59,6 @@ com.jayway.jsonpath json-path - - - org.slf4j - slf4j-api - - dom4j @@ -160,6 +154,26 @@ org.apache.zookeeper zookeeper + + ant + org.apache.ant + + + antlr4-runtime + org.antlr + + + woodstox-core + com.fasterxml.woodstox + + + log4j + * + + + org.apache.logging.log4j + * + @@ -206,5 +220,90 @@ + + + spark-24 + + true + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/sparksolr-3 + + + + + + + + + + + spark-34 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/sparksolr-4 + + + + + + + + + + + spark-35 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/sparksolr-4 + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 01d161b6b..81398016a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -14,4 +14,7 @@ public class ProvisionConstants { return format + SEPARATOR + LAYOUT + SEPARATOR + INTERPRETATION; } + public static final String PUBLIC_ALIAS_NAME = "public"; + public static final String SHADOW_ALIAS_NAME = "shadow"; + } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 0033978bf..44426e8c5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -9,6 +9,7 @@ import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.response.UpdateResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,7 +24,7 @@ public class SolrAdminApplication implements Closeable { private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class); enum Action { - DELETE_BY_QUERY, COMMIT + DELETE_BY_QUERY, COMMIT, UPDATE_ALIASES } private final CloudSolrClient solrClient; @@ -39,9 +40,6 @@ public class SolrAdminApplication implements Closeable { final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - final String format = parser.get("format"); - log.info("format: {}", format); - final Action action = Action.valueOf(parser.get("action")); log.info("action: {}", action); @@ -59,11 +57,21 @@ public class SolrAdminApplication implements Closeable { final String zkHost = isLookup.getZkHost(); log.info("zkHost: {}", zkHost); - final String collection = 
ProvisionConstants.getCollectionName(format); - log.info("collection: {}", collection); + final String publicFormat = parser.get("publicFormat"); + log.info("publicFormat: {}", publicFormat); + + final String shadowFormat = parser.get("shadowFormat"); + log.info("shadowFormat: {}", shadowFormat); + + // get collection names from metadata format profiles names + final String publicCollection = ProvisionConstants.getCollectionName(publicFormat); + log.info("publicCollection: {}", publicCollection); + + final String shadowCollection = ProvisionConstants.getCollectionName(shadowFormat); + log.info("shadowCollection: {}", shadowCollection); try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) { - app.execute(action, collection, query, commit); + app.execute(action, query, commit, publicCollection, shadowCollection); } } @@ -72,22 +80,29 @@ public class SolrAdminApplication implements Closeable { this.solrClient = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot()).build(); } - public SolrResponse commit(String collection) throws IOException, SolrServerException { - return execute(Action.COMMIT, collection, null, true); + public SolrResponse commit(String shadowCollection) throws IOException, SolrServerException { + return execute(Action.COMMIT, null, true, null, shadowCollection); } - public SolrResponse execute(Action action, String collection, String query, boolean commit) + public SolrResponse execute(Action action, String query, boolean commit, + String publicCollection, String shadowCollection) throws IOException, SolrServerException { switch (action) { case DELETE_BY_QUERY: - UpdateResponse rsp = solrClient.deleteByQuery(collection, query); + UpdateResponse rsp = solrClient.deleteByQuery(shadowCollection, query); if (commit) { - solrClient.commit(collection); + return solrClient.commit(shadowCollection); } return rsp; + case COMMIT: - return solrClient.commit(collection); + return solrClient.commit(shadowCollection); + + case UPDATE_ALIASES: + this.updateAliases(publicCollection, shadowCollection); + return null; + default: throw new IllegalArgumentException("action not managed: " + action); } @@ -98,4 +113,28 @@ public class SolrAdminApplication implements Closeable { solrClient.close(); } + private void updateAliases(String publicCollection, String shadowCollection) + throws SolrServerException, IOException { + + // delete current aliases + this.deleteAlias(ProvisionConstants.PUBLIC_ALIAS_NAME); + this.deleteAlias(ProvisionConstants.SHADOW_ALIAS_NAME); + + // create aliases + this.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, publicCollection); + this.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, shadowCollection); + + } + + public SolrResponse deleteAlias(String aliasName) throws SolrServerException, IOException { + CollectionAdminRequest.DeleteAlias deleteAliasRequest = CollectionAdminRequest.deleteAlias(aliasName); + return deleteAliasRequest.process(solrClient); + } + + public SolrResponse createAlias(String aliasName, String collection) throws IOException, SolrServerException { + CollectionAdminRequest.CreateAlias createAliasRequest = CollectionAdminRequest + .createAlias(aliasName, collection); + return createAliasRequest.process(solrClient); + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java index faa18851b..06a35eda5 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java @@ -36,7 +36,7 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { private final String inputPath; - private final String format; + private final String shadowFormat; private final String outputPath; @@ -61,8 +61,8 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - final String format = parser.get("format"); - log.info("format: {}", format); + final String shadowFormat = parser.get("shadowFormat"); + log.info("shadowFormat: {}", shadowFormat); final String outputPath = Optional .ofNullable(parser.get("outputPath")) @@ -95,27 +95,24 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl)); - new SolrRecordDumpJob(spark, inputPath, format, outputPath).run(isLookup); + new SolrRecordDumpJob(spark, inputPath, shadowFormat, outputPath).run(isLookup); }); } - public SolrRecordDumpJob(SparkSession spark, String inputPath, String format, String outputPath) { + public SolrRecordDumpJob(SparkSession spark, String inputPath, String shadowFormat, String outputPath) { this.spark = spark; this.inputPath = inputPath; - this.format = format; + this.shadowFormat = shadowFormat; this.outputPath = outputPath; } public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException { - final String fields = isLookup.getLayoutSource(format); + final String fields = isLookup.getLayoutSource(shadowFormat); log.info("fields: {}", fields); final String xslt = isLookup.getLayoutTransformer(); - final String dsId = isLookup.getDsId(format); - log.info("dsId: {}", dsId); - - final String indexRecordXslt = getLayoutTransformer(format, fields, xslt); + final String indexRecordXslt = getLayoutTransformer(shadowFormat, fields, xslt); log.info("indexRecordTransformer {}", indexRecordXslt); final Encoder<TupleWrapper> encoder = Encoders.bean(TupleWrapper.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index d49a0596b..386445057 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -25,6 +25,7 @@ import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; import eu.dnetlib.dhp.oa.provision.model.TupleWrapper; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; +import eu.dnetlib.dhp.sparksolr.DHPSolrSupport; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -39,6 +40,8 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { private final String format; + private final String shadowCollection; + private final int batchSize; private final SparkSession spark; @@ -62,8 +65,11 @@ public class XmlIndexingJob extends
AbstractSolrRecordTransformJob { final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - final String format = parser.get("format"); - log.info("format: {}", format); + final String shadowFormat = parser.get("shadowFormat"); + log.info("shadowFormat: {}", shadowFormat); + + final String shadowCollection = ProvisionConstants.getCollectionName(shadowFormat); + log.info("shadowCollection: {}", shadowCollection); final Integer batchSize = Optional .ofNullable(parser.get("batchSize")) @@ -84,15 +90,17 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl)); - new XmlIndexingJob(spark, inputPath, format, batchSize) + new XmlIndexingJob(spark, inputPath, shadowFormat, shadowCollection, batchSize) .run(isLookup); }); } - public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize) { + public XmlIndexingJob(SparkSession spark, String inputPath, String format, String shadowCollection, + Integer batchSize) { this.spark = spark; this.inputPath = inputPath; this.format = format; + this.shadowCollection = shadowCollection; this.batchSize = batchSize; } @@ -102,12 +110,6 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { final String xslt = isLookup.getLayoutTransformer(); - final String dsId = isLookup.getDsId(format); - log.info("dsId: {}", dsId); - - final String collection = ProvisionConstants.getCollectionName(format); - log.info("collection: {}", collection); - final String zkHost = isLookup.getZkHost(); log.info("zkHost: {}", zkHost); @@ -129,7 +131,7 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { .javaRDD() .map( t -> new StreamingInputDocumentFactory().parseDocument(t.getXml(), t.getJson())); - SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd()); + DHPSolrSupport.indexDocs(zkHost, shadowCollection, batchSize, docs.rdd()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index 51f65c7ac..75a8e048a 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -213,7 +213,7 @@ public class StreamingInputDocumentFactory implements Serializable { } writer.close(); indexDocument.addField(INDEX_RESULT, results.toString()); - indexDocument.addField(INDEX_JSON_RESULT, json); + //indexDocument.addField(INDEX_JSON_RESULT, json); } finally { outputFactory.remove(); eventFactory.remove(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index befebe0bb..e1d19b66f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -5,14 +5,11 @@ import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.removePrefix; import static 
eu.dnetlib.dhp.oa.provision.utils.XmlSerializationUtils.escapeXml; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import javax.swing.text.html.Option; - import org.apache.commons.lang3.StringUtils; import org.stringtemplate.v4.ST; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 398e81e8c..fbd647ae4 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -170,8 +170,8 @@ public class XmlSerializationUtils { return sb.toString(); } - // - // + // + // public static String usageMeasureAsXmlElement(String name, Measure measure) { StringBuilder sb = new StringBuilder(); for (KeyValue kv : measure.getUnit()) { @@ -179,8 +179,7 @@ public class XmlSerializationUtils { .append("<") .append(name) .append(" ") - .append(attr("id", measure.getId())) - .append(attr("count", kv.getValue())) + .append(attr(measure.getId(), kv.getValue())) .append(attr("datasource", kv.getKey())) .append(" />"); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml index c62f60f10..c4caad91e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/oaipmh/oozie_app/workflow.xml @@ -69,13 +69,13 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + yarn cluster diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json index 7e5734222..3c2c1e05d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json @@ -13,7 +13,7 @@ }, { "paramName": "f", - "paramLongName": "format", + "paramLongName": "shadowFormat", "paramDescription": "MDFormat name found in the IS profile", "paramRequired": true }, diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json index 3396020e0..c8364bb28 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json @@ -13,8 +13,8 @@ }, { "paramName": "f", - "paramLongName": "format", - "paramDescription": "MDFormat name found in the IS profile", + "paramLongName": "shadowFormat", + "paramDescription": "MDFormat name found in the IS profile bound to the shadow index collection to feed", "paramRequired": true }, { diff --git 
a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json index 23eca2f7b..23a378857 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json @@ -5,12 +5,6 @@ "paramDescription": "the URL to the ISLookUp Service", "paramRequired": true }, - { - "paramName": "f", - "paramLongName": "format", - "paramDescription": "metadata format profile name", - "paramRequired": true - }, { "paramName": "a", "paramLongName": "action", @@ -28,5 +22,18 @@ "paramLongName": "commit", "paramDescription": "should the action be followed by a commit?", "paramRequired": false + }, + { + "paramName": "pf", + "paramLongName": "publicFormat", + "paramDescription": "the name of the public metadata format profile - used to create an alias", + "paramRequired": false + }, + { + "paramName": "sf", + "paramLongName": "shadowFormat", + "paramDescription": "the name of the shadow metadata format profile - used to create an alias", + "paramRequired": false } + ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index a754c7a5d..6c58d2466 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -35,9 +35,13 @@ maximum number of relations allowed for a each entity grouping by target - format + shadowFormat metadata format name (DMF|TMF) + + publicFormat + the public metadata format - used to create the public collection alias + batchSize number of records to be included in each indexing request @@ -133,6 +137,7 @@ ${wf:conf('resumeFrom') eq 'create_payloads'} ${wf:conf('resumeFrom') eq 'drop_solr_collection'} ${wf:conf('resumeFrom') eq 'to_solr_index'} + ${wf:conf('resumeFrom') eq 'update_solr_aliases'} @@ -641,8 +646,8 @@ eu.dnetlib.dhp.oa.provision.SolrAdminApplication --isLookupUrl${isLookupUrl} - --format${format} --actionDELETE_BY_QUERY + --shadowFormat${shadowFormat} --query${solrDeletionQuery} --committrue @@ -672,7 +677,7 @@ --inputPath${workingDir}/xml_json --isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --batchSize${batchSize} @@ -689,7 +694,7 @@ eu.dnetlib.dhp.oa.provision.SolrAdminApplication --isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --actionCOMMIT @@ -714,12 +719,31 @@ --inputPath${workingDir}/xml_json --isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --outputPath${workingDir}/solr_documents + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.oa.provision.SolrAdminApplication + --isLookupUrl${isLookupUrl} + --actionUPDATE_ALIASES + --publicFormat${publicFormat} + --shadowFormat${shadowFormat} + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/main/sparksolr-3/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java b/dhp-workflows/dhp-graph-provision/src/main/sparksolr-3/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java new file mode 100644 
index 000000000..295f0f54d --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/sparksolr-3/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java @@ -0,0 +1,12 @@ +package eu.dnetlib.dhp.sparksolr; + +import com.lucidworks.spark.util.SolrSupport; +import org.apache.solr.common.SolrInputDocument; +import org.apache.spark.rdd.RDD; + +public class DHPSolrSupport { + + static public void indexDocs(String zkhost, String collection, int batchSize, RDD<SolrInputDocument> docs) { + SolrSupport.indexDocs(zkhost, collection, batchSize, docs); + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/sparksolr-4/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java b/dhp-workflows/dhp-graph-provision/src/main/sparksolr-4/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java new file mode 100644 index 000000000..6b85176a3 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/main/sparksolr-4/eu/dnetlib/dhp/sparksolr/DHPSolrSupport.java @@ -0,0 +1,12 @@ +package eu.dnetlib.dhp.sparksolr; + +import com.lucidworks.spark.util.SolrSupport; +import org.apache.solr.common.SolrInputDocument; +import org.apache.spark.rdd.RDD; + +public class DHPSolrSupport { + + static public void indexDocs(String zkhost, String collection, int batchSize, RDD<SolrInputDocument> docs) { + SolrSupport.indexDocs(zkhost, collection, batchSize, com.lucidworks.spark.BatchSizeType.NUM_DOCS, docs); + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index c22a24185..7cdaf36af 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -67,7 +67,7 @@ public class PrepareRelationsJobTest { @Test void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { - final int maxRelations = 20; + final int maxRelations = 5; PrepareRelationsJob .main( new String[] { @@ -86,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - assertEquals(maxRelations, out.count()); + assertEquals(44, out.count()); Dataset<Row> freq = out .toDF() @@ -101,12 +101,8 @@ public class PrepareRelationsJobTest { long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); assertEquals(outcome, participation); - assertTrue(outcome > affiliation); - assertTrue(participation > affiliation); - - assertEquals(7, outcome); - assertEquals(7, participation); - assertEquals(6, affiliation); + assertEquals(outcome, affiliation); + assertEquals(4, affiliation); } protected List<Row> getRows(Dataset<Row> freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index 9d5bff3cf..3834f530e 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -4,16 +4,20 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import org.apache.solr.client.solrj.request.SolrPing; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import
org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; class SolrAdminApplicationTest extends SolrTest { @Test void testPing() throws Exception { - SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); + final SolrPing ping = new SolrPing(); + ping.getParams().set("collection", ProvisionConstants.SHADOW_ALIAS_NAME); + SolrPingResponse pingResponse = ping.process(miniCluster.getSolrClient()); + log.info("pingResponse: '{}'", pingResponse.getStatus()); assertEquals(0, pingResponse.getStatus()); } @@ -24,7 +28,7 @@ class SolrAdminApplicationTest extends SolrTest { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin - .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); + .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, "*:*", false, null, SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); } @@ -34,9 +38,30 @@ class SolrAdminApplicationTest extends SolrTest { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); - UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); + UpdateResponse rsp = (UpdateResponse) admin.commit(SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); } + @Test + void testAdminApplication_CREATE_ALIAS() throws Exception { + + SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); + + CollectionAdminResponse rsp = (CollectionAdminResponse) admin + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, SHADOW_COLLECTION); + assertEquals(0, rsp.getStatus()); + + } + + @Test + void testAdminApplication_DELETE_ALIAS() throws Exception { + + SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); + + CollectionAdminResponse rsp = (CollectionAdminResponse) admin.deleteAlias(ProvisionConstants.PUBLIC_ALIAS_NAME); + assertEquals(0, rsp.getStatus()); + + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java index 424262eef..90aef5adc 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java @@ -1,21 +1,40 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; import java.io.IOException; import java.net.URI; +import java.nio.file.Path; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.embedded.JettyConfig; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.CollectionParams; import 
org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CoreAdminParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; import org.apache.spark.SparkConf; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; @@ -23,7 +42,18 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class SolrConfigExploreTest extends SolrExploreTest { +public class SolrConfigExploreTest { + + protected static final Logger log = LoggerFactory.getLogger(SolrConfigExploreTest.class); + + protected static final String SHADOW_FORMAT = "c1"; + protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire"; + protected static final String PUBLIC_FORMAT = "c2"; + protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire"; + + protected static final String CONFIG_NAME = "testConfig"; + + protected static SolrAdminApplication admin; protected static SparkSession spark; @@ -35,15 +65,17 @@ public class SolrConfigExploreTest extends SolrExploreTest { @Mock private ISLookupClient isLookupClient; + @TempDir + public static Path workingDir; + + protected static MiniSolrCloudCluster miniCluster; + @BeforeEach public void prepareMocks() throws ISLookUpException, IOException { isLookupClient.setIsLookup(isLookUpService); int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -54,7 +86,7 @@ public class SolrConfigExploreTest extends SolrExploreTest { } @BeforeAll - public static void before() { + public static void setup() throws Exception { SparkConf conf = new SparkConf(); conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); @@ -70,15 +102,75 @@ public class SolrConfigExploreTest extends SolrExploreTest { spark = SparkSession .builder() - .appName(XmlIndexingJobTest.class.getSimpleName()) + .appName(SolrConfigExploreTest.class.getSimpleName()) .config(conf) .getOrCreate(); + // random unassigned HTTP port + final int jettyPort = 0; + final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); + + log.info(String.format("working directory: %s", workingDir.toString())); + System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); + + // create a MiniSolrCloudCluster instance + miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); + + // Upload Solr configuration directory to ZooKeeper + String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; + File configDir = new File(solrZKConfigDir); + + miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); + + // override settings in the solrconfig include + 
System.setProperty("solr.tests.maxBufferedDocs", "100000"); + System.setProperty("solr.tests.maxIndexingThreads", "-1"); + System.setProperty("solr.tests.ramBufferSizeMB", "100"); + + // use non-test classes so RandomizedRunner isn't necessary + System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); + System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); + System.setProperty("solr.lock.type", "single"); + + log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); + + NamedList<Object> res = createCollection( + miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); + + res = createCollection( + miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); + CollectionAdminResponse rsp = (CollectionAdminResponse) admin + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); + assertEquals(0, rsp.getStatus()); + rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); + assertEquals(0, rsp.getStatus()); + + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); + } @AfterAll - public static void tearDown() { + public static void tearDown() throws Exception { spark.stop(); + miniCluster.shutdown(); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test @@ -86,8 +178,10 @@ String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) + .run(isLookupClient); + Assertions + .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer", @@ -101,14 +195,14 @@ query.add(CommonParams.Q, q); query.set("debugQuery", "on"); - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); + log.info("Submit query to Solr with params: {}", query); + QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query); // System.out.println(rsp.getHighlighting()); // System.out.println(rsp.getExplainMap()); for (SolrDocument doc : rsp.getResults()) { - System.out - .println( + log + .info( doc.get("score") + "\t" + doc.get("__indexrecordidentifier") + "\t" + doc.get("resultidentifier") + "\t" + @@ -122,4 +216,18 @@ } } } + + protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards, + int replicationFactor, int maxShardsPerNode, String configName) throws Exception { + ModifiableSolrParams modParams = new ModifiableSolrParams(); + modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
+ modParams.set("name", name); + modParams.set("numShards", numShards); + modParams.set("replicationFactor", replicationFactor); + modParams.set("collection.configName", configName); + modParams.set("maxShardsPerNode", maxShardsPerNode); + QueryRequest request = new QueryRequest(modParams); + request.setPath("/admin/collections"); + return client.request(request); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java index 625b6d131..c04fa1cc6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -2,24 +2,15 @@ package eu.dnetlib.dhp.oa.provision; import java.io.IOException; -import java.io.StringReader; import java.net.URI; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrInputField; import org.apache.solr.common.params.CommonParams; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.dom4j.io.SAXReader; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -50,9 +41,6 @@ public class SolrConfigTest extends SolrTest { int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -95,9 +83,10 @@ public class SolrConfigTest extends SolrTest { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize) + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) .run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + Assertions + .assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer", @@ -109,8 +98,8 @@ public class SolrConfigTest extends SolrTest { SolrQuery query = new SolrQuery(); query.add(CommonParams.Q, q); - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); + log.info("Submit query to Solr with params: {}", query); + QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query); for (SolrDocument doc : rsp.getResults()) { System.out diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java index 34a9465a7..5b5e42fbd 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java @@ -34,58 +34,6 @@ public abstract class SolrExploreTest { @TempDir public static Path workingDir; - @BeforeAll - public static void setup() throws Exception { - - // random unassigned HTTP port - final int jettyPort = 0; - final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); - - log.info(String.format("working directory: %s", workingDir.toString())); - System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); - - // create a MiniSolrCloudCluster instance - miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); - - // Upload Solr configuration directory to ZooKeeper - String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; - File configDir = new File(solrZKConfigDir); - - miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); - - // override settings in the solrconfig include - System.setProperty("solr.tests.maxBufferedDocs", "100000"); - System.setProperty("solr.tests.maxIndexingThreads", "-1"); - System.setProperty("solr.tests.ramBufferSizeMB", "100"); - - // use non-test classes so RandomizedRunner isn't necessary - System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); - System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); - System.setProperty("solr.lock.type", "single"); - - log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); - - NamedList res = createCollection( - miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); - res.forEach(o -> log.info(o.toString())); - - miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); - - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); - - } - @AfterAll public static void shutDown() throws Exception { miniCluster.shutdown(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java index c2cd3497a..b96ab8481 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJobTest.java @@ -91,9 +91,6 @@ class SolrRecordDumpJobTest { public void prepareMocks() throws ISLookUpException, IOException { isLookupClient.setIsLookup(isLookUpService); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) .thenReturn(IOUtils.toString(getClass().getResourceAsStream("fields.xml"))); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java index 79527b891..2caf09799 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java +++ 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.File; import java.nio.file.Path; @@ -10,6 +12,7 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CoreAdminParams; @@ -21,14 +24,19 @@ import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class SolrTest { protected static final Logger log = LoggerFactory.getLogger(SolrTest.class); - protected static final String FORMAT = "test"; - protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire"; + protected static final String SHADOW_FORMAT = "c1"; + protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire"; + protected static final String PUBLIC_FORMAT = "c2"; + protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire"; + protected static final String CONFIG_NAME = "testConfig"; + protected static SolrAdminApplication admin; + protected static MiniSolrCloudCluster miniCluster; @TempDir @@ -72,10 +82,21 @@ public abstract class SolrTest { .toString()); NamedList<Object> res = createCollection( - miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); + miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); res.forEach(o -> log.info(o.toString())); - miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); + // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); + + res = createCollection( + miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); + CollectionAdminResponse rsp = (CollectionAdminResponse) admin + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); + assertEquals(0, rsp.getStatus()); + rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); + assertEquals(0, rsp.getStatus()); log .info( @@ -83,12 +104,12 @@ .getClusterStatus() .process(miniCluster.getSolrClient()) .toString()); - } @AfterAll public static void shutDown() throws Exception { miniCluster.shutdown(); + admin.close(); FileUtils.deleteDirectory(workingDir.toFile()); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index ee2f5d349..8149c2526 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -10,6 +10,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrResponse; import
org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.CommonParams; import org.apache.spark.SparkConf; @@ -50,9 +51,6 @@ public class XmlIndexingJobTest extends SolrTest { int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -103,37 +101,62 @@ public class XmlIndexingJobTest extends SolrTest { long nRecord = records.count(); - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient); + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) + .run(isLookupClient); - assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + assertEquals(0, miniCluster.getSolrClient().commit(SHADOW_COLLECTION).getStatus()); - QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); + QueryResponse rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( nRecord, rsp.getResults().getNumFound(), "the number of indexed records should be equal to the number of input records"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( 4, rsp.getResults().getNumFound(), "the number of indexed records having isgreen = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( 2, rsp.getResults().getNumFound(), "the number of indexed records having openaccesscolor = bronze"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( 0, rsp.getResults().getNumFound(), "the number of indexed records having isindiamondjournal = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( 0, rsp.getResults().getNumFound(), "the number of indexed records having publiclyfunded = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( 35, rsp.getResults().getNumFound(), "the number of indexed records having peerreviewed = true"); @@ -141,6 +164,7 @@ public class XmlIndexingJobTest extends SolrTest { rsp = miniCluster .getSolrClient() .query( + ProvisionConstants.SHADOW_ALIAS_NAME, new SolrQuery() .add(CommonParams.Q, 
"objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"") .add(CommonParams.FL, "__json")); @@ -158,6 +182,22 @@ public class XmlIndexingJobTest extends SolrTest { log.info((String) json.get()); + admin + .execute( + SolrAdminApplication.Action.UPDATE_ALIASES, null, false, + SHADOW_COLLECTION, PUBLIC_COLLECTION); + + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.PUBLIC_ALIAS_NAME, + new SolrQuery() + .add(CommonParams.Q, "objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"") + .add(CommonParams.FL, "__json")); + + assertEquals( + 1, rsp.getResults().getNumFound(), + "the number of indexed records having the given identifier, found in the public collection"); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 055156a32..d617991a1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -9,7 +9,6 @@ import java.io.StringReader; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.commons.math3.optim.AbstractOptimizationProblem; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; @@ -52,12 +51,7 @@ public class XmlRecordFactoryTest { assertNotNull(doc); - System.out.println(doc.asXML()); - - assertEquals("10", doc.valueOf("//measure[@id = 'downloads']/@count")); - assertEquals("fakeds", doc.valueOf("//measure[@id = 'downloads']/@datasource")); - - assertEquals(0, doc.selectNodes("//measure[@id = 'views']").size()); + // System.out.println(doc.asXML()); assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.1098-rsta.2020.0257.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.1098-rsta.2020.0257.xml deleted file mode 100644 index 648a59a7c..000000000 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.1098-rsta.2020.0257.xml +++ /dev/null @@ -1,586 +0,0 @@ - - -
- doi_dedup___::e225555a08a082ad8f53f179bc59c5d0 - 2023-01-27T05:32:10Z -
[deleted fixture body; the XML markup did not survive extraction. The file held the full dedup
record for doi:10.1098/rsta.2020.0257, "A completely automated pipeline for 3D reconstruction of
human heart from 2D cine magnetic resonance slices" (Phil. Trans. R. Soc. A, 2021-10-01;
pmid:34689630, pmc:PMC8543046; authors incl. Vicente Grau, Abhirup Banerjee, Ernesto Zacur,
Robin P. Choudhury, Blanca Rodriguez, Julia Camps, Yoram Rudy, Christopher M. Andrews), merging
doiboost/MAG, ORA (od______1064) and Europe PMC (od_______267) children, with abstract, subject
terms, the University of Oxford organization relation, the CompBioMed2 project (EC H2020, grant
823712), two related figshare deposits (10.6084/m9.figshare.15656924, 10.6084/m9.figshare.15656927),
and one instance/webresource block per access URL (ORA, NCBI PMC, doi.org, PubMed, Europe PMC).]
\ No newline at end of file
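All four fixtures deleted in this patch share the same shape: a D-Net index record wrapping an
OAF entity, its relations, and one instance per hosting web resource. The skeleton below is a
hedged reconstruction from the surviving text; element and namespace names follow the usual
OpenAIRE OAF conventions and are indicative rather than verbatim copies of the deleted files.

<record>
  <result xmlns:dri="http://www.driver-repository.eu/namespace/dri"
          xmlns:oaf="http://namespace.openaire.eu/oaf">
    <header>
      <dri:objIdentifier><!-- dedup record id, e.g. doi_dedup___::... --></dri:objIdentifier>
      <dri:dateOfCollection>2023-01-05T00:00:00+0000</dri:dateOfCollection>
    </header>
    <metadata>
      <oaf:entity>
        <oaf:result>
          <!-- titles, creators, dates, abstract, pids, subjects, provenance -->
          <rels>
            <!-- related organizations, projects, datasets -->
          </rels>
          <children>
            <instance>
              <webresource>
                <url>https://doi.org/10.1098/rsta.2020.0257</url>
              </webresource>
            </instance>
          </children>
        </oaf:result>
      </oaf:entity>
    </metadata>
  </result>
</record>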
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.2196-33081-ethics.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.2196-33081-ethics.xml
deleted file mode 100644
index 4c4443447..000000000
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.2196-33081-ethics.xml
+++ /dev/null
@@ -1,238 +0,0 @@
[deleted fixture body (238 lines); XML markup lost in extraction. The file held dedup record
doi_dedup___::88a9861b26cdda1c3dd162d11f0bedbe for doi:10.2196/33081, "Ethical Issues of Digital
Twins for Personalized Health Care Service: Preliminary Mapping Study" (Journal of Medical
Internet Research, 24(1):e33081, 2022; pmid:35099399, pmc:PMC8844982; authors Ki-hun Kim,
Pei-hua Huang, Maartje Schermer), merging narcis, doiboost/MAG, PMC and DANS children, with
abstract, subject terms, the merged child titles, and one instance/webresource block per access
URL (PubMed, doi.org, pure.eur.nl).]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.3390-pr9111967-covid.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.3390-pr9111967-covid.xml
deleted file mode 100644
index 6287f90ee..000000000
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/edith-demo/10.3390-pr9111967-covid.xml
+++ /dev/null
@@ -1,223 +0,0 @@
[deleted fixture body (223 lines); XML markup lost in extraction. The file held record
doi_________::c166d06aeaed817937a79a400906a4b9 for doi:10.3390/pr9111967, "Digital Twins for
Continuous mRNA Production" (Processes, 2021-11-04), merging openapc (APC 2146.08 EUR) and
doiboost/MAG children, with abstract, subject terms, and a single doi.org instance/webresource
block under a CC-BY 4.0 license.]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/sentinel.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/sentinel.xml
deleted file mode 100644
index 475a375d3..000000000
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/sentinel.xml
+++ /dev/null
@@ -1,138 +0,0 @@
[deleted fixture body (138 lines); XML markup lost in extraction. The file held dedup record
doi_dedup___::10a910f4a66b7f4bce8407d7a486a80a, "Sentinel-3 NDVI ARD and Long Term Statistics
(1999-2019) from the Copernicus Global Land Service over Lombardia" (Marasco Pier Lorenzo,
Zenodo, 2022-08-05), merging four datacite children (10.5281/zenodo.6967372,
10.5281/zenodo.6967373, 10.5281/zenodo.6969999, 10.5281/zenodo.6970067), with description,
subject terms, and one doi.org instance/webresource block per deposit under CC-BY 4.0.]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
index ba3fa83df..8567acf85 100644
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml
@@ -68,15 +68,12 @@
[hunk body lost in extraction: per the hunk header, four field definitions were removed and one
added in this region; the surrounding context lines did not survive either]
@@ -93,6 +90,7 @@
[hunk body lost in extraction: one field definition added]
@@ -122,6 +120,7 @@
[hunk body lost in extraction: one field definition added]
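The fields.xml hunks above kept only their add/remove counts. Each line in this layout file
declares one index field as a FIELD element mapping an XPath over the record to a named field.
The entry below is a hedged sketch of that shape (attribute set assumed from the d-net layout
convention; it is not one of the lines actually touched by this diff):

<FIELD indexable="true" name="resulttitle" result="false" stat="false"
       xpath="//*[local-name()='entity']/*[local-name()='result']/title"/>
<!-- attribute names and the xpath are illustrative assumptions, not verbatim content -->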
}, "value": "" }, - "url": ["asdasd://not a URL"] + "url": [ + "asdasd://not a URL" + ] } ], "journal": { @@ -1607,9 +1561,7 @@ "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies" }, - "source": [ - {"value": "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):"} - ], + "source": [ { "value" : "Bulletin of the National Technical University \"KhPI\" A series of \"Information and Modeling\"; № 1 (3) (2020):" } ], "subject": [ { "dataInfo": { diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema index 9720d3f37..d4e1cbede 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -194,228 +194,173 @@ - - - - - - + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - + - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - + + + + + + + + \ No newline at end of file