diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 459ca0b50..44426e8c5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -40,9 +40,6 @@ public class SolrAdminApplication implements Closeable { final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - final String format = parser.get("format"); - log.info("format: {}", format); - final Action action = Action.valueOf(parser.get("action")); log.info("action: {}", action); @@ -60,9 +57,6 @@ public class SolrAdminApplication implements Closeable { final String zkHost = isLookup.getZkHost(); log.info("zkHost: {}", zkHost); - final String collection = ProvisionConstants.getCollectionName(format); - log.info("collection: {}", collection); - final String publicFormat = parser.get("publicFormat"); log.info("publicFormat: {}", publicFormat); @@ -77,7 +71,7 @@ public class SolrAdminApplication implements Closeable { log.info("shadowCollection: {}", shadowCollection); try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) { - app.execute(action, collection, query, commit, publicCollection, shadowCollection); + app.execute(action, query, commit, publicCollection, shadowCollection); } } @@ -86,24 +80,24 @@ public class SolrAdminApplication implements Closeable { this.solrClient = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot()).build(); } - public SolrResponse commit(String collection) throws IOException, SolrServerException { - return execute(Action.COMMIT, collection, null, true, null, null); + public SolrResponse commit(String shadowCollection) throws IOException, SolrServerException { + return execute(Action.COMMIT, null, 
true, null, shadowCollection); } - public SolrResponse execute(Action action, String collection, String query, boolean commit, + public SolrResponse execute(Action action, String query, boolean commit, String publicCollection, String shadowCollection) throws IOException, SolrServerException { switch (action) { case DELETE_BY_QUERY: - UpdateResponse rsp = solrClient.deleteByQuery(collection, query); + UpdateResponse rsp = solrClient.deleteByQuery(shadowCollection, query); if (commit) { - return solrClient.commit(collection); + return solrClient.commit(shadowCollection); } return rsp; case COMMIT: - return solrClient.commit(collection); + return solrClient.commit(shadowCollection); case UPDATE_ALIASES: this.updateAliases(publicCollection, shadowCollection); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java index faa18851b..06a35eda5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrRecordDumpJob.java @@ -36,7 +36,7 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { private final String inputPath; - private final String format; + private final String shadowFormat; private final String outputPath; @@ -61,8 +61,8 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - final String format = parser.get("format"); - log.info("format: {}", format); + final String shadowFormat = parser.get("shadowFormat"); + log.info("shadowFormat: {}", shadowFormat); final String outputPath = Optional .ofNullable(parser.get("outputPath")) @@ -95,27 +95,24 @@ public class SolrRecordDumpJob extends AbstractSolrRecordTransformJob { final String isLookupUrl = 
parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl)); - new SolrRecordDumpJob(spark, inputPath, format, outputPath).run(isLookup); + new SolrRecordDumpJob(spark, inputPath, shadowFormat, outputPath).run(isLookup); }); } - public SolrRecordDumpJob(SparkSession spark, String inputPath, String format, String outputPath) { + public SolrRecordDumpJob(SparkSession spark, String inputPath, String shadowFormat, String outputPath) { this.spark = spark; this.inputPath = inputPath; - this.format = format; + this.shadowFormat = shadowFormat; this.outputPath = outputPath; } public void run(ISLookupClient isLookup) throws ISLookUpException, TransformerException { - final String fields = isLookup.getLayoutSource(format); + final String fields = isLookup.getLayoutSource(shadowFormat); log.info("fields: {}", fields); final String xslt = isLookup.getLayoutTransformer(); - final String dsId = isLookup.getDsId(format); - log.info("dsId: {}", dsId); - - final String indexRecordXslt = getLayoutTransformer(format, fields, xslt); + final String indexRecordXslt = getLayoutTransformer(shadowFormat, fields, xslt); log.info("indexRecordTransformer {}", indexRecordXslt); final Encoder encoder = Encoders.bean(TupleWrapper.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index 78154e0ab..386445057 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -40,6 +40,8 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { private final String format; + private final String shadowCollection; + private final int batchSize; private final SparkSession 
spark; @@ -63,8 +65,11 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - final String format = parser.get("format"); - log.info("format: {}", format); + final String shadowFormat = parser.get("shadowFormat"); + log.info("shadowFormat: {}", shadowFormat); + + final String shadowCollection = ProvisionConstants.getCollectionName(shadowFormat); + log.info("shadowCollection: {}", shadowCollection); final Integer batchSize = Optional .ofNullable(parser.get("batchSize")) @@ -85,15 +90,17 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl)); - new XmlIndexingJob(spark, inputPath, format, batchSize) + new XmlIndexingJob(spark, inputPath, shadowFormat, shadowCollection, batchSize) .run(isLookup); }); } - public XmlIndexingJob(SparkSession spark, String inputPath, String format, Integer batchSize) { + public XmlIndexingJob(SparkSession spark, String inputPath, String format, String shadowCollection, + Integer batchSize) { this.spark = spark; this.inputPath = inputPath; this.format = format; + this.shadowCollection = shadowCollection; this.batchSize = batchSize; } @@ -103,12 +110,6 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { final String xslt = isLookup.getLayoutTransformer(); - final String dsId = isLookup.getDsId(format); - log.info("dsId: {}", dsId); - - final String collection = ProvisionConstants.getCollectionName(format); - log.info("collection: {}", collection); - final String zkHost = isLookup.getZkHost(); log.info("zkHost: {}", zkHost); @@ -130,7 +131,7 @@ public class XmlIndexingJob extends AbstractSolrRecordTransformJob { .javaRDD() .map( t -> new StreamingInputDocumentFactory().parseDocument(t.getXml(), 
t.getJson())); - DHPSolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd()); + DHPSolrSupport.indexDocs(zkHost, shadowCollection, batchSize, docs.rdd()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json index 7e5734222..3c2c1e05d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_solr_record_dump.json @@ -13,7 +13,7 @@ }, { "paramName": "f", - "paramLongName": "format", + "paramLongName": "shadowFormat", "paramDescription": "MDFormat name found in the IS profile", "paramRequired": true }, diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json index 3396020e0..c8364bb28 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_update_index.json @@ -13,8 +13,8 @@ }, { "paramName": "f", - "paramLongName": "format", - "paramDescription": "MDFormat name found in the IS profile", + "paramLongName": "shadowFormat", + "paramDescription": "MDFormat name found in the IS profile bound to the shadow index collection to feed", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json index 6e3f21ef2..23a378857 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_solradmin_parameters.json @@ -5,12 +5,6 @@ "paramDescription": "the URL to the ISLookUp Service", "paramRequired": true }, - { - "paramName": "f", - "paramLongName": "format", - "paramDescription": "metadata format profile name", - "paramRequired": true - }, { "paramName": "a", "paramLongName": "action", diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 02195c0aa..6c58d2466 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -35,9 +35,13 @@ maximum number of relations allowed for a each entity grouping by target - format + shadowFormat metadata format name (DMF|TMF) + + publicFormat + the public metadata format - used to create the public collection alias + batchSize number of records to be included in each indexing request @@ -110,14 +114,6 @@ sparkNetworkTimeout configures spark.network.timeout - - publicFormat - the public metadata format - used to create the public collection alias - - - shadowFormat - the shadow metadata format - used to create the shadow collection alias - @@ -650,8 +646,8 @@ eu.dnetlib.dhp.oa.provision.SolrAdminApplication --isLookupUrl${isLookupUrl} - --format${format} --actionDELETE_BY_QUERY + --shadowFormat${shadowFormat} --query${solrDeletionQuery} --committrue @@ -681,7 +677,7 @@ --inputPath${workingDir}/xml_json --isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --batchSize${batchSize} @@ -698,7 +694,7 @@ eu.dnetlib.dhp.oa.provision.SolrAdminApplication 
--isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --actionCOMMIT @@ -723,7 +719,7 @@ --inputPath${workingDir}/xml_json --isLookupUrl${isLookupUrl} - --format${format} + --shadowFormat${shadowFormat} --outputPath${workingDir}/solr_documents @@ -741,7 +737,6 @@ eu.dnetlib.dhp.oa.provision.SolrAdminApplication --isLookupUrl${isLookupUrl} - --format${format} --actionUPDATE_ALIASES --publicFormat${publicFormat} --shadowFormat${shadowFormat} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index 43e4c9ed7..3834f530e 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import org.apache.solr.client.solrj.request.SolrPing; import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; @@ -13,7 +14,10 @@ class SolrAdminApplicationTest extends SolrTest { @Test void testPing() throws Exception { - SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); + final SolrPing ping = new SolrPing(); + ping.getParams().set("collection", ProvisionConstants.SHADOW_ALIAS_NAME); + SolrPingResponse pingResponse = ping.process(miniCluster.getSolrClient()); + log.info("pingResponse: '{}'", pingResponse.getStatus()); assertEquals(0, pingResponse.getStatus()); } @@ -24,7 +28,7 @@ class SolrAdminApplicationTest extends SolrTest { SolrAdminApplication admin = new 
SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin - .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false, null, null); + .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, "*:*", false, null, SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); } @@ -34,7 +38,7 @@ class SolrAdminApplicationTest extends SolrTest { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); - UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); + UpdateResponse rsp = (UpdateResponse) admin.commit(SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); } @@ -45,7 +49,7 @@ class SolrAdminApplicationTest extends SolrTest { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); CollectionAdminResponse rsp = (CollectionAdminResponse) admin - .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, DEFAULT_COLLECTION); + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, SHADOW_COLLECTION); assertEquals(0, rsp.getStatus()); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java index 424262eef..41eac2a30 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigExploreTest.java @@ -1,29 +1,59 @@ package eu.dnetlib.dhp.oa.provision; +import java.io.File; import java.io.IOException; import java.net.URI; +import java.nio.file.Path; +import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.embedded.JettyConfig; +import 
org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CoreAdminParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; import org.apache.spark.SparkConf; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.jupiter.api.Assertions.assertEquals; @ExtendWith(MockitoExtension.class) -public class SolrConfigExploreTest extends SolrExploreTest { +public class SolrConfigExploreTest { + + protected static final Logger log = LoggerFactory.getLogger(SolrConfigExploreTest.class); + + protected static final String SHADOW_FORMAT = "c1"; + protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire"; + protected static final String PUBLIC_FORMAT = "c2"; + protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire"; + + protected static final String CONFIG_NAME = "testConfig"; + + 
protected static SolrAdminApplication admin; protected static SparkSession spark; @@ -35,15 +65,17 @@ public class SolrConfigExploreTest extends SolrExploreTest { @Mock private ISLookupClient isLookupClient; + @TempDir + public static Path workingDir; + + protected static MiniSolrCloudCluster miniCluster; + @BeforeEach public void prepareMocks() throws ISLookUpException, IOException { isLookupClient.setIsLookup(isLookUpService); int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -54,12 +86,12 @@ public class SolrConfigExploreTest extends SolrExploreTest { } @BeforeAll - public static void before() { + public static void setup() throws Exception { SparkConf conf = new SparkConf(); conf.setAppName(XmlIndexingJobTest.class.getSimpleName()); conf.registerKryoClasses(new Class[] { - SerializableSolrInputDocument.class + SerializableSolrInputDocument.class }); conf.setMaster("local[1]"); @@ -69,16 +101,76 @@ public class SolrConfigExploreTest extends SolrExploreTest { conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString()); spark = SparkSession - .builder() - .appName(XmlIndexingJobTest.class.getSimpleName()) - .config(conf) - .getOrCreate(); + .builder() + .appName(SolrConfigExploreTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + + // random unassigned HTTP port + final int jettyPort = 0; + final JettyConfig jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); + + log.info(String.format("working directory: %s", workingDir.toString())); + System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); + + // create a MiniSolrCloudCluster 
instance + miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); + + // Upload Solr configuration directory to ZooKeeper + String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; + File configDir = new File(solrZKConfigDir); + + miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); + + // override settings in the solrconfig include + System.setProperty("solr.tests.maxBufferedDocs", "100000"); + System.setProperty("solr.tests.maxIndexingThreads", "-1"); + System.setProperty("solr.tests.ramBufferSizeMB", "100"); + + // use non-test classes so RandomizedRunner isn't necessary + System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); + System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); + System.setProperty("solr.lock.type", "single"); + + log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + .process(miniCluster.getSolrClient()) + .toString()); + + NamedList res = createCollection( + miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); + + res = createCollection( + miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); + CollectionAdminResponse rsp = (CollectionAdminResponse) admin + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); + assertEquals(0, rsp.getStatus()); + rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); + assertEquals(0, rsp.getStatus()); + + log + .info( + CollectionAdminRequest.ClusterStatus + .getClusterStatus() + 
.process(miniCluster.getSolrClient()) + .toString()); } @AfterAll - public static void tearDown() { + public static void tearDown() throws Exception { spark.stop(); + miniCluster.shutdown(); + FileUtils.deleteDirectory(workingDir.toFile()); } @Test @@ -86,8 +178,9 @@ public class SolrConfigExploreTest extends SolrExploreTest { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) + .run(isLookupClient); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer", @@ -101,14 +194,13 @@ public class SolrConfigExploreTest extends SolrExploreTest { query.add(CommonParams.Q, q); query.set("debugQuery", "on"); - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); + log.info("Submit query to Solr with params: {}", query); + QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query); // System.out.println(rsp.getHighlighting()); // System.out.println(rsp.getExplainMap()); for (SolrDocument doc : rsp.getResults()) { - System.out - .println( + log.info( doc.get("score") + "\t" + doc.get("__indexrecordidentifier") + "\t" + doc.get("resultidentifier") + "\t" + @@ -122,4 +214,18 @@ public class SolrConfigExploreTest extends SolrExploreTest { } } } + + protected static NamedList createCollection(CloudSolrClient client, String name, int numShards, + int replicationFactor, int maxShardsPerNode, String configName) throws Exception { + ModifiableSolrParams modParams = new ModifiableSolrParams(); + modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name()); + 
modParams.set("name", name); + modParams.set("numShards", numShards); + modParams.set("replicationFactor", replicationFactor); + modParams.set("collection.configName", configName); + modParams.set("maxShardsPerNode", maxShardsPerNode); + QueryRequest request = new QueryRequest(modParams); + request.setPath("/admin/collections"); + return client.request(request); + } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java index 625b6d131..2c62389c6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrConfigTest.java @@ -2,24 +2,15 @@ package eu.dnetlib.dhp.oa.provision; import java.io.IOException; -import java.io.StringReader; import java.net.URI; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.Text; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrInputField; import org.apache.solr.common.params.CommonParams; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.dom4j.io.SAXReader; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -50,9 +41,6 @@ public class SolrConfigTest extends SolrTest { int solrPort = URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - 
.thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -95,9 +83,9 @@ public class SolrConfigTest extends SolrTest { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize) + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) .run(isLookupClient); - Assertions.assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus()); String[] queryStrings = { "cancer", @@ -109,8 +97,8 @@ public class SolrConfigTest extends SolrTest { SolrQuery query = new SolrQuery(); query.add(CommonParams.Q, q); - log.info("Submit query to Solr with params: {}", query.toString()); - QueryResponse rsp = miniCluster.getSolrClient().query(query); + log.info("Submit query to Solr with params: {}", query); + QueryResponse rsp = miniCluster.getSolrClient().query(ProvisionConstants.SHADOW_ALIAS_NAME, query); for (SolrDocument doc : rsp.getResults()) { System.out diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java index 34a9465a7..5b5e42fbd 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrExploreTest.java @@ -34,58 +34,6 @@ public abstract class SolrExploreTest { @TempDir public static Path workingDir; - @BeforeAll - public static void setup() throws Exception { - - // random unassigned HTTP port - final int jettyPort = 0; - final JettyConfig 
jettyConfig = JettyConfig.builder().setPort(jettyPort).build(); - - log.info(String.format("working directory: %s", workingDir.toString())); - System.setProperty("solr.log.dir", workingDir.resolve("logs").toString()); - - // create a MiniSolrCloudCluster instance - miniCluster = new MiniSolrCloudCluster(2, workingDir.resolve("solr"), jettyConfig); - - // Upload Solr configuration directory to ZooKeeper - String solrZKConfigDir = "src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig"; - File configDir = new File(solrZKConfigDir); - - miniCluster.uploadConfigSet(configDir.toPath(), CONFIG_NAME); - - // override settings in the solrconfig include - System.setProperty("solr.tests.maxBufferedDocs", "100000"); - System.setProperty("solr.tests.maxIndexingThreads", "-1"); - System.setProperty("solr.tests.ramBufferSizeMB", "100"); - - // use non-test classes so RandomizedRunner isn't necessary - System.setProperty("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); - System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory"); - System.setProperty("solr.lock.type", "single"); - - log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString()); - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); - - NamedList res = createCollection( - miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, CONFIG_NAME); - res.forEach(o -> log.info(o.toString())); - - miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); - - log - .info( - CollectionAdminRequest.ClusterStatus - .getClusterStatus() - .process(miniCluster.getSolrClient()) - .toString()); - - } - @AfterAll public static void shutDown() throws Exception { miniCluster.shutdown(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java index 79527b891..2caf09799 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.File; import java.nio.file.Path; @@ -10,6 +12,7 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.ConfigSetAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CoreAdminParams; @@ -21,14 +24,19 @@ import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class SolrTest { protected static final Logger log = LoggerFactory.getLogger(SolrTest.class); - protected static final String FORMAT = "test"; - protected static final String DEFAULT_COLLECTION = FORMAT + "-index-openaire"; + protected static final String SHADOW_FORMAT = "c1"; + protected static final String SHADOW_COLLECTION = SHADOW_FORMAT + "-index-openaire"; + protected static final String PUBLIC_FORMAT = "c2"; + protected static final String PUBLIC_COLLECTION = PUBLIC_FORMAT + "-index-openaire"; + protected static final String CONFIG_NAME = "testConfig"; + protected static SolrAdminApplication admin; + protected static MiniSolrCloudCluster miniCluster; @TempDir @@ -72,10 +82,21 @@ public abstract class SolrTest { .toString()); NamedList res = createCollection( - miniCluster.getSolrClient(), DEFAULT_COLLECTION, 4, 2, 20, 
CONFIG_NAME); + miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME); res.forEach(o -> log.info(o.toString())); - miniCluster.getSolrClient().setDefaultCollection(DEFAULT_COLLECTION); + // miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION); + + res = createCollection( + miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME); + res.forEach(o -> log.info(o.toString())); + + admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress()); + CollectionAdminResponse rsp = (CollectionAdminResponse) admin + .createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION); + assertEquals(0, rsp.getStatus()); + rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION); + assertEquals(0, rsp.getStatus()); log .info( @@ -83,12 +104,12 @@ public abstract class SolrTest { .getClusterStatus() .process(miniCluster.getSolrClient()) .toString()); - } @AfterAll public static void shutDown() throws Exception { miniCluster.shutdown(); + admin.close(); FileUtils.deleteDirectory(workingDir.toFile()); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index 522c34ef1..8149c2526 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -10,6 +10,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.CommonParams; import org.apache.spark.SparkConf; @@ -50,9 +51,6 @@ public class XmlIndexingJobTest extends SolrTest { int solrPort = 
URI.create("http://" + miniCluster.getZkClient().getZkServerAddress()).getPort(); - Mockito - .when(isLookupClient.getDsId(Mockito.anyString())) - .thenReturn("313f0381-23b6-466f-a0b8-c72a9679ac4b_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"); Mockito.when(isLookupClient.getZkHost()).thenReturn(String.format("127.0.0.1:%s/solr", solrPort)); Mockito .when(isLookupClient.getLayoutSource(Mockito.anyString())) @@ -103,46 +101,72 @@ public class XmlIndexingJobTest extends SolrTest { long nRecord = records.count(); - new XmlIndexingJob(spark, inputPath, FORMAT, batchSize).run(isLookupClient); + new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize) + .run(isLookupClient); - assertEquals(0, miniCluster.getSolrClient().commit().getStatus()); + assertEquals(0, miniCluster.getSolrClient().commit(SHADOW_COLLECTION).getStatus()); - QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); + QueryResponse rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( nRecord, rsp.getResults().getNumFound(), "the number of indexed records should be equal to the number of input records"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( - 0, rsp.getResults().getNumFound(), + 4, rsp.getResults().getNumFound(), "the number of indexed records having isgreen = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( - 0, rsp.getResults().getNumFound(), + 2, 
rsp.getResults().getNumFound(), "the number of indexed records having openaccesscolor = bronze"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( 0, rsp.getResults().getNumFound(), "the number of indexed records having isindiamondjournal = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( 0, rsp.getResults().getNumFound(), "the number of indexed records having publiclyfunded = true"); - rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.SHADOW_ALIAS_NAME, + new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( - 0, rsp.getResults().getNumFound(), + 35, rsp.getResults().getNumFound(), "the number of indexed records having peerreviewed = true"); rsp = miniCluster .getSolrClient() .query( + ProvisionConstants.SHADOW_ALIAS_NAME, new SolrQuery() - .add(CommonParams.Q, "objidentifier:\"iddesignpres::ae77e56e84ad058d9e7f19fa2f7325db\"") + .add(CommonParams.Q, "objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"") .add(CommonParams.FL, "__json")); assertEquals( 1, rsp.getResults().getNumFound(), @@ -158,6 +182,22 @@ public class XmlIndexingJobTest extends SolrTest { log.info((String) json.get()); + admin + .execute( + SolrAdminApplication.Action.UPDATE_ALIASES, null, false, + SHADOW_COLLECTION, PUBLIC_COLLECTION); + + rsp = miniCluster + .getSolrClient() + .query( + ProvisionConstants.PUBLIC_ALIAS_NAME, + new SolrQuery() + .add(CommonParams.Q, 
"objidentifier:\"57a035e5b1ae::236d6d8c1e03368b5ae72acfeeb11bbc\"") + .add(CommonParams.FL, "__json")); + + assertEquals( + 1, rsp.getResults().getNumFound(), + "the number of indexed records having the given identifier, found in the public collection"); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index ba3fa83df..8567acf85 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -68,15 +68,12 @@ - - - - + @@ -93,6 +90,7 @@ + @@ -122,6 +120,7 @@ + diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema index 9720d3f37..d4e1cbede 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/solr/conf/exploreTestConfig/managed-schema @@ -194,228 +194,173 @@ - - - - - - + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - + - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + 
- - + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/xml_json_sample.json.gz b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/xml_json_sample.json.gz new file mode 100644 index 000000000..81e4a1c3a Binary files /dev/null and b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/xml/xml_json_sample.json.gz differ diff --git a/pom.xml b/pom.xml index 9e554204d..42afbd372 100644 --- a/pom.xml +++ b/pom.xml @@ -960,7 +960,7 @@ 1.1.3 1.7 1.0.7 - [6.1.3-SNAPSHOT] + [6.1.3] cdh5.9.2 3.5 11.0.2