[BulkTag] added tagging for organizations relevant to the community. #461

Merged
claudio.atzori merged 3 commits from tagOrganization into beta 2024-07-17 11:11:53 +02:00
9 changed files with 243 additions and 82 deletions
Showing only changes of commit 9d27910144.

View File: CountryMatch.java

@@ -1,3 +1,4 @@
 package eu.dnetlib.pace.tree;

 import java.util.Map;
@@ -11,37 +12,37 @@ import eu.dnetlib.pace.tree.support.ComparatorClass;
 @ComparatorClass("countryMatch")
 public class CountryMatch extends AbstractStringComparator {

 	public CountryMatch(Map<String, String> params) {
 		super(params, new com.wcohen.ss.JaroWinkler());
 	}

 	public CountryMatch(final double weight) {
 		super(weight, new com.wcohen.ss.JaroWinkler());
 	}

 	protected CountryMatch(final double weight, final AbstractStringDistance ssalgo) {
 		super(weight, ssalgo);
 	}

 	@Override
 	public double distance(final String a, final String b, final Config conf) {
 		if (a.isEmpty() || b.isEmpty()) {
 			return -1.0; // return -1 if a field is missing
 		}
 		if (a.equalsIgnoreCase("unknown") || b.equalsIgnoreCase("unknown")) {
 			return -1.0; // return -1 if a country is UNKNOWN
 		}
 		return a.equals(b) ? 1.0 : 0;
 	}

 	@Override
 	public double getWeight() {
 		return super.weight;
 	}

 	@Override
 	protected double normalize(final double d) {
 		return d;
 	}
 }
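In short, the comparator returns -1.0 as a "no decision" signal when either field is missing or a country is UNKNOWN, 1.0 on an exact (case-sensitive) match, and 0 otherwise; the JaroWinkler instance passed to the superclass is never consulted inside distance(). A minimal usage sketch (not part of the PR), assuming the Config argument may be null since distance() never reads it:

import java.util.HashMap;

import eu.dnetlib.pace.tree.CountryMatch;

public class CountryMatchSketch {
	public static void main(String[] args) {
		CountryMatch cm = new CountryMatch(new HashMap<>());
		System.out.println(cm.distance("IT", "IT", null));      // 1.0  -> exact match
		System.out.println(cm.distance("IT", "FR", null));      // 0.0  -> different countries
		System.out.println(cm.distance("", "FR", null));        // -1.0 -> missing field
		System.out.println(cm.distance("UNKNOWN", "IT", null)); // -1.0 -> country is UNKNOWN
	}
}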

View File: SparkBulkTagJob.java

@@ -33,10 +33,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.bulktag.community.*;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.Context;
-import eu.dnetlib.dhp.schema.oaf.Datasource;
-import eu.dnetlib.dhp.schema.oaf.Project;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
 import scala.Tuple2;
@@ -114,27 +111,35 @@ public class SparkBulkTagJob {
 				extendCommunityConfigurationForEOSC(spark, inputPath, cc);
 				execBulkTag(
 					spark, inputPath, outputPath, protoMap, cc);
+				execEntityTag(
+					spark, inputPath + "organization", outputPath + "organization",
+					Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION,
+					TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
+				execEntityTag(
+					spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL),
+					Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
 				execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL));
-				execProjectTag(spark, inputPath, outputPath, Utils.getCommunityProjects(baseURL));
 			});
 	}

-	private static void execProjectTag(SparkSession spark, String inputPath, String outputPath,
-		CommunityEntityMap communityProjects) {
-		Dataset<Project> projects = readPath(spark, inputPath + "project", Project.class);
+	private static <E extends OafEntity> void execEntityTag(SparkSession spark, String inputPath, String outputPath,
+		CommunityEntityMap communityEntity, Class<E> entityClass,
+		String classID, String className) {
+		Dataset<E> entity = readPath(spark, inputPath, entityClass);
 		Dataset<EntityCommunities> pc = spark
 			.createDataset(
-				communityProjects
+				communityEntity
 					.keySet()
 					.stream()
-					.map(k -> EntityCommunities.newInstance(k, communityProjects.get(k)))
+					.map(k -> EntityCommunities.newInstance(k, communityEntity.get(k)))
 					.collect(Collectors.toList()),
 				Encoders.bean(EntityCommunities.class));
-		projects
-			.joinWith(pc, projects.col("id").equalTo(pc.col("entityId")), "left")
-			.map((MapFunction<Tuple2<Project, EntityCommunities>, Project>) t2 -> {
-				Project ds = t2._1();
+		entity
+			.joinWith(pc, entity.col("id").equalTo(pc.col("entityId")), "left")
+			.map((MapFunction<Tuple2<E, EntityCommunities>, E>) t2 -> {
+				E ds = t2._1();
 				if (t2._2() != null) {
 					List<String> context = Optional
 						.ofNullable(ds.getContext())
@@ -156,8 +161,8 @@ public class SparkBulkTagJob {
 							false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false,
 							OafMapperUtils
 								.qualifier(
-									TaggingConstants.CLASS_ID_DATASOURCE,
-									TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE,
+									classID,
+									className,
 									ModelConstants.DNET_PROVENANCE_ACTIONS,
 									ModelConstants.DNET_PROVENANCE_ACTIONS),
 								"1")));
@@ -166,17 +171,17 @@ public class SparkBulkTagJob {
 					});
 				}
 				return ds;
-			}, Encoders.bean(Project.class))
+			}, Encoders.bean(entityClass))
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(outputPath + "project");
+			.json(outputPath);

-		readPath(spark, outputPath + "project", Project.class)
+		readPath(spark, outputPath, entityClass)
 			.write()
 			.mode(SaveMode.Overwrite)
 			.option("compression", "gzip")
-			.json(inputPath + "project");
+			.json(inputPath);
 	}

 	private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath,
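The refactoring folds the old execProjectTag into a generic execEntityTag parameterized over the OafEntity subtype, so tagging a further entity type becomes one extra call. Note also the write-back at the end: the tagged records are written to outputPath and then copied over inputPath, so later steps in the same run see the updated records. A purely hypothetical example of the pattern (not in this PR, which keeps datasources on the dedicated execDatasourceTag path; the assumption here is that a CommunityEntityMap lookup for datasources were available):

// Hypothetical sketch only: tag datasources through the same generic path.
execEntityTag(
	spark, inputPath + "datasource", outputPath + "datasource",
	Utils.getDatasourceCommunities(baseURL), // assumption: returns a CommunityEntityMap
	Datasource.class,
	TaggingConstants.CLASS_ID_DATASOURCE,
	TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE);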

View File: TaggingConstants.java

@@ -13,6 +13,9 @@ public class TaggingConstants {
 	public static final String CLASS_ID_CZENODO = "community:zenodocommunity";
 	public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint";
+	public static final String CLASS_ID_PROJECT = "community:project";
+	public static final String CLASS_ID_ORGANIZATION = "community:organization";

 	public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/";

 	public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject";
@@ -20,5 +23,8 @@ public class TaggingConstants {
 	public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo";
 	public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints";
+	public static final String CLASS_NAME_BULKTAG_PROJECT = "Bulktagging for Community - Project";
+	public static final String CLASS_NAME_BULKTAG_ORGANIZATION = "Bulktagging for Community - Organization";

 	public static final String TAGGING_TRUST = "0.8";
 }
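Each CLASS_ID_*/CLASS_NAME_* pair feeds the provenanceaction qualifier that execEntityTag stamps on every tag it adds. A sketch of the resulting call for organizations, mirroring the qualifier construction shown in SparkBulkTagJob above (assuming only that OafMapperUtils.qualifier returns a Qualifier, as its usage there suggests):

// Provenance attached to an organization tag (sketch).
Qualifier provenance = OafMapperUtils
	.qualifier(
		TaggingConstants.CLASS_ID_ORGANIZATION,           // "community:organization"
		TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION, // "Bulktagging for Community - Organization"
		ModelConstants.DNET_PROVENANCE_ACTIONS,
		ModelConstants.DNET_PROVENANCE_ACTIONS);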

View File: BulkTagJobTest.java

@@ -465,6 +465,138 @@ public class BulkTagJobTest {
 	}

+	@Test
+	void organizationTag() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+			.getPath();
+		LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+		fs
+			.copyFromLocalFile(
+				false, new org.apache.hadoop.fs.Path(getClass()
+					.getResource("/eu/dnetlib/dhp/bulktag/pathMap/")
+					.getPath()),
+				new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap"));
+		SparkBulkTagJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-taggingConf", taggingConf,
+					"-outputPath", workingDir.toString() + "/",
+					"-baseURL", "https://services.openaire.eu/openaire/community/",
+					"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
+					"-nameNode", "local"
+				});
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		JavaRDD<Organization> tmp = sc
+			.textFile(workingDir.toString() + "/organization")
+			.map(item -> OBJECT_MAPPER.readValue(item, Organization.class));
+		Assertions.assertEquals(4, tmp.count());
+		org.apache.spark.sql.Dataset<Organization> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Organization.class));
+		verificationDataset.createOrReplaceTempView("organization");
+		String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from organization "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+		idExplodeCommunity.show(false);
+		Assertions.assertEquals(3, idExplodeCommunity.count());
+		Assertions
+			.assertEquals(
+				3, idExplodeCommunity.filter("provenance = 'community:organization'").count());
+		Assertions
+			.assertEquals(
+				3,
+				idExplodeCommunity
+					.filter("name = 'Bulktagging for Community - Organization'")
+					.count());
+		Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'netherlands'").count());
+		Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'beopen'").count());
+		Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'mes'").count());
+	}

+	@Test
+	void projectTag() throws Exception {
+		final String sourcePath = getClass()
+			.getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/")
+			.getPath();
+		LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+		fs
+			.copyFromLocalFile(
+				false, new org.apache.hadoop.fs.Path(getClass()
+					.getResource("/eu/dnetlib/dhp/bulktag/pathMap/")
+					.getPath()),
+				new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap"));
+		SparkBulkTagJob
+			.main(
+				new String[] {
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-taggingConf", taggingConf,
+					"-outputPath", workingDir.toString() + "/",
+					"-baseURL", "https://services.openaire.eu/openaire/community/",
+					"-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap",
+					"-nameNode", "local"
+				});
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		JavaRDD<Project> tmp = sc
+			.textFile(workingDir.toString() + "/project")
+			.map(item -> OBJECT_MAPPER.readValue(item, Project.class));
+		Assertions.assertEquals(4, tmp.count());
+		org.apache.spark.sql.Dataset<Project> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Project.class));
+		verificationDataset.createOrReplaceTempView("project");
+		String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from project "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+		idExplodeCommunity.show(false);
+		Assertions.assertEquals(4, idExplodeCommunity.count());
+		Assertions
+			.assertEquals(
+				4, idExplodeCommunity.filter("provenance = 'community:project'").count());
+		Assertions
+			.assertEquals(
+				4,
+				idExplodeCommunity
+					.filter("name = 'Bulktagging for Community - Project'")
+					.count());
+		Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'enermaps'").count());
+		Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'clarin'").count());
+		Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'dh-ch'").count());
+	}

 	@Test
 	void bulktagByZenodoCommunityTest() throws Exception {
 		final String sourcePath = getClass()
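Both new tests verify the output with the same lateral-view query over the exploded context/datainfo arrays. For ad-hoc inspection outside JUnit, the pattern applies directly to the job's JSON output; a minimal sketch, assuming the working-directory layout used by the tests above:

// Read the tagged organizations back and list id -> community pairs (sketch).
Dataset<Row> orgs = spark.read().json(workingDir.toString() + "/organization");
orgs.createOrReplaceTempView("organization");
spark
	.sql(
		"select id, MyT.id community "
			+ "from organization "
			+ "lateral view explode(context) c as MyT "
			+ "lateral view explode(MyT.datainfo) d as MyD "
			+ "where MyD.inferenceprovenance = 'bulktagging'")
	.show(false);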

View File: ProvisionModelSupport.java

@@ -5,7 +5,6 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;

-import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
@@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
 import eu.dnetlib.dhp.schema.solr.Country;
 import eu.dnetlib.dhp.schema.solr.Datasource;
 import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
+import eu.dnetlib.dhp.schema.solr.ExternalReference;
 import eu.dnetlib.dhp.schema.solr.Instance;
 import eu.dnetlib.dhp.schema.solr.Journal;
 import eu.dnetlib.dhp.schema.solr.Measure;
@@ -562,10 +562,16 @@ public class ProvisionModelSupport {
 			.orElse(null);
 	}

-	private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
-		return Optional.ofNullable(externalReference)
-			.map(ext -> ext.stream()
-				.map(e -> ExternalReference.newInstance(
+	private static List<ExternalReference> mapExternalReference(
+		List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
+		return Optional
+			.ofNullable(externalReference)
+			.map(
+				ext -> ext
+					.stream()
+					.map(
+						e -> ExternalReference
+							.newInstance(
 								e.getSitename(),
 								e.getLabel(),
 								e.getAlternateLabel(),
@@ -573,8 +579,8 @@ public class ProvisionModelSupport {
 								mapCodeLabel(e.getQualifier()),
 								e.getRefidentifier(),
 								e.getQuery()))
 					.collect(Collectors.toList()))
 			.orElse(Lists.newArrayList());
 	}

 	private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,

View File: SolrConfigExploreTest.java

@@ -1,12 +1,13 @@
 package eu.dnetlib.dhp.oa.provision;

+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.nio.file.Path;

-import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.client.solrj.SolrQuery;
@@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.jupiter.MockitoExtension;
-
-import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
-import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static org.junit.jupiter.api.Assertions.assertEquals;
+import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
+import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

 @ExtendWith(MockitoExtension.class)
 public class SolrConfigExploreTest {
@@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
 		SparkConf conf = new SparkConf();
 		conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
 		conf.registerKryoClasses(new Class[] {
 			SerializableSolrInputDocument.class
 		});
 		conf.setMaster("local[1]");
@@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
 		conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());

 		spark = SparkSession
 			.builder()
 			.appName(SolrConfigExploreTest.class.getSimpleName())
 			.config(conf)
 			.getOrCreate();

 		// random unassigned HTTP port
 		final int jettyPort = 0;
@@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
 		log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
 		log
 			.info(
 				CollectionAdminRequest.ClusterStatus
 					.getClusterStatus()
 					.process(miniCluster.getSolrClient())
 					.toString());

 		NamedList<Object> res = createCollection(
 			miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
 		res.forEach(o -> log.info(o.toString()));

 		// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
 		res = createCollection(
 			miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
 		res.forEach(o -> log.info(o.toString()));

 		admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
 		CollectionAdminResponse rsp = (CollectionAdminResponse) admin
 			.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
 		assertEquals(0, rsp.getStatus());
 		rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
 		assertEquals(0, rsp.getStatus());

 		log
 			.info(
 				CollectionAdminRequest.ClusterStatus
 					.getClusterStatus()
 					.process(miniCluster.getSolrClient())
 					.toString());
 	}
@@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
 		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
 			.run(isLookupClient);

-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",
@@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
 			// System.out.println(rsp.getExplainMap());

 			for (SolrDocument doc : rsp.getResults()) {
-				log.info(
+				log
+					.info(
 						doc.get("score") + "\t" +
 							doc.get("__indexrecordidentifier") + "\t" +
 							doc.get("resultidentifier") + "\t" +
@@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
 	}

 	protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
 		int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
 		ModifiableSolrParams modParams = new ModifiableSolrParams();
 		modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
 		modParams.set("name", name);

View File: SolrConfigTest.java

@@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
 		new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
 			.run(isLookupClient);

-		Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
+		Assertions
+			.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());

 		String[] queryStrings = {
 			"cancer",