forked from D-Net/dnet-hadoop
[AffiliationIngestion] refactoring
This commit is contained in:
parent 9cbe966b4a
commit a2b708bb71
|
@ -5,7 +5,6 @@ import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
|
@ -31,6 +30,7 @@ import eu.dnetlib.dhp.schema.solr.Context;
|
||||||
import eu.dnetlib.dhp.schema.solr.Country;
|
import eu.dnetlib.dhp.schema.solr.Country;
|
||||||
import eu.dnetlib.dhp.schema.solr.Datasource;
|
import eu.dnetlib.dhp.schema.solr.Datasource;
|
||||||
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines;
|
||||||
|
import eu.dnetlib.dhp.schema.solr.ExternalReference;
|
||||||
import eu.dnetlib.dhp.schema.solr.Instance;
|
import eu.dnetlib.dhp.schema.solr.Instance;
|
||||||
import eu.dnetlib.dhp.schema.solr.Journal;
|
import eu.dnetlib.dhp.schema.solr.Journal;
|
||||||
import eu.dnetlib.dhp.schema.solr.Measure;
|
import eu.dnetlib.dhp.schema.solr.Measure;
|
||||||
|
@ -562,10 +562,16 @@ public class ProvisionModelSupport {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<ExternalReference> mapExternalReference(List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
private static List<ExternalReference> mapExternalReference(
|
||||||
return Optional.ofNullable(externalReference)
|
List<eu.dnetlib.dhp.schema.oaf.ExternalReference> externalReference) {
|
||||||
.map(ext -> ext.stream()
|
return Optional
|
||||||
.map(e -> ExternalReference.newInstance(
|
.ofNullable(externalReference)
|
||||||
|
.map(
|
||||||
|
ext -> ext
|
||||||
|
.stream()
|
||||||
|
.map(
|
||||||
|
e -> ExternalReference
|
||||||
|
.newInstance(
|
||||||
e.getSitename(),
|
e.getSitename(),
|
||||||
e.getLabel(),
|
e.getLabel(),
|
||||||
e.getAlternateLabel(),
|
e.getAlternateLabel(),
|
||||||
|
@ -573,8 +579,8 @@ public class ProvisionModelSupport {
|
||||||
mapCodeLabel(e.getQualifier()),
|
mapCodeLabel(e.getQualifier()),
|
||||||
e.getRefidentifier(),
|
e.getRefidentifier(),
|
||||||
e.getQuery()))
|
e.getQuery()))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
.orElse(Lists.newArrayList());
|
.orElse(Lists.newArrayList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
private static List<Context> asContext(List<eu.dnetlib.dhp.schema.oaf.Context> ctxList,
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
|
@ -32,14 +33,13 @@ import org.junit.jupiter.api.io.TempDir;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
|
||||||
|
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class SolrConfigExploreTest {
|
public class SolrConfigExploreTest {
|
||||||
|
@ -91,7 +91,7 @@ public class SolrConfigExploreTest {
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
conf.setAppName(XmlIndexingJobTest.class.getSimpleName());
|
||||||
conf.registerKryoClasses(new Class[] {
|
conf.registerKryoClasses(new Class[] {
|
||||||
SerializableSolrInputDocument.class
|
SerializableSolrInputDocument.class
|
||||||
});
|
});
|
||||||
|
|
||||||
conf.setMaster("local[1]");
|
conf.setMaster("local[1]");
|
||||||
|
@ -101,10 +101,10 @@ public class SolrConfigExploreTest {
|
||||||
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
conf.set("spark.sql.warehouse.dir", workingDir.resolve("spark").toString());
|
||||||
|
|
||||||
spark = SparkSession
|
spark = SparkSession
|
||||||
.builder()
|
.builder()
|
||||||
.appName(SolrConfigExploreTest.class.getSimpleName())
|
.appName(SolrConfigExploreTest.class.getSimpleName())
|
||||||
.config(conf)
|
.config(conf)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
|
|
||||||
// random unassigned HTTP port
|
// random unassigned HTTP port
|
||||||
final int jettyPort = 0;
|
final int jettyPort = 0;
|
||||||
|
@ -134,35 +134,35 @@ public class SolrConfigExploreTest {
|
||||||
|
|
||||||
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
log.info(new ConfigSetAdminRequest.List().process(miniCluster.getSolrClient()).toString());
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
CollectionAdminRequest.ClusterStatus
|
CollectionAdminRequest.ClusterStatus
|
||||||
.getClusterStatus()
|
.getClusterStatus()
|
||||||
.process(miniCluster.getSolrClient())
|
.process(miniCluster.getSolrClient())
|
||||||
.toString());
|
.toString());
|
||||||
|
|
||||||
NamedList<Object> res = createCollection(
|
NamedList<Object> res = createCollection(
|
||||||
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
miniCluster.getSolrClient(), SHADOW_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||||
res.forEach(o -> log.info(o.toString()));
|
res.forEach(o -> log.info(o.toString()));
|
||||||
|
|
||||||
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
// miniCluster.getSolrClient().setDefaultCollection(SHADOW_COLLECTION);
|
||||||
|
|
||||||
res = createCollection(
|
res = createCollection(
|
||||||
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
miniCluster.getSolrClient(), PUBLIC_COLLECTION, 4, 2, 20, CONFIG_NAME);
|
||||||
res.forEach(o -> log.info(o.toString()));
|
res.forEach(o -> log.info(o.toString()));
|
||||||
|
|
||||||
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
admin = new SolrAdminApplication(miniCluster.getZkClient().getZkServerAddress());
|
||||||
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
CollectionAdminResponse rsp = (CollectionAdminResponse) admin
|
||||||
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
.createAlias(ProvisionConstants.PUBLIC_ALIAS_NAME, PUBLIC_COLLECTION);
|
||||||
assertEquals(0, rsp.getStatus());
|
assertEquals(0, rsp.getStatus());
|
||||||
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
rsp = (CollectionAdminResponse) admin.createAlias(ProvisionConstants.SHADOW_ALIAS_NAME, SHADOW_COLLECTION);
|
||||||
assertEquals(0, rsp.getStatus());
|
assertEquals(0, rsp.getStatus());
|
||||||
|
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
CollectionAdminRequest.ClusterStatus
|
CollectionAdminRequest.ClusterStatus
|
||||||
.getClusterStatus()
|
.getClusterStatus()
|
||||||
.process(miniCluster.getSolrClient())
|
.process(miniCluster.getSolrClient())
|
||||||
.toString());
|
.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,7 +180,8 @@ public class SolrConfigExploreTest {
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||||
.run(isLookupClient);
|
.run(isLookupClient);
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
Assertions
|
||||||
|
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||||
|
|
||||||
String[] queryStrings = {
|
String[] queryStrings = {
|
||||||
"cancer",
|
"cancer",
|
||||||
|
@ -200,7 +201,8 @@ public class SolrConfigExploreTest {
|
||||||
// System.out.println(rsp.getExplainMap());
|
// System.out.println(rsp.getExplainMap());
|
||||||
|
|
||||||
for (SolrDocument doc : rsp.getResults()) {
|
for (SolrDocument doc : rsp.getResults()) {
|
||||||
log.info(
|
log
|
||||||
|
.info(
|
||||||
doc.get("score") + "\t" +
|
doc.get("score") + "\t" +
|
||||||
doc.get("__indexrecordidentifier") + "\t" +
|
doc.get("__indexrecordidentifier") + "\t" +
|
||||||
doc.get("resultidentifier") + "\t" +
|
doc.get("resultidentifier") + "\t" +
|
||||||
|
@ -216,7 +218,7 @@ public class SolrConfigExploreTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
protected static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||||
modParams.set("name", name);
|
modParams.set("name", name);
|
||||||
|
|
|
@ -85,7 +85,8 @@ public class SolrConfigTest extends SolrTest {
|
||||||
|
|
||||||
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
new XmlIndexingJob(spark, inputPath, SHADOW_FORMAT, ProvisionConstants.SHADOW_ALIAS_NAME, batchSize)
|
||||||
.run(isLookupClient);
|
.run(isLookupClient);
|
||||||
Assertions.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
Assertions
|
||||||
|
.assertEquals(0, miniCluster.getSolrClient().commit(ProvisionConstants.SHADOW_ALIAS_NAME).getStatus());
|
||||||
|
|
||||||
String[] queryStrings = {
|
String[] queryStrings = {
|
||||||
"cancer",
|
"cancer",
|
||||||
|
|
Loading…
Reference in New Issue