forked from D-Net/dnet-hadoop
- Implemented Mapping from Scholix to Solr dataModel
- Moved date normalize cleaning from Saxon Function to GraphCleaningFunctions - added Scholix records to test feeding
This commit is contained in:
parent
56f880c89d
commit
bf6c8ccc79
|
@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
|
|||
|
||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
@ -36,6 +38,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
|
||||
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
||||
|
||||
public static final String BLANK = "";
|
||||
|
||||
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
||||
|
||||
private static final String[] normalizeDateFormats = {
|
||||
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
|
||||
};
|
||||
|
||||
|
||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||
if (value instanceof Datasource) {
|
||||
// nothing to clean here
|
||||
|
@ -459,6 +470,20 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
|||
return Optional.ofNullable(cleanDate(date));
|
||||
}
|
||||
|
||||
public static String normalizeDate(String s) {
|
||||
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
|
||||
|
||||
for (String format : normalizeDateFormats) {
|
||||
try {
|
||||
Date parse = new SimpleDateFormat(format).parse(date);
|
||||
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
|
||||
return res;
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
}
|
||||
return BLANK;
|
||||
}
|
||||
|
||||
public static String cleanDate(final String inputDate) {
|
||||
|
||||
if (StringUtils.isBlank(inputDate)) {
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.text.ParseException;
|
|||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import net.sf.saxon.expr.XPathContext;
|
||||
|
@ -14,15 +15,6 @@ import net.sf.saxon.value.SequenceType;
|
|||
import net.sf.saxon.value.StringValue;
|
||||
|
||||
public class NormalizeDate extends AbstractExtensionFunction {
|
||||
|
||||
private static final String[] normalizeDateFormats = {
|
||||
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
|
||||
};
|
||||
|
||||
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
|
||||
|
||||
public static final String BLANK = "";
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "normalizeDate";
|
||||
|
@ -31,10 +23,10 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
@Override
|
||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||
if (arguments == null || arguments.length == 0) {
|
||||
return new StringValue(BLANK);
|
||||
return new StringValue(GraphCleaningFunctions.BLANK);
|
||||
}
|
||||
String s = arguments[0].head().getStringValue();
|
||||
return new StringValue(_normalizeDate(s));
|
||||
return new StringValue(GraphCleaningFunctions.normalizeDate(s));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -58,18 +50,4 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
|||
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
||||
return SequenceType.SINGLE_STRING;
|
||||
}
|
||||
|
||||
private String _normalizeDate(String s) {
|
||||
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
|
||||
|
||||
for (String format : normalizeDateFormats) {
|
||||
try {
|
||||
Date parse = new SimpleDateFormat(format).parse(date);
|
||||
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
|
||||
return res;
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
}
|
||||
return BLANK;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import com.fasterxml.jackson.databind.type.MapType;
|
||||
import com.fasterxml.jackson.databind.type.TypeFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.params.CollectionParams;
|
||||
import org.apache.solr.common.params.CoreAdminParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.dom4j.Document;
|
||||
|
@ -17,8 +23,12 @@ import org.stringtemplate.v4.ST;
|
|||
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
|
@ -44,10 +54,56 @@ public class SolrUtil {
|
|||
|
||||
private static final String CONF_BASE_PATH ="/eu/dnetlib/dhp/oa/provision/conf";
|
||||
|
||||
private static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";
|
||||
public static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";
|
||||
|
||||
private static final String SCHEMA_TEMPLATE_PATH= "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
|
||||
|
||||
|
||||
private static String createURLRequest = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";
|
||||
|
||||
private static String generateCreateIndexRequest(final String host,
|
||||
final String port,
|
||||
final String collectionName,
|
||||
final String numShard,
|
||||
final String replicationFactor,
|
||||
final String collectionConfigName,
|
||||
final String maxShardsPerNode) {
|
||||
return String.format(createURLRequest, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
|
||||
}
|
||||
|
||||
public static boolean createSolrIndex(final String host,
|
||||
final String port,
|
||||
final String collectionName,
|
||||
final String numShard,
|
||||
final String replicationFactor,
|
||||
final String maxShardsPerNode,
|
||||
final String collectionConfigName) throws Exception {
|
||||
|
||||
final String uri = generateCreateIndexRequest(host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
|
||||
|
||||
URL url = new URL(uri);
|
||||
System.out.println(uri);
|
||||
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
connection.setRequestMethod("GET");
|
||||
int status = connection.getResponseCode();
|
||||
System.out.println("status = " + status);
|
||||
|
||||
BufferedReader in = new BufferedReader(
|
||||
new InputStreamReader(connection.getInputStream()));
|
||||
String inputLine;
|
||||
StringBuffer content = new StringBuffer();
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
content.append(inputLine);
|
||||
}
|
||||
in.close();
|
||||
|
||||
|
||||
log.debug("content = " + content);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static void uploadZookeperConfig(final SolrZkClient zkClient,
|
||||
final String coreName,
|
||||
final boolean overwrite,
|
||||
|
@ -114,7 +170,21 @@ public class SolrUtil {
|
|||
final String st = loadFileInClassPath(CONF_BASE_PATH+"/solrconfig.xml.st");
|
||||
final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
|
||||
p.forEach(solrConfig::add);
|
||||
return solrConfig.toString();
|
||||
return solrConfig.render();
|
||||
}
|
||||
|
||||
public static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
|
||||
int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
|
||||
ModifiableSolrParams modParams = new ModifiableSolrParams();
|
||||
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
|
||||
modParams.set("name", name);
|
||||
modParams.set("numShards", numShards);
|
||||
modParams.set("replicationFactor", replicationFactor);
|
||||
modParams.set("collection.configName", configName);
|
||||
modParams.set("maxShardsPerNode", maxShardsPerNode);
|
||||
QueryRequest request = new QueryRequest(modParams);
|
||||
request.setPath("/admin/collections");
|
||||
return client.request(request);
|
||||
}
|
||||
|
||||
private static Map<String, byte[]> buildConfiguration(final String layout)
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
package eu.dnetlib.dhp.oa.provision.scholix;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||
import eu.dnetlib.dhp.schema.sx.scholix.*;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class ScholixToSolr {
|
||||
final static ObjectMapper MAPPER = new ObjectMapper();
|
||||
|
||||
|
||||
public static SolrInputDocument toSolrDocument(final String json) {
|
||||
try {
|
||||
final Scholix input = MAPPER.readValue(json, Scholix.class);
|
||||
final SolrInputDocument output = new SolrInputDocument();
|
||||
|
||||
fillEntityField(output,input.getSource(), "source");
|
||||
fillEntityField(output,input.getTarget(), "target");
|
||||
final String cleanDate= GraphCleaningFunctions.cleanDate(input.getPublicationDate());
|
||||
|
||||
if(cleanDate!= null)
|
||||
output.addField("publication_date", GraphCleaningFunctions.normalizeDate(cleanDate));
|
||||
|
||||
if (input.getRelationship()!= null && input.getRelationship().getName()!= null)
|
||||
output.addField("relation_name", input.getRelationship().getName());
|
||||
else
|
||||
return null;
|
||||
if (input.getRelationship()!= null && input.getRelationship().getInverse()!= null)
|
||||
output.addField("relation_inverse", input.getRelationship().getInverse());
|
||||
|
||||
if (input.getLinkprovider()!= null) {
|
||||
final List<String> linkProviders = input.getLinkprovider().stream()
|
||||
.map(ScholixEntityId::getName)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
output.addField("link_provider",linkProviders);
|
||||
}
|
||||
if(input.getPublisher()!= null) {
|
||||
final List<String> publishers = input.getPublisher().stream()
|
||||
.map(ScholixEntityId::getName)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
output.addField("publisher_name", publishers);
|
||||
}
|
||||
|
||||
output.addField("__indexrecordidentifier", input.getIdentifier());
|
||||
output.addField("__result", json);
|
||||
return output;
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error on convert Scholix");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void fillEntityField(final SolrInputDocument document, final ScholixResource resource, final String prefix) {
|
||||
|
||||
document.addField(prefix+"_identifier",resource.getDnetIdentifier());
|
||||
document.addField(prefix+"_type", resource.getObjectType());
|
||||
document.addField(prefix+"_publication_date", resource.getPublicationDate());
|
||||
document.addField(prefix+"_subtype", resource.getObjectSubType());
|
||||
|
||||
|
||||
List<String> resourcePIDs = resource.getIdentifier().stream()
|
||||
.map(ScholixIdentifier::getIdentifier)
|
||||
.collect(Collectors.toList());
|
||||
document.addField(prefix+"_pid", resourcePIDs);
|
||||
|
||||
List<String> resourceSchemas = resource.getIdentifier().stream()
|
||||
.map(ScholixIdentifier::getSchema)
|
||||
.collect(Collectors.toList());
|
||||
document.addField(prefix+"_schema", resourceSchemas);
|
||||
|
||||
|
||||
if (resource.getPublisher() != null) {
|
||||
|
||||
final List<String> publishers = resource.getPublisher().stream().map(ScholixEntityId::getName).collect(Collectors.toList());
|
||||
if (publishers.size()>0)
|
||||
document.addField(prefix+"_publisher", publishers);
|
||||
}
|
||||
|
||||
|
||||
if (resource.getCollectedFrom() != null) {
|
||||
|
||||
final List<String> collectedFrom = resource.getCollectedFrom().stream()
|
||||
.map(ScholixCollectedFrom::getProvider)
|
||||
.filter(Objects::nonNull)
|
||||
.map(ScholixEntityId::getName)
|
||||
.collect(Collectors.toList());
|
||||
if (collectedFrom.size()>0)
|
||||
document.addField(prefix+"_collected_from", collectedFrom);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||
public class ScholixIndexingTest extends SolrTest{
|
||||
|
||||
private static String LAYOUT_PATH="/eu/dnetlib/dhp/oa/provision/SMF_layout.xml";
|
||||
|
||||
|
||||
/**
|
||||
* This test verifies that the schema will be generated correctly
|
||||
* by get the profile of the metadataFormat and generating solr schema.xml
|
||||
* we expect that the fiedl in the metadataFormat are all in the field solr schema
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
@Order(1)
|
||||
void testSchemaCreation() throws Exception {
|
||||
|
||||
final String layout = loadSMFLayout();
|
||||
assertNotNull(layout);
|
||||
assertTrue(StringUtils.isNotBlank(layout));
|
||||
|
||||
final String scheme = SolrUtil.getSchemaXML(loadSMFLayout());
|
||||
assertNotNull(scheme);
|
||||
assertTrue(StringUtils.isNotBlank(scheme));
|
||||
|
||||
|
||||
final Document fields = parseDocument(layout);
|
||||
List<Node> params = fields.selectNodes("//FIELD");
|
||||
final List<String> exptectedFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
exptectedFieldName.add(name.toLowerCase());
|
||||
}
|
||||
assertTrue(exptectedFieldName.size()>0);
|
||||
|
||||
|
||||
final Document parsedScheme = parseDocument(scheme);
|
||||
params = parsedScheme.selectNodes("//field");
|
||||
final List<String> createdFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
createdFieldName.add(name.toLowerCase());
|
||||
}
|
||||
assertTrue(createdFieldName.size()>0);
|
||||
|
||||
exptectedFieldName.stream().map(createdFieldName::contains).forEach(Assertions::assertTrue);
|
||||
}
|
||||
|
||||
/***
|
||||
* Test the creation of the index works
|
||||
* we test if all the files are uploaded into
|
||||
* the zookeeper instance of SOLR under it's
|
||||
* collection name
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
@Order(2)
|
||||
public void testCreateCollection() throws Exception {
|
||||
final String collectionName ="SMF-index-scholix";
|
||||
SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(),collectionName,true, loadSMFLayout() );
|
||||
|
||||
assertTrue(miniCluster.getZkClient().exists("/configs/"+collectionName, true));
|
||||
List<String> items = miniCluster.getZkClient().getChildren("/configs/"+collectionName, null, true);
|
||||
|
||||
List<String> configurationFiles =
|
||||
Files.list(
|
||||
Paths.get(
|
||||
Objects.requireNonNull(getClass().getResource(SolrUtil.CONF_FILE_BASE_PATH)).getPath()))
|
||||
.map(Path::getFileName)
|
||||
.map(Path::toString)
|
||||
.collect(Collectors.toList());
|
||||
configurationFiles.add("schema.xml");
|
||||
configurationFiles.add("solrconfig.xml");
|
||||
configurationFiles.forEach(s->assertTrue(items.contains(s)));
|
||||
|
||||
SolrUtil.createCollection(miniCluster.getSolrClient(), "Scholix", 4,1,2,collectionName);
|
||||
|
||||
log.debug("Collection Created");
|
||||
final Map<String, String> queryParamMap = new HashMap<>();
|
||||
queryParamMap.put("q", "*:*");
|
||||
|
||||
MapSolrParams queryParams = new MapSolrParams(queryParamMap);
|
||||
final QueryResponse response =miniCluster.getSolrClient().query("Scholix", queryParams);
|
||||
final SolrDocumentList documents = response.getResults();
|
||||
assertEquals(0, documents.getNumFound());
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Order(3)
|
||||
public void testFeedingSolrDocument() throws Exception {
|
||||
|
||||
|
||||
InputStream gzipStream = new GZIPInputStream(Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/scholix_records.gz")));
|
||||
Reader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
|
||||
BufferedReader buffered = new BufferedReader(decoder);
|
||||
String line = buffered.readLine();
|
||||
|
||||
final CloudSolrClient client = miniCluster.getSolrClient();
|
||||
client.setDefaultCollection("Scholix");
|
||||
int added = 0;
|
||||
while (line!= null) {
|
||||
|
||||
|
||||
final SolrInputDocument solrDocument = ScholixToSolr.toSolrDocument(line);
|
||||
|
||||
client.add(solrDocument);
|
||||
added ++;
|
||||
line = buffered.readLine();
|
||||
}
|
||||
|
||||
client.commit();
|
||||
|
||||
log.debug(String.format("Feed %d documents",added));
|
||||
|
||||
|
||||
final SolrDocumentList documents = executeQuery("*:*");
|
||||
assertEquals(added, documents.getNumFound());
|
||||
|
||||
|
||||
|
||||
documents.stream().map(s-> s.getFirstValue("source_pid").toString()).forEach(System.out::println);
|
||||
|
||||
SolrDocumentList source_pids = executeQuery("source_pid:\"10.15468/dl.u47azs\"");
|
||||
|
||||
System.out.println("source_pid.getNumFound() = " + source_pids.getNumFound());
|
||||
|
||||
source_pids.stream().map(s -> s.getFieldValue("source_pid")).forEach(System.out::println);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private SolrDocumentList executeQuery(final String query) throws SolrServerException, IOException {
|
||||
|
||||
final Map<String, String> queryParamMap = new HashMap<>();
|
||||
queryParamMap.put("q", query);
|
||||
|
||||
MapSolrParams queryParams = new MapSolrParams(queryParamMap);
|
||||
final QueryResponse response =miniCluster.getSolrClient().query("Scholix", queryParams);
|
||||
return response.getResults();
|
||||
}
|
||||
|
||||
/***
|
||||
* Utility for parsing XML
|
||||
* @param xml
|
||||
* @return Dom4J Document
|
||||
* @throws DocumentException
|
||||
*/
|
||||
private Document parseDocument(final String xml) throws DocumentException {
|
||||
return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
private String loadSMFLayout() throws IOException {
|
||||
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(LAYOUT_PATH)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -1,26 +1,11 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.Element;
|
||||
import org.dom4j.Node;
|
||||
import org.dom4j.io.SAXReader;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class SolrAdminApplicationTest extends SolrTest {
|
||||
|
||||
|
@ -51,82 +36,4 @@ class SolrAdminApplicationTest extends SolrTest {
|
|||
|
||||
assertEquals(0, rsp.getStatus());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This test verifies that the schema will be generated correctly
|
||||
* by get the profile of the metadataFormat and generating solr schema.xml
|
||||
* we expect that the fiedl in the metadataFormat are all in the field solr schema
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
void testSchemaCreation() throws Exception {
|
||||
|
||||
|
||||
final String layout =getSMFLayout();
|
||||
|
||||
assertNotNull(layout);
|
||||
assertTrue(StringUtils.isNotBlank(layout));
|
||||
|
||||
final String scheme = SolrUtil.getSchemaXML(getSMFLayout());
|
||||
|
||||
assertNotNull(scheme);
|
||||
assertTrue(StringUtils.isNotBlank(scheme));
|
||||
|
||||
|
||||
final Document fields = parseDocument(layout);
|
||||
|
||||
List<Node> params = fields.selectNodes("//FIELD");
|
||||
|
||||
final List<String> exptectedFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
exptectedFieldName.add(name.toLowerCase());
|
||||
}
|
||||
|
||||
|
||||
assertTrue(exptectedFieldName.size()>0);
|
||||
|
||||
|
||||
final Document parsedScheme = parseDocument(scheme);
|
||||
|
||||
params = parsedScheme.selectNodes("//field");
|
||||
|
||||
final List<String> createdFieldName = new ArrayList<>();
|
||||
for (Node param : params) {
|
||||
|
||||
Element element = (Element) param;
|
||||
String name = element.attributeValue("name");
|
||||
createdFieldName.add(name.toLowerCase());
|
||||
}
|
||||
assertTrue(createdFieldName.size()>0);
|
||||
exptectedFieldName.stream().map(createdFieldName::contains).forEach(Assertions::assertTrue);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateCollection() throws IOException {
|
||||
miniCluster.getZkClient();
|
||||
SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(),"SMF-index-scholix",true,getSMFLayout() );
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Document parseDocument(final String xml) throws DocumentException {
|
||||
return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
|
||||
private String getSMFLayout() throws IOException {
|
||||
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/SMF_layout.xml")));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,25 +1,31 @@
|
|||
<LAYOUT name="index">
    <FIELDS>

        <!-- SOURCE FIELD -->
        <FIELD indexable="true" name="source_identifier" multivalued="false" stored="false" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="source_type" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="false" name="source_publication_date" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="source_subtype" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="source_pid" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="source_schema" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="source_publisher" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
        <FIELD indexable="true" name="source_collected_from" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>

        <!-- TARGET FIELD -->
        <FIELD indexable="true" name="target_identifier" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="target_type" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="false" name="target_publication_date" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="target_subtype" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="target_pid" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="target_schema" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="target_publisher" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
        <FIELD indexable="true" name="target_collected_from" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>

        <!-- RELATION FIELD -->
        <FIELD indexable="true" name="publication_date" multivalued="false" stored="true" stat="false" type="date" value="None"/>
        <FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
        <FIELD indexable="true" name="publisher_name" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
        <FIELD indexable="true" name="link_provider" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
    </FIELDS>
</LAYOUT>
|
Binary file not shown.
Loading…
Reference in New Issue