code refactor

Sandro La Bruzzo 2022-10-06 09:12:14 +02:00
parent bf6c8ccc79
commit 6d5cda1a03
8 changed files with 422 additions and 398 deletions

View File

@@ -43,10 +43,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) {
// nothing to clean here
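
(Editor's note, for orientation: the two constants above drive date cleaning. A minimal sketch of how a normalizeDate helper can combine them follows; normalizeDate does exist in GraphCleaningFunctions, but this body is an assumption for illustration, not the actual implementation.)

// Hedged sketch: try each accepted input pattern, re-emit in the normalized output format.
// Assumes java.text.SimpleDateFormat/ParseException and java.util.Date are available.
public static String normalizeDate(final String date) {
    for (String pattern : normalizeDateFormats) {
        try {
            final Date parsed = new SimpleDateFormat(pattern).parse(date);
            return new SimpleDateFormat(normalizeOutFormat).format(parsed);
        } catch (ParseException e) {
            // not in this format, try the next pattern
        }
    }
    return null;
}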

View File

@@ -5,9 +5,9 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException;

View File

@@ -59,9 +59,6 @@ public class SolrAdminApplication implements Closeable {
final String zkHost = isLookup.getZkHost();
log.info("zkHost: {}", zkHost);
final String collection = ProvisionConstants.getCollectionName(format);
log.info("collection: {}", collection);

View File

@@ -1,8 +1,24 @@
package eu.dnetlib.dhp.oa.provision;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import com.fasterxml.jackson.databind.type.TypeFactory;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
@@ -21,217 +37,211 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stringtemplate.v4.ST;

public class SolrUtil {

    /**
     * The log.
     */
    private static final Logger log = LoggerFactory.getLogger(SolrUtil.class);

    /**
     * The Constant CONFIGS_PATH.
     */
    private static final String CONFIGS_PATH = "/configs";

    private static final char DELIMITER = '$';

    private static final String CONF_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf";

    public static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";

    private static final String SCHEMA_TEMPLATE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";

    private static String createURLRequest = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";

    private static String generateCreateIndexRequest(final String host,
        final String port,
        final String collectionName,
        final String numShard,
        final String replicationFactor,
        final String collectionConfigName,
        final String maxShardsPerNode) {
        return String
            .format(
                createURLRequest, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode,
                collectionConfigName);
    }

    public static boolean createSolrIndex(final String host,
        final String port,
        final String collectionName,
        final String numShard,
        final String replicationFactor,
        final String maxShardsPerNode,
        final String collectionConfigName) throws Exception {

        // note: the callee declares (..., collectionConfigName, maxShardsPerNode), so the last two
        // arguments are passed in that order to avoid swapping them in the generated URL
        final String uri = generateCreateIndexRequest(
            host, port, collectionName, numShard, replicationFactor, collectionConfigName, maxShardsPerNode);

        URL url = new URL(uri);
        System.out.println(uri);

        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        int status = connection.getResponseCode();
        System.out.println("status = " + status);

        BufferedReader in = new BufferedReader(
            new InputStreamReader(connection.getInputStream()));
        String inputLine;
        StringBuffer content = new StringBuffer();
        while ((inputLine = in.readLine()) != null) {
            content.append(inputLine);
        }
        in.close();

        log.debug("content = " + content);

        return true;
    }

    public static void uploadZookeperConfig(final SolrZkClient zkClient,
        final String coreName,
        final boolean overwrite,
        final String layout) {

        final String basepath = CONFIGS_PATH + "/" + coreName;

        log.info("uploading solr configuration to ZK for index collection: " + coreName);
        try {
            if (overwrite && zkClient.getSolrZooKeeper().exists(basepath, false) != null) {
                log.info("cleanup ZK configuration: " + coreName);
                for (String child : zkClient.getSolrZooKeeper().getChildren(basepath, false)) {
                    final String path = basepath + "/" + child;
                    log.debug("cleanup ZK file: " + path);
                    zkClient.delete(path, -1, true);
                }
                zkClient.delete(basepath, -1, true);
            }
            if (!zkClient.exists(basepath, true)) {
                log.info("upload ZK configuration: " + coreName);
                zkClient.makePath(basepath, true);
                uploadConfiguration(zkClient, basepath, buildConfiguration(layout));
            }
            log.info("upload ZK configuration complete");
        } catch (Exception e) {
            throw new RuntimeException("unable to upload solr configuration", e);
        }
    }

    private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath,
        final Map<String, byte[]> resources) throws KeeperException,
        InterruptedException, IOException {

        if (!zkClient.exists(basePath, true)) {
            zkClient.makePath(basePath, true);
        }

        for (final Map.Entry<String, byte[]> e : resources.entrySet()) {
            String path = basePath + "/" + e.getKey();
            log.debug("upload ZK configuration: " + path);
            zkClient.create(path, e.getValue(), CreateMode.PERSISTENT, true);
        }
    }

    private static String loadFileInClassPath(final String aPath) {
        try {
            return IOUtils
                .toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(aPath)), Charset.defaultCharset());
        } catch (IOException e) {
            return null;
        }
    }

    public static Map<String, String> getServiceProperties() throws IOException {
        final String properties = loadFileInClassPath(CONF_BASE_PATH + "/service_properties.json");
        final ObjectMapper mapper = new ObjectMapper();
        TypeFactory typeFactory = mapper.getTypeFactory();
        MapType mapType = typeFactory.constructMapType(HashMap.class, String.class, String.class);
        return mapper.readValue(properties, mapType);
    }

    public static String getConfig() throws Exception {
        final Map<String, String> p = getServiceProperties();
        final String st = loadFileInClassPath(CONF_BASE_PATH + "/solrconfig.xml.st");
        final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
        p.forEach(solrConfig::add);
        return solrConfig.render();
    }

    public static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
        int replicationFactor, int maxShardsPerNode, String configName) throws Exception {
        ModifiableSolrParams modParams = new ModifiableSolrParams();
        modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
        modParams.set("name", name);
        modParams.set("numShards", numShards);
        modParams.set("replicationFactor", replicationFactor);
        modParams.set("collection.configName", configName);
        modParams.set("maxShardsPerNode", maxShardsPerNode);
        QueryRequest request = new QueryRequest(modParams);
        request.setPath("/admin/collections");
        return client.request(request);
    }

    private static Map<String, byte[]> buildConfiguration(final String layout)
        throws Exception {

        Map<String, byte[]> res = new HashMap<>();

        try {
            log.debug("adding schema.xml to the resource map");
            res.put("schema.xml", getSchemaXML(layout).getBytes());

            log.debug("adding solrconfig.xml to the resource map");
            res.put("solrconfig.xml", getConfig().getBytes());

            Files
                .list(
                    Paths.get(Objects.requireNonNull(SolrUtil.class.getResource(CONF_FILE_BASE_PATH)).getPath()))
                .map(Path::getFileName)
                .forEach(s -> {
                    log.debug(String.format("put file from path %s", CONF_FILE_BASE_PATH + s));
                    res
                        .put(
                            String.valueOf(s),
                            Objects
                                .requireNonNull(loadFileInClassPath(CONF_FILE_BASE_PATH + s))
                                .getBytes(StandardCharsets.UTF_8));
                });

            return res;
        } catch (Throwable e) {
            throw new Exception("failed to build configuration", e);
        }
    }

    public static String getSchemaXML(final String layout) throws Exception {

        final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));

        Transformer transformer = TransformerFactory
            .newInstance()
            .newTransformer(
                new DocumentSource(new SAXReader().read(SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH))));
        transformer.setParameter("textFieldType", "text_common");

        final DocumentResult result = new DocumentResult();

        transformer.transform(new DocumentSource(fields), result);
        String xml = result.getDocument().asXML();
        log.debug("new index schema:\n" + xml);

        return xml;
    }
}
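
(Editor's note: getConfig() renders solrconfig.xml.st through StringTemplate with '$' as both the start and stop delimiter. A minimal sketch of the substitution it performs; the property name below is invented, not necessarily a key of service_properties.json:)

ST template = new ST("<luceneMatchVersion>$luceneMatchVersion$</luceneMatchVersion>", '$', '$');
template.add("luceneMatchVersion", "8.1.0");
System.out.println(template.render()); // prints <luceneMatchVersion>8.1.0</luceneMatchVersion>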

View File

@@ -1,10 +1,5 @@
package eu.dnetlib.dhp.oa.provision.scholix;
import java.io.IOException;
import java.time.LocalDate;
@@ -12,95 +7,111 @@ import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.sx.scholix.*;
public class ScholixToSolr {
    final static ObjectMapper MAPPER = new ObjectMapper();

    public static SolrInputDocument toSolrDocument(final String json) {
        try {
            final Scholix input = MAPPER.readValue(json, Scholix.class);
            final SolrInputDocument output = new SolrInputDocument();

            fillEntityField(output, input.getSource(), "source");
            fillEntityField(output, input.getTarget(), "target");
            final String cleanDate = GraphCleaningFunctions.cleanDate(input.getPublicationDate());

            if (cleanDate != null)
                output.addField("publication_date", GraphCleaningFunctions.normalizeDate(cleanDate));

            if (input.getRelationship() != null && input.getRelationship().getName() != null)
                output.addField("relation_name", input.getRelationship().getName());
            else
                return null;
            if (input.getRelationship() != null && input.getRelationship().getInverse() != null)
                output.addField("relation_inverse", input.getRelationship().getInverse());

            if (input.getLinkprovider() != null) {
                final List<String> linkProviders = input
                    .getLinkprovider()
                    .stream()
                    .map(ScholixEntityId::getName)
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());

                output.addField("link_provider", linkProviders);
            }
            if (input.getPublisher() != null) {
                final List<String> publishers = input
                    .getPublisher()
                    .stream()
                    .map(ScholixEntityId::getName)
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());
                output.addField("publisher_name", publishers);
            }

            output.addField("__indexrecordidentifier", input.getIdentifier());
            output.addField("__result", json);
            return output;

        } catch (IOException e) {
            // keep the original failure as the cause instead of swallowing it
            throw new RuntimeException("Error on converting Scholix", e);
        }
    }

    private static void fillEntityField(final SolrInputDocument document, final ScholixResource resource,
        final String prefix) {

        document.addField(prefix + "_identifier", resource.getDnetIdentifier());
        document.addField(prefix + "_type", resource.getObjectType());
        document.addField(prefix + "_publication_date", resource.getPublicationDate());
        document.addField(prefix + "_subtype", resource.getObjectSubType());

        List<String> resourcePIDs = resource
            .getIdentifier()
            .stream()
            .map(ScholixIdentifier::getIdentifier)
            .collect(Collectors.toList());
        document.addField(prefix + "_pid", resourcePIDs);

        List<String> resourceSchemas = resource
            .getIdentifier()
            .stream()
            .map(ScholixIdentifier::getSchema)
            .collect(Collectors.toList());
        document.addField(prefix + "_schema", resourceSchemas);

        if (resource.getPublisher() != null) {
            final List<String> publishers = resource
                .getPublisher()
                .stream()
                .map(ScholixEntityId::getName)
                .collect(Collectors.toList());
            if (publishers.size() > 0)
                document.addField(prefix + "_publisher", publishers);
        }

        if (resource.getCollectedFrom() != null) {
            final List<String> collectedFrom = resource
                .getCollectedFrom()
                .stream()
                .map(ScholixCollectedFrom::getProvider)
                .filter(Objects::nonNull)
                .map(ScholixEntityId::getName)
                .collect(Collectors.toList());
            if (collectedFrom.size() > 0)
                document.addField(prefix + "_collected_from", collectedFrom);
        }
    }
}
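
(Editor's note on the contract of toSolrDocument: it returns null when the relationship name is missing, so callers should guard before indexing. A minimal usage sketch; loadScholixRecord() and the pre-configured client are hypothetical:)

final String json = loadScholixRecord(); // hypothetical helper returning one Scholix JSON line
final SolrInputDocument doc = ScholixToSolr.toSolrDocument(json);
if (doc != null) { // null means the record carries no relationship name
    client.add(doc); // client: an already configured CloudSolrClient with a default collection
    client.commit();
}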

View File

@@ -1,6 +1,17 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
@@ -17,176 +28,157 @@ import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.*;

@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class ScholixIndexingTest extends SolrTest {

    private static String LAYOUT_PATH = "/eu/dnetlib/dhp/oa/provision/SMF_layout.xml";

    /**
     * This test verifies that the schema is generated correctly:
     * it takes the profile of the metadataFormat, generates the solr schema.xml from it,
     * and expects every field of the metadataFormat to appear among the solr schema fields.
     * @throws Exception
     */
    @Test
    @Order(1)
    void testSchemaCreation() throws Exception {

        final String layout = loadSMFLayout();
        assertNotNull(layout);
        assertTrue(StringUtils.isNotBlank(layout));

        final String scheme = SolrUtil.getSchemaXML(loadSMFLayout());
        assertNotNull(scheme);
        assertTrue(StringUtils.isNotBlank(scheme));

        final Document fields = parseDocument(layout);
        List<Node> params = fields.selectNodes("//FIELD");
        final List<String> expectedFieldName = new ArrayList<>();
        for (Node param : params) {
            Element element = (Element) param;
            String name = element.attributeValue("name");
            expectedFieldName.add(name.toLowerCase());
        }
        assertTrue(expectedFieldName.size() > 0);

        final Document parsedScheme = parseDocument(scheme);
        params = parsedScheme.selectNodes("//field");
        final List<String> createdFieldName = new ArrayList<>();
        for (Node param : params) {
            Element element = (Element) param;
            String name = element.attributeValue("name");
            createdFieldName.add(name.toLowerCase());
        }
        assertTrue(createdFieldName.size() > 0);

        expectedFieldName.stream().map(createdFieldName::contains).forEach(Assertions::assertTrue);
    }

    /***
     * Tests that the creation of the index works:
     * we verify that all the configuration files are uploaded into
     * the zookeeper instance of SOLR under the collection name.
     * @throws Exception
     */
    @Test
    @Order(2)
    public void testCreateCollection() throws Exception {
        final String collectionName = "SMF-index-scholix";
        SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(), collectionName, true, loadSMFLayout());

        assertTrue(miniCluster.getZkClient().exists("/configs/" + collectionName, true));
        List<String> items = miniCluster.getZkClient().getChildren("/configs/" + collectionName, null, true);

        List<String> configurationFiles = Files
            .list(
                Paths
                    .get(
                        Objects.requireNonNull(getClass().getResource(SolrUtil.CONF_FILE_BASE_PATH)).getPath()))
            .map(Path::getFileName)
            .map(Path::toString)
            .collect(Collectors.toList());
        configurationFiles.add("schema.xml");
        configurationFiles.add("solrconfig.xml");
        configurationFiles.forEach(s -> assertTrue(items.contains(s)));

        SolrUtil.createCollection(miniCluster.getSolrClient(), "Scholix", 4, 1, 2, collectionName);

        log.debug("Collection Created");
        final Map<String, String> queryParamMap = new HashMap<>();
        queryParamMap.put("q", "*:*");

        MapSolrParams queryParams = new MapSolrParams(queryParamMap);
        final QueryResponse response = miniCluster.getSolrClient().query("Scholix", queryParams);
        final SolrDocumentList documents = response.getResults();
        assertEquals(0, documents.getNumFound());
    }

    @Test
    @Order(3)
    public void testFeedingSolrDocument() throws Exception {

        InputStream gzipStream = new GZIPInputStream(
            Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/scholix_records.gz")));
        Reader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
        BufferedReader buffered = new BufferedReader(decoder);
        String line = buffered.readLine();

        final CloudSolrClient client = miniCluster.getSolrClient();
        client.setDefaultCollection("Scholix");
        int added = 0;
        while (line != null) {
            final SolrInputDocument solrDocument = ScholixToSolr.toSolrDocument(line);

            client.add(solrDocument);
            added++;
            line = buffered.readLine();
        }
        client.commit();

        log.debug(String.format("Feed %d documents", added));

        final SolrDocumentList documents = executeQuery("*:*");
        assertEquals(added, documents.getNumFound());

        documents.stream().map(s -> s.getFirstValue("source_pid").toString()).forEach(System.out::println);

        SolrDocumentList source_pids = executeQuery("source_pid:\"10.15468/dl.u47azs\"");

        System.out.println("source_pid.getNumFound() = " + source_pids.getNumFound());

        source_pids.stream().map(s -> s.getFieldValue("source_pid")).forEach(System.out::println);
    }

    private SolrDocumentList executeQuery(final String query) throws SolrServerException, IOException {

        final Map<String, String> queryParamMap = new HashMap<>();
        queryParamMap.put("q", query);
        MapSolrParams queryParams = new MapSolrParams(queryParamMap);
        final QueryResponse response = miniCluster.getSolrClient().query("Scholix", queryParams);
        return response.getResults();
    }

    /***
     * Utility for parsing XML
     * @param xml
     * @return Dom4J Document
     * @throws DocumentException
     */
    private Document parseDocument(final String xml) throws DocumentException {
        return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
    }

    private String loadSMFLayout() throws IOException {
        return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(LAYOUT_PATH)));
    }
}
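
(Editor's note, to make the schema-generation contract of testSchemaCreation concrete: the layout exposes FIELD elements with a name attribute, and getSchemaXML is expected to emit a matching lowercase field declaration. The XML literal below is invented; the real SMF_layout.xml structure is not shown in this diff:)

final String layout = "<LAYOUT><FIELDS><FIELD name=\"publication_date\"/></FIELDS></LAYOUT>"; // hypothetical layout
final String schema = SolrUtil.getSchemaXML(layout);
// the generated schema is expected to contain <field name="publication_date" .../>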

View File

@@ -1,12 +1,12 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.junit.jupiter.api.Test;
class SolrAdminApplicationTest extends SolrTest {
@Test

View File

@@ -352,7 +352,9 @@
</goals>
<configuration>
<tasks>
<!--suppress UnresolvedMavenProperty -->
<property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}" />
<!--suppress UnresolvedMavenProperty -->
<unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources" />
</tasks>
</configuration>
@@ -427,9 +429,12 @@
<configuration>
<executable>ssh</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
</arguments>
</configuration>
@@ -443,9 +448,11 @@
<configuration>
<executable>scp</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>target/${oozie.package.file.name}.tar.gz</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
</arguments>
</configuration>
@@ -460,11 +467,15 @@
<executable>ssh</executable>
<!-- <outputFile>target/redirected_upload.log</outputFile> -->
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>tar -zxf oozie-package.tar.gz; </argument>
<!--suppress UnresolvedMavenProperty -->
<argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/oozie-package.tar.gz; </argument>
<argument>./upload_workflow.sh</argument>
</arguments>
@@ -495,9 +506,12 @@
<!-- this file will be used by test verification profile reading job identifier -->
<outputFile>${oozie.execution.log.file.location}</outputFile>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>./run_workflow.sh</argument>
</arguments>
@@ -512,6 +526,7 @@
<configuration>
<executable>cat</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${oozie.execution.log.file.location}</argument>
</arguments>
</configuration>