dnet-hadoop/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrUtil.java

246 lines
8.4 KiB
Java

package eu.dnetlib.dhp.oa.provision;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.dom4j.Document;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stringtemplate.v4.ST;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import com.fasterxml.jackson.databind.type.TypeFactory;
/**
 * Static helpers used to provision a Solr collection: building the configuration files from classpath
 * resources, uploading them to ZooKeeper and issuing the collection CREATE request.
 */
public class SolrUtil {

    private static final Logger log = LoggerFactory.getLogger(SolrUtil.class);

    /** Shared JSON mapper used to parse the service properties file. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** ZooKeeper parent node; the configuration of a core is uploaded under {@code CONFIGS_PATH/<coreName>}. */
    private static final String CONFIGS_PATH = "/configs";

    /** Start/stop delimiter of the attributes in the {@code solrconfig.xml.st} template. */
    private static final char DELIMITER = '$';

    /** Classpath folder holding the configuration resources. */
    public static final String CONF_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/";

    /** Classpath resource listing, one per line, the additional files to upload with the configuration. */
    public static final String LIST_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/file_list";

    /** Classpath location of the XSLT that turns the layout into the index schema. */
    private static final String SCHEMA_TEMPLATE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";

    /**
     * Format of the collection CREATE request. Placeholders, in order: host, port, collection name, number of
     * shards, replication factor, max shards per node, configuration name.
     */
    private static final String CREATE_URL_REQUEST = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";

    /**
     * Builds the URL of the collection CREATE request.
     * <p>
     * The parameter order mirrors {@link #createSolrIndex}: the previous declaration had
     * {@code collectionConfigName} and {@code maxShardsPerNode} swapped with respect to the caller, so the two
     * values ended up in each other's query parameter.
     */
    private static String generateCreateIndexRequest(final String host,
        final String port,
        final String collectionName,
        final String numShard,
        final String replicationFactor,
        final String maxShardsPerNode,
        final String collectionConfigName) {
        return String
            .format(
                CREATE_URL_REQUEST, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode,
                collectionConfigName);
    }

    /**
     * Issues an HTTP GET against the Solr collections admin endpoint to create a collection.
     *
     * @param host                 the Solr host
     * @param port                 the Solr port
     * @param collectionName       value of the {@code name} request parameter
     * @param numShard             value of the {@code numShards} request parameter
     * @param replicationFactor    value of the {@code replicationFactor} request parameter
     * @param maxShardsPerNode     value of the {@code maxShardsPerNode} request parameter
     * @param collectionConfigName value of the {@code collection.configName} request parameter
     * @return {@code true} whenever the response could be read without an exception being raised
     * @throws Exception if the URL is malformed or the HTTP exchange fails
     */
    public static boolean createSolrIndex(final String host,
        final String port,
        final String collectionName,
        final String numShard,
        final String replicationFactor,
        final String maxShardsPerNode,
        final String collectionConfigName) throws Exception {

        final String uri = generateCreateIndexRequest(
            host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
        log.info("create index request: {}", uri);

        final HttpURLConnection connection = (HttpURLConnection) new URL(uri).openConnection();
        try {
            connection.setRequestMethod("GET");
            final int status = connection.getResponseCode();
            log.info("status = {}", status);

            final StringBuilder content = new StringBuilder();
            // try-with-resources: the reader is released even when reading the body fails half way
            try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    content.append(inputLine);
                }
            }
            log.debug("content = {}", content);
            return true;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Uploads the Solr configuration of the given core to ZooKeeper.
     * <p>
     * When {@code overwrite} is set and the configuration node already exists, its direct children and the node
     * itself are deleted first. The configuration is then built and uploaded only if the node does not exist.
     *
     * @param zkClient  the ZooKeeper client
     * @param coreName  name of the core; the configuration is stored under {@code /configs/<coreName>}
     * @param overwrite whether an existing configuration must be removed and replaced
     * @param layout    the layout XML the schema is generated from
     * @throws RuntimeException wrapping any failure raised while cleaning up or uploading
     */
    public static void uploadZookeperConfig(final SolrZkClient zkClient,
        final String coreName,
        final boolean overwrite,
        final String layout) {

        final String basepath = CONFIGS_PATH + "/" + coreName;

        log.info("uploading solr configuration to ZK for index collection: {}", coreName);
        try {
            if (overwrite && zkClient.getSolrZooKeeper().exists(basepath, false) != null) {
                log.info("cleanup ZK configuration: {}", coreName);
                for (String child : zkClient.getSolrZooKeeper().getChildren(basepath, false)) {
                    final String path = basepath + "/" + child;
                    log.debug("cleanup ZK file: {}", path);
                    zkClient.delete(path, -1, true);
                }
                zkClient.delete(basepath, -1, true);
            }
            if (!zkClient.exists(basepath, true)) {
                log.info("upload ZK configuration: {}", coreName);
                zkClient.makePath(basepath, true);
                uploadConfiguration(zkClient, basepath, buildConfiguration(layout));
            }
            log.info("upload ZK configuration complete");
        } catch (InterruptedException e) {
            // restore the interrupt flag so that the calling thread can still observe the interruption
            Thread.currentThread().interrupt();
            throw new RuntimeException("unable to upload solr configuration", e);
        } catch (Exception e) {
            throw new RuntimeException("unable to upload solr configuration", e);
        }
    }

    /**
     * Creates one persistent ZooKeeper node per entry of {@code resources} under {@code basePath}, creating
     * {@code basePath} first when it does not exist.
     */
    private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath,
        final Map<String, byte[]> resources) throws KeeperException,
        InterruptedException, IOException {

        if (!zkClient.exists(basePath, true)) {
            zkClient.makePath(basePath, true);
        }

        for (final Map.Entry<String, byte[]> e : resources.entrySet()) {
            final String path = basePath + "/" + e.getKey();
            log.debug("upload ZK configuration: {}", path);
            zkClient.create(path, e.getValue(), CreateMode.PERSISTENT, true);
        }
    }

    /**
     * Reads a classpath resource as a UTF-8 string.
     *
     * @throws NullPointerException if the resource does not exist
     * @throws UncheckedIOException if the resource cannot be read
     */
    private static String loadFileInClassPath(final String aPath) {
        log.debug("LOAD FILE FROM PATH: {}", aPath);
        try (InputStream is = Objects
            .requireNonNull(SolrUtil.class.getResourceAsStream(aPath), "classpath resource not found: " + aPath)) {
            // UTF-8 rather than the platform default: the content is re-encoded as UTF-8 before the upload
            return IOUtils.toString(is, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new UncheckedIOException("unable to read classpath resource: " + aPath, e);
        }
    }

    /**
     * Parses {@code service_properties.json}, found under {@link #CONF_BASE_PATH}, into a map of strings.
     *
     * @return a new map holding the properties
     * @throws IOException if the JSON cannot be parsed
     */
    public static Map<String, String> getServiceProperties() throws IOException {
        final String properties = loadFileInClassPath(CONF_BASE_PATH + "service_properties.json");
        final TypeFactory typeFactory = MAPPER.getTypeFactory();
        final MapType mapType = typeFactory.constructMapType(HashMap.class, String.class, String.class);
        return MAPPER.readValue(properties, mapType);
    }

    /**
     * Renders the {@code solrconfig.xml.st} template, binding every service property as a template attribute.
     *
     * @return the rendered Solr configuration
     * @throws Exception if the service properties cannot be loaded
     */
    public static String getConfig() throws Exception {
        final Map<String, String> p = getServiceProperties();
        final String st = loadFileInClassPath(CONF_BASE_PATH + "solrconfig.xml.st");
        final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
        p.forEach(solrConfig::add);
        return solrConfig.render();
    }

    /**
     * Sends a collection CREATE request to {@code /admin/collections} through the given client.
     *
     * @param client            the client the request is sent with
     * @param name              value of the {@code name} request parameter
     * @param numShards         value of the {@code numShards} request parameter
     * @param replicationFactor value of the {@code replicationFactor} request parameter
     * @param maxShardsPerNode  value of the {@code maxShardsPerNode} request parameter
     * @param configName        value of the {@code collection.configName} request parameter
     * @return the response returned by the client
     * @throws SolrServerException propagated from the client
     * @throws IOException         propagated from the client
     */
    public static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
        int replicationFactor, int maxShardsPerNode, String configName) throws SolrServerException, IOException {
        final ModifiableSolrParams modParams = new ModifiableSolrParams();
        modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
        modParams.set("name", name);
        modParams.set("numShards", numShards);
        modParams.set("replicationFactor", replicationFactor);
        modParams.set("collection.configName", configName);
        modParams.set("maxShardsPerNode", maxShardsPerNode);
        final QueryRequest request = new QueryRequest(modParams);
        request.setPath("/admin/collections");
        return client.request(request);
    }

    /**
     * Collects the files to upload: the generated {@code schema.xml}, the rendered {@code solrconfig.xml} and
     * every resource named in {@link #LIST_FILE_BASE_PATH}, keyed by the resource path with the
     * {@code CONF_BASE_PATH + "files/"} prefix removed. All contents are encoded as UTF-8.
     */
    private static Map<String, byte[]> buildConfiguration(final String layout)
        throws Exception {

        final Map<String, byte[]> res = new HashMap<>();

        try {
            log.debug("adding schema.xml to the resource map");
            res.put("schema.xml", getSchemaXML(layout).getBytes(StandardCharsets.UTF_8));

            log.debug("adding solrconfig.xml to the resource map");
            res.put("solrconfig.xml", getConfig().getBytes(StandardCharsets.UTF_8));

            final String prefix = CONF_BASE_PATH + "files/";
            for (final String line : loadFileInClassPath(LIST_FILE_BASE_PATH).split("\n")) {
                // trim drops the '\r' of CRLF line endings; blank lines do not name any resource
                final String resourcePath = line.trim();
                if (resourcePath.isEmpty()) {
                    continue;
                }
                res
                    .put(
                        resourcePath.replace(prefix, ""),
                        loadFileInClassPath(resourcePath).getBytes(StandardCharsets.UTF_8));
            }
            return res;
        } catch (Exception e) {
            throw new Exception("failed to build configuration", e);
        }
    }

    /**
     * Generates the index schema by applying the schema template XSLT to the given layout, with the
     * {@code textFieldType} parameter set to {@code text_common}.
     *
     * @param layout the layout XML
     * @return the schema serialized as XML
     * @throws Exception if the layout or the template cannot be parsed, or the transformation fails
     */
    public static String getSchemaXML(final String layout) throws Exception {
        final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));

        final Transformer transformer = loadSchemaTransformer();
        transformer.setParameter("textFieldType", "text_common");

        final DocumentResult result = new DocumentResult();
        transformer.transform(new DocumentSource(fields), result);
        final String xml = result.getDocument().asXML();
        log.debug("new index schema:\n{}", xml);
        return xml;
    }

    /** Compiles the schema template XSLT, making sure the classpath stream is found and closed. */
    private static Transformer loadSchemaTransformer() throws Exception {
        try (InputStream template = Objects
            .requireNonNull(
                SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH),
                "classpath resource not found: " + SCHEMA_TEMPLATE_PATH)) {
            return TransformerFactory
                .newInstance()
                .newTransformer(new DocumentSource(new SAXReader().read(template)));
        }
    }
}