forked from D-Net/dnet-hadoop
code refactor
This commit is contained in:
parent
bf6c8ccc79
commit
6d5cda1a03
|
@ -46,7 +46,6 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
|
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
|
|
|
@ -5,9 +5,9 @@ import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
import net.sf.saxon.om.Sequence;
|
import net.sf.saxon.om.Sequence;
|
||||||
import net.sf.saxon.trans.XPathException;
|
import net.sf.saxon.trans.XPathException;
|
||||||
|
|
|
@ -59,9 +59,6 @@ public class SolrAdminApplication implements Closeable {
|
||||||
final String zkHost = isLookup.getZkHost();
|
final String zkHost = isLookup.getZkHost();
|
||||||
log.info("zkHost: {}", zkHost);
|
log.info("zkHost: {}", zkHost);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final String collection = ProvisionConstants.getCollectionName(format);
|
final String collection = ProvisionConstants.getCollectionName(format);
|
||||||
log.info("collection: {}", collection);
|
log.info("collection: {}", collection);
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,24 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import java.io.BufferedReader;
|
||||||
import com.fasterxml.jackson.databind.type.MapType;
|
import java.io.ByteArrayInputStream;
|
||||||
import com.fasterxml.jackson.databind.type.TypeFactory;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import javax.xml.transform.Transformer;
|
||||||
|
import javax.xml.transform.TransformerFactory;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||||
|
@ -21,22 +37,9 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.stringtemplate.v4.ST;
|
import org.stringtemplate.v4.ST;
|
||||||
|
|
||||||
import javax.xml.transform.Transformer;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import javax.xml.transform.TransformerFactory;
|
import com.fasterxml.jackson.databind.type.MapType;
|
||||||
import java.io.BufferedReader;
|
import com.fasterxml.jackson.databind.type.TypeFactory;
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.net.HttpURLConnection;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
public class SolrUtil {
|
public class SolrUtil {
|
||||||
|
|
||||||
|
@ -58,7 +61,6 @@ public class SolrUtil {
|
||||||
|
|
||||||
private static final String SCHEMA_TEMPLATE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
|
private static final String SCHEMA_TEMPLATE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
|
||||||
|
|
||||||
|
|
||||||
private static String createURLRequest = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";
|
private static String createURLRequest = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";
|
||||||
|
|
||||||
private static String generateCreateIndexRequest(final String host,
|
private static String generateCreateIndexRequest(final String host,
|
||||||
|
@ -68,7 +70,10 @@ public class SolrUtil {
|
||||||
final String replicationFactor,
|
final String replicationFactor,
|
||||||
final String collectionConfigName,
|
final String collectionConfigName,
|
||||||
final String maxShardsPerNode) {
|
final String maxShardsPerNode) {
|
||||||
return String.format(createURLRequest, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
|
return String
|
||||||
|
.format(
|
||||||
|
createURLRequest, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode,
|
||||||
|
collectionConfigName);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean createSolrIndex(final String host,
|
public static boolean createSolrIndex(final String host,
|
||||||
|
@ -79,7 +84,8 @@ public class SolrUtil {
|
||||||
final String maxShardsPerNode,
|
final String maxShardsPerNode,
|
||||||
final String collectionConfigName) throws Exception {
|
final String collectionConfigName) throws Exception {
|
||||||
|
|
||||||
final String uri = generateCreateIndexRequest(host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
|
final String uri = generateCreateIndexRequest(
|
||||||
|
host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
|
||||||
|
|
||||||
URL url = new URL(uri);
|
URL url = new URL(uri);
|
||||||
System.out.println(uri);
|
System.out.println(uri);
|
||||||
|
@ -98,7 +104,6 @@ public class SolrUtil {
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
|
|
||||||
|
|
||||||
log.debug("content = " + content);
|
log.debug("content = " + content);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -133,7 +138,8 @@ public class SolrUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath, final Map<String, byte[]> resources) throws KeeperException,
|
private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath,
|
||||||
|
final Map<String, byte[]> resources) throws KeeperException,
|
||||||
InterruptedException, IOException {
|
InterruptedException, IOException {
|
||||||
|
|
||||||
if (!zkClient.exists(basePath, true)) {
|
if (!zkClient.exists(basePath, true)) {
|
||||||
|
@ -147,10 +153,10 @@ public class SolrUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static String loadFileInClassPath(final String aPath) {
|
private static String loadFileInClassPath(final String aPath) {
|
||||||
try {
|
try {
|
||||||
return IOUtils.toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(aPath)), Charset.defaultCharset());
|
return IOUtils
|
||||||
|
.toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(aPath)), Charset.defaultCharset());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -164,7 +170,6 @@ public class SolrUtil {
|
||||||
return mapper.readValue(properties, mapType);
|
return mapper.readValue(properties, mapType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String getConfig() throws Exception {
|
public static String getConfig() throws Exception {
|
||||||
final Map<String, String> p = getServiceProperties();
|
final Map<String, String> p = getServiceProperties();
|
||||||
final String st = loadFileInClassPath(CONF_BASE_PATH + "/solrconfig.xml.st");
|
final String st = loadFileInClassPath(CONF_BASE_PATH + "/solrconfig.xml.st");
|
||||||
|
@ -190,7 +195,6 @@ public class SolrUtil {
|
||||||
private static Map<String, byte[]> buildConfiguration(final String layout)
|
private static Map<String, byte[]> buildConfiguration(final String layout)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
|
|
||||||
Map<String, byte[]> res = new HashMap<>();
|
Map<String, byte[]> res = new HashMap<>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -200,15 +204,20 @@ public class SolrUtil {
|
||||||
res.put("solrconfig.xml", getConfig().getBytes());
|
res.put("solrconfig.xml", getConfig().getBytes());
|
||||||
log.debug("adding solrconfig.xml to the resource map");
|
log.debug("adding solrconfig.xml to the resource map");
|
||||||
|
|
||||||
Files.list(
|
Files
|
||||||
|
.list(
|
||||||
Paths.get(Objects.requireNonNull(SolrUtil.class.getResource(CONF_FILE_BASE_PATH)).getPath()))
|
Paths.get(Objects.requireNonNull(SolrUtil.class.getResource(CONF_FILE_BASE_PATH)).getPath()))
|
||||||
.map(Path::getFileName)
|
.map(Path::getFileName)
|
||||||
.forEach(s -> {
|
.forEach(s -> {
|
||||||
log.debug(String.format("put file from path %s", CONF_FILE_BASE_PATH + s));
|
log.debug(String.format("put file from path %s", CONF_FILE_BASE_PATH + s));
|
||||||
res.put(String.valueOf(s),
|
res
|
||||||
|
.put(
|
||||||
|
String.valueOf(s),
|
||||||
|
|
||||||
Objects.requireNonNull(loadFileInClassPath(CONF_FILE_BASE_PATH + s)).getBytes(StandardCharsets.UTF_8));}
|
Objects
|
||||||
);
|
.requireNonNull(loadFileInClassPath(CONF_FILE_BASE_PATH + s))
|
||||||
|
.getBytes(StandardCharsets.UTF_8));
|
||||||
|
});
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
|
@ -216,13 +225,14 @@ public class SolrUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String getSchemaXML(final String layout) throws Exception {
|
public static String getSchemaXML(final String layout) throws Exception {
|
||||||
|
|
||||||
final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));
|
final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));
|
||||||
|
|
||||||
|
Transformer transformer = TransformerFactory
|
||||||
Transformer transformer = TransformerFactory.newInstance().newTransformer(new DocumentSource(new SAXReader().read(SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH))));
|
.newInstance()
|
||||||
|
.newTransformer(
|
||||||
|
new DocumentSource(new SAXReader().read(SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH))));
|
||||||
transformer.setParameter("textFieldType", "text_common");
|
transformer.setParameter("textFieldType", "text_common");
|
||||||
|
|
||||||
final DocumentResult result = new DocumentResult();
|
final DocumentResult result = new DocumentResult();
|
||||||
|
|
|
@ -1,10 +1,5 @@
|
||||||
package eu.dnetlib.dhp.oa.provision.scholix;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
package eu.dnetlib.dhp.oa.provision.scholix;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
|
||||||
import eu.dnetlib.dhp.schema.sx.scholix.*;
|
|
||||||
import org.apache.solr.common.SolrDocument;
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
|
@ -12,10 +7,17 @@ import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||||
|
import eu.dnetlib.dhp.schema.sx.scholix.*;
|
||||||
|
|
||||||
public class ScholixToSolr {
|
public class ScholixToSolr {
|
||||||
final static ObjectMapper MAPPER = new ObjectMapper();
|
final static ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
|
||||||
public static SolrInputDocument toSolrDocument(final String json) {
|
public static SolrInputDocument toSolrDocument(final String json) {
|
||||||
try {
|
try {
|
||||||
final Scholix input = MAPPER.readValue(json, Scholix.class);
|
final Scholix input = MAPPER.readValue(json, Scholix.class);
|
||||||
|
@ -36,7 +38,9 @@ public class ScholixToSolr {
|
||||||
output.addField("relation_inverse", input.getRelationship().getInverse());
|
output.addField("relation_inverse", input.getRelationship().getInverse());
|
||||||
|
|
||||||
if (input.getLinkprovider() != null) {
|
if (input.getLinkprovider() != null) {
|
||||||
final List<String> linkProviders = input.getLinkprovider().stream()
|
final List<String> linkProviders = input
|
||||||
|
.getLinkprovider()
|
||||||
|
.stream()
|
||||||
.map(ScholixEntityId::getName)
|
.map(ScholixEntityId::getName)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -44,7 +48,9 @@ public class ScholixToSolr {
|
||||||
output.addField("link_provider", linkProviders);
|
output.addField("link_provider", linkProviders);
|
||||||
}
|
}
|
||||||
if (input.getPublisher() != null) {
|
if (input.getPublisher() != null) {
|
||||||
final List<String> publishers = input.getPublisher().stream()
|
final List<String> publishers = input
|
||||||
|
.getPublisher()
|
||||||
|
.stream()
|
||||||
.map(ScholixEntityId::getName)
|
.map(ScholixEntityId::getName)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -60,37 +66,44 @@ public class ScholixToSolr {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void fillEntityField(final SolrInputDocument document, final ScholixResource resource,
|
||||||
private static void fillEntityField(final SolrInputDocument document, final ScholixResource resource, final String prefix) {
|
final String prefix) {
|
||||||
|
|
||||||
document.addField(prefix + "_identifier", resource.getDnetIdentifier());
|
document.addField(prefix + "_identifier", resource.getDnetIdentifier());
|
||||||
document.addField(prefix + "_type", resource.getObjectType());
|
document.addField(prefix + "_type", resource.getObjectType());
|
||||||
document.addField(prefix + "_publication_date", resource.getPublicationDate());
|
document.addField(prefix + "_publication_date", resource.getPublicationDate());
|
||||||
document.addField(prefix + "_subtype", resource.getObjectSubType());
|
document.addField(prefix + "_subtype", resource.getObjectSubType());
|
||||||
|
|
||||||
|
List<String> resourcePIDs = resource
|
||||||
List<String> resourcePIDs = resource.getIdentifier().stream()
|
.getIdentifier()
|
||||||
|
.stream()
|
||||||
.map(ScholixIdentifier::getIdentifier)
|
.map(ScholixIdentifier::getIdentifier)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
document.addField(prefix + "_pid", resourcePIDs);
|
document.addField(prefix + "_pid", resourcePIDs);
|
||||||
|
|
||||||
List<String> resourceSchemas = resource.getIdentifier().stream()
|
List<String> resourceSchemas = resource
|
||||||
|
.getIdentifier()
|
||||||
|
.stream()
|
||||||
.map(ScholixIdentifier::getSchema)
|
.map(ScholixIdentifier::getSchema)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
document.addField(prefix + "_schema", resourceSchemas);
|
document.addField(prefix + "_schema", resourceSchemas);
|
||||||
|
|
||||||
|
|
||||||
if (resource.getPublisher() != null) {
|
if (resource.getPublisher() != null) {
|
||||||
|
|
||||||
final List<String> publishers = resource.getPublisher().stream().map(ScholixEntityId::getName).collect(Collectors.toList());
|
final List<String> publishers = resource
|
||||||
|
.getPublisher()
|
||||||
|
.stream()
|
||||||
|
.map(ScholixEntityId::getName)
|
||||||
|
.collect(Collectors.toList());
|
||||||
if (publishers.size() > 0)
|
if (publishers.size() > 0)
|
||||||
document.addField(prefix + "_publisher", publishers);
|
document.addField(prefix + "_publisher", publishers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (resource.getCollectedFrom() != null) {
|
if (resource.getCollectedFrom() != null) {
|
||||||
|
|
||||||
final List<String> collectedFrom = resource.getCollectedFrom().stream()
|
final List<String> collectedFrom = resource
|
||||||
|
.getCollectedFrom()
|
||||||
|
.stream()
|
||||||
.map(ScholixCollectedFrom::getProvider)
|
.map(ScholixCollectedFrom::getProvider)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(ScholixEntityId::getName)
|
.map(ScholixEntityId::getName)
|
||||||
|
@ -101,6 +114,4 @@ public class ScholixToSolr {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,17 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
@ -17,23 +28,13 @@ import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.junit.jupiter.api.*;
|
import org.junit.jupiter.api.*;
|
||||||
|
|
||||||
import java.io.*;
|
import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||||
public class ScholixIndexingTest extends SolrTest {
|
public class ScholixIndexingTest extends SolrTest {
|
||||||
|
|
||||||
private static String LAYOUT_PATH = "/eu/dnetlib/dhp/oa/provision/SMF_layout.xml";
|
private static String LAYOUT_PATH = "/eu/dnetlib/dhp/oa/provision/SMF_layout.xml";
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This test verifies that the schema will be generated correctly
|
* This test verifies that the schema will be generated correctly
|
||||||
* by getting the profile of the metadataFormat and generating solr schema.xml
|
* by getting the profile of the metadataFormat and generating solr schema.xml
|
||||||
|
@ -52,7 +53,6 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
assertNotNull(scheme);
|
assertNotNull(scheme);
|
||||||
assertTrue(StringUtils.isNotBlank(scheme));
|
assertTrue(StringUtils.isNotBlank(scheme));
|
||||||
|
|
||||||
|
|
||||||
final Document fields = parseDocument(layout);
|
final Document fields = parseDocument(layout);
|
||||||
List<Node> params = fields.selectNodes("//FIELD");
|
List<Node> params = fields.selectNodes("//FIELD");
|
||||||
final List<String> exptectedFieldName = new ArrayList<>();
|
final List<String> exptectedFieldName = new ArrayList<>();
|
||||||
|
@ -63,7 +63,6 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
}
|
}
|
||||||
assertTrue(exptectedFieldName.size() > 0);
|
assertTrue(exptectedFieldName.size() > 0);
|
||||||
|
|
||||||
|
|
||||||
final Document parsedScheme = parseDocument(scheme);
|
final Document parsedScheme = parseDocument(scheme);
|
||||||
params = parsedScheme.selectNodes("//field");
|
params = parsedScheme.selectNodes("//field");
|
||||||
final List<String> createdFieldName = new ArrayList<>();
|
final List<String> createdFieldName = new ArrayList<>();
|
||||||
|
@ -94,9 +93,10 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
assertTrue(miniCluster.getZkClient().exists("/configs/" + collectionName, true));
|
assertTrue(miniCluster.getZkClient().exists("/configs/" + collectionName, true));
|
||||||
List<String> items = miniCluster.getZkClient().getChildren("/configs/" + collectionName, null, true);
|
List<String> items = miniCluster.getZkClient().getChildren("/configs/" + collectionName, null, true);
|
||||||
|
|
||||||
List<String> configurationFiles =
|
List<String> configurationFiles = Files
|
||||||
Files.list(
|
.list(
|
||||||
Paths.get(
|
Paths
|
||||||
|
.get(
|
||||||
Objects.requireNonNull(getClass().getResource(SolrUtil.CONF_FILE_BASE_PATH)).getPath()))
|
Objects.requireNonNull(getClass().getResource(SolrUtil.CONF_FILE_BASE_PATH)).getPath()))
|
||||||
.map(Path::getFileName)
|
.map(Path::getFileName)
|
||||||
.map(Path::toString)
|
.map(Path::toString)
|
||||||
|
@ -118,13 +118,12 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@Order(3)
|
@Order(3)
|
||||||
public void testFeedingSolrDocument() throws Exception {
|
public void testFeedingSolrDocument() throws Exception {
|
||||||
|
|
||||||
|
InputStream gzipStream = new GZIPInputStream(
|
||||||
InputStream gzipStream = new GZIPInputStream(Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/scholix_records.gz")));
|
Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/scholix_records.gz")));
|
||||||
Reader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
|
Reader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
|
||||||
BufferedReader buffered = new BufferedReader(decoder);
|
BufferedReader buffered = new BufferedReader(decoder);
|
||||||
String line = buffered.readLine();
|
String line = buffered.readLine();
|
||||||
|
@ -134,7 +133,6 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
int added = 0;
|
int added = 0;
|
||||||
while (line != null) {
|
while (line != null) {
|
||||||
|
|
||||||
|
|
||||||
final SolrInputDocument solrDocument = ScholixToSolr.toSolrDocument(line);
|
final SolrInputDocument solrDocument = ScholixToSolr.toSolrDocument(line);
|
||||||
|
|
||||||
client.add(solrDocument);
|
client.add(solrDocument);
|
||||||
|
@ -146,12 +144,9 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
|
|
||||||
log.debug(String.format("Feed %d documents", added));
|
log.debug(String.format("Feed %d documents", added));
|
||||||
|
|
||||||
|
|
||||||
final SolrDocumentList documents = executeQuery("*:*");
|
final SolrDocumentList documents = executeQuery("*:*");
|
||||||
assertEquals(added, documents.getNumFound());
|
assertEquals(added, documents.getNumFound());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
documents.stream().map(s -> s.getFirstValue("source_pid").toString()).forEach(System.out::println);
|
documents.stream().map(s -> s.getFirstValue("source_pid").toString()).forEach(System.out::println);
|
||||||
|
|
||||||
SolrDocumentList source_pids = executeQuery("source_pid:\"10.15468/dl.u47azs\"");
|
SolrDocumentList source_pids = executeQuery("source_pid:\"10.15468/dl.u47azs\"");
|
||||||
|
@ -162,7 +157,6 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private SolrDocumentList executeQuery(final String query) throws SolrServerException, IOException {
|
private SolrDocumentList executeQuery(final String query) throws SolrServerException, IOException {
|
||||||
|
|
||||||
final Map<String, String> queryParamMap = new HashMap<>();
|
final Map<String, String> queryParamMap = new HashMap<>();
|
||||||
|
@ -187,6 +181,4 @@ public class ScholixIndexingTest extends SolrTest{
|
||||||
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(LAYOUT_PATH)));
|
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(LAYOUT_PATH)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.provision;
|
package eu.dnetlib.dhp.oa.provision;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
import org.apache.solr.client.solrj.response.SolrPingResponse;
|
||||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
|
|
||||||
class SolrAdminApplicationTest extends SolrTest {
|
class SolrAdminApplicationTest extends SolrTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -352,7 +352,9 @@
|
||||||
</goals>
|
</goals>
|
||||||
<configuration>
|
<configuration>
|
||||||
<tasks>
|
<tasks>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}" />
|
<property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}" />
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources" />
|
<unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources" />
|
||||||
</tasks>
|
</tasks>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
@ -427,9 +429,12 @@
|
||||||
<configuration>
|
<configuration>
|
||||||
<executable>ssh</executable>
|
<executable>ssh</executable>
|
||||||
<arguments>
|
<arguments>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
||||||
<argument>-o StrictHostKeyChecking=no</argument>
|
<argument>-o StrictHostKeyChecking=no</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
|
<argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
|
||||||
</arguments>
|
</arguments>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
@ -443,9 +448,11 @@
|
||||||
<configuration>
|
<configuration>
|
||||||
<executable>scp</executable>
|
<executable>scp</executable>
|
||||||
<arguments>
|
<arguments>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
|
<argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
|
||||||
<argument>-o StrictHostKeyChecking=no</argument>
|
<argument>-o StrictHostKeyChecking=no</argument>
|
||||||
<argument>target/${oozie.package.file.name}.tar.gz</argument>
|
<argument>target/${oozie.package.file.name}.tar.gz</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
|
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
|
||||||
</arguments>
|
</arguments>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
@ -460,11 +467,15 @@
|
||||||
<executable>ssh</executable>
|
<executable>ssh</executable>
|
||||||
<!-- <outputFile>target/redirected_upload.log</outputFile> -->
|
<!-- <outputFile>target/redirected_upload.log</outputFile> -->
|
||||||
<arguments>
|
<arguments>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
||||||
<argument>-o StrictHostKeyChecking=no</argument>
|
<argument>-o StrictHostKeyChecking=no</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
|
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
|
||||||
<argument>tar -zxf oozie-package.tar.gz; </argument>
|
<argument>tar -zxf oozie-package.tar.gz; </argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/oozie-package.tar.gz; </argument>
|
<argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/oozie-package.tar.gz; </argument>
|
||||||
<argument>./upload_workflow.sh</argument>
|
<argument>./upload_workflow.sh</argument>
|
||||||
</arguments>
|
</arguments>
|
||||||
|
@ -495,9 +506,12 @@
|
||||||
<!-- this file will be used by test verification profile reading job identifier -->
|
<!-- this file will be used by test verification profile reading job identifier -->
|
||||||
<outputFile>${oozie.execution.log.file.location}</outputFile>
|
<outputFile>${oozie.execution.log.file.location}</outputFile>
|
||||||
<arguments>
|
<arguments>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
|
||||||
<argument>-o StrictHostKeyChecking=no</argument>
|
<argument>-o StrictHostKeyChecking=no</argument>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
|
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
|
||||||
<argument>./run_workflow.sh</argument>
|
<argument>./run_workflow.sh</argument>
|
||||||
</arguments>
|
</arguments>
|
||||||
|
@ -512,6 +526,7 @@
|
||||||
<configuration>
|
<configuration>
|
||||||
<executable>cat</executable>
|
<executable>cat</executable>
|
||||||
<arguments>
|
<arguments>
|
||||||
|
<!--suppress UnresolvedMavenProperty -->
|
||||||
<argument>${oozie.execution.log.file.location}</argument>
|
<argument>${oozie.execution.log.file.location}</argument>
|
||||||
</arguments>
|
</arguments>
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
Loading…
Reference in New Issue