Compare commits

...

7 Commits

Author SHA1 Message Date
Sandro La Bruzzo ffa8cdf981 fixed error on loading files on solr, in cluster is not possible to iterate files inside jar 2022-10-18 10:45:40 +02:00
Sandro La Bruzzo 818a936468 Merge remote-tracking branch 'origin/beta' into scholix_to_solr 2022-10-11 10:58:53 +02:00
Sandro La Bruzzo 4b8739e45b - Implemented oozie workflows and Java Classes to feed into solr index 2022-10-11 10:58:17 +02:00
Sandro La Bruzzo 7784b3d9c4 Merge remote-tracking branch 'origin/beta' into scholix_to_solr 2022-10-06 09:25:58 +02:00
Sandro La Bruzzo 6d5cda1a03 code refactor 2022-10-06 09:12:14 +02:00
Sandro La Bruzzo bf6c8ccc79 - Implemented Mapping from Scholix to Solr dataModel
- Moved date normalize cleaning from Saxon Function to GraphCleaningFunctions
- added Scholix records to test feeding
2022-10-06 08:49:20 +02:00
Sandro La Bruzzo 56f880c89d Added functionality to create index collection inside dhp-graph provision 2022-10-03 15:53:03 +02:00
27 changed files with 3179 additions and 33 deletions

View File

@@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
@@ -36,6 +38,14 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
public static final String BLANK = "";
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) {
// nothing to clean here
@@ -459,6 +469,20 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return Optional.ofNullable(cleanDate(date));
}
public static String normalizeDate(String s) {
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
for (String format : normalizeDateFormats) {
try {
Date parse = new SimpleDateFormat(format).parse(date);
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
return res;
} catch (ParseException e) {
// the value does not match this pattern: try the next one
}
}
return BLANK;
}
public static String cleanDate(final String inputDate) {
if (StringUtils.isBlank(inputDate)) {
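
A minimal usage sketch of the normalizeDate helper moved into GraphCleaningFunctions (illustrative only, not part of the changeset; it assumes the class is available on the classpath):

import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;

public class NormalizeDateExample {
    public static void main(String[] args) {
        // each supported pattern is tried in order; the first successful parse is re-serialized with normalizeOutFormat
        System.out.println(GraphCleaningFunctions.normalizeDate("2016/04/12")); // ISO-like timestamp ending in 'Z'
        System.out.println(GraphCleaningFunctions.normalizeDate("2018"));       // a bare year is also accepted
        System.out.println(GraphCleaningFunctions.normalizeDate("not a date")); // unparsable input yields the empty string
    }
}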

View File

@@ -7,6 +7,7 @@ import java.util.Date;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException;
@@ -14,15 +15,6 @@ import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;
public class NormalizeDate extends AbstractExtensionFunction {
private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = "yyyy-MM-dd'T'hh:mm:ss'Z'";
public static final String BLANK = "";
@Override
public String getName() {
return "normalizeDate";
@@ -31,10 +23,10 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null || arguments.length == 0) {
return new StringValue(BLANK);
return new StringValue(GraphCleaningFunctions.BLANK);
}
String s = arguments[0].head().getStringValue();
return new StringValue(_normalizeDate(s));
return new StringValue(GraphCleaningFunctions.normalizeDate(s));
}
@Override
@@ -58,18 +50,4 @@ public class NormalizeDate extends AbstractExtensionFunction {
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
return SequenceType.SINGLE_STRING;
}
private String _normalizeDate(String s) {
final String date = StringUtils.isNotBlank(s) ? s.trim() : BLANK;
for (String format : normalizeDateFormats) {
try {
Date parse = new SimpleDateFormat(format).parse(date);
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
return res;
} catch (ParseException e) {
}
}
return BLANK;
}
}

View File

@@ -142,6 +142,21 @@ object DataciteToOAFTransformation {
}
}
/**
* Uses the dnet:publication_resource vocabulary to find a synonym for one of the given terms and derive the instance.type.
* The instance.type synonym is then looked up in the dnet:result_typologies vocabulary
* to generate one of the following main entities:
* - publication
* - dataset
* - software
* - otherresearchproduct
* @param resourceType
* @param resourceTypeGeneral
* @param schemaOrg
* @param vocabularies
* @return
*/
def getTypeQualifier(
resourceType: String,
resourceTypeGeneral: String,

View File

@@ -45,6 +45,10 @@
</build>
<dependencies>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>stringtemplate</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -23,7 +23,7 @@ public class SolrAdminApplication implements Closeable {
private static final Logger log = LoggerFactory.getLogger(SolrAdminApplication.class);
enum Action {
DELETE_BY_QUERY, COMMIT
DELETE_BY_QUERY, COMMIT, CREATE
}
private final CloudSolrClient solrClient;
@@ -56,6 +58,8 @@ public class SolrAdminApplication implements Closeable {
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
final String fields = isLookup.getLayoutSource(format);
final String zkHost = isLookup.getZkHost();
log.info("zkHost: {}", zkHost);
@@ -63,7 +65,7 @@ public class SolrAdminApplication implements Closeable {
log.info("collection: {}", collection);
try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {
app.execute(action, collection, query, commit);
app.execute(action, collection, query, commit, fields);
}
}
@@ -73,10 +75,10 @@ public class SolrAdminApplication implements Closeable {
}
public SolrResponse commit(String collection) throws IOException, SolrServerException {
return execute(Action.COMMIT, collection, null, true);
return execute(Action.COMMIT, collection, null, true, null);
}
public SolrResponse execute(Action action, String collection, String query, boolean commit)
public SolrResponse execute(Action action, String collection, String query, boolean commit, final String fields)
throws IOException, SolrServerException {
switch (action) {
@@ -88,6 +90,12 @@ public class SolrAdminApplication implements Closeable {
return rsp;
case COMMIT:
return solrClient.commit(collection);
case CREATE:
SolrUtil
.uploadZookeperConfig(this.solrClient.getZkStateReader().getZkClient(), collection, true, fields);
SolrUtil.createCollection(this.solrClient, collection, 48, 1, 12, collection);
return null;
default:
throw new IllegalArgumentException("action not managed: " + action);
}
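
The new CREATE action wires the two SolrUtil steps together: it uploads the Solr configset (the schema built from the layout fields plus the templated solrconfig.xml) to ZooKeeper, then creates the collection. A minimal sketch of driving it programmatically follows; it is illustrative only, with hypothetical zkHost, collection name and layout values (the layout is normally fetched via ISLookupClient.getLayoutSource), and it assumes the sketch lives in the same package as SolrAdminApplication so the nested Action enum is visible:

public class CreateScholixCollection {
    public static void main(String[] args) throws Exception {
        final String zkHost = "localhost:9983";        // hypothetical ZooKeeper address
        final String collection = "SMF-index-scholix"; // hypothetical collection name
        final String fields = "<FIELDS>...</FIELDS>";  // layout, normally read from the IS lookup service
        try (SolrAdminApplication app = new SolrAdminApplication(zkHost)) {
            // uploads the configset to ZK and creates the collection
            app.execute(SolrAdminApplication.Action.CREATE, collection, null, false, fields);
        }
    }
}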

View File

@@ -0,0 +1,245 @@
package eu.dnetlib.dhp.oa.provision;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.dom4j.Document;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stringtemplate.v4.ST;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.type.MapType;
import com.fasterxml.jackson.databind.type.TypeFactory;
public class SolrUtil {
/**
* The log.
*/
private static final Logger log = LoggerFactory.getLogger(SolrUtil.class);
/**
* The Constant CONFIGS_PATH.
*/
private static final String CONFIGS_PATH = "/configs";
private static final char DELIMITER = '$';
public static final String CONF_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/";
// public static final String CONF_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/";
public static final String LIST_FILE_BASE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/files/file_list";
private static final String SCHEMA_TEMPLATE_PATH = "/eu/dnetlib/dhp/oa/provision/conf/schemaTemplate.xslt";
private static String createURLRequest = "http://%s:%s/solr/admin/collections?action=CREATE&name=%s&numShards=%s&replicationFactor=%s&maxShardsPerNode=%s&collection.configName=%s";
private static String generateCreateIndexRequest(final String host,
final String port,
final String collectionName,
final String numShard,
final String replicationFactor,
final String collectionConfigName,
final String maxShardsPerNode) {
return String
.format(
createURLRequest, host, port, collectionName, numShard, replicationFactor, maxShardsPerNode,
collectionConfigName);
}
public static boolean createSolrIndex(final String host,
final String port,
final String collectionName,
final String numShard,
final String replicationFactor,
final String maxShardsPerNode,
final String collectionConfigName) throws Exception {
final String uri = generateCreateIndexRequest(
host, port, collectionName, numShard, replicationFactor, maxShardsPerNode, collectionConfigName);
URL url = new URL(uri);
log.debug("create collection request: {}", uri);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("GET");
int status = connection.getResponseCode();
log.debug("create collection response status: {}", status);
BufferedReader in = new BufferedReader(
new InputStreamReader(connection.getInputStream()));
String inputLine;
StringBuffer content = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
content.append(inputLine);
}
in.close();
log.debug("content = " + content);
return true;
}
public static void uploadZookeperConfig(final SolrZkClient zkClient,
final String coreName,
final boolean overwrite,
final String layout) {
final String basepath = CONFIGS_PATH + "/" + coreName;
log.info("uploading solr configuration to ZK for index collection: " + coreName);
try {
if (overwrite && zkClient.getSolrZooKeeper().exists(basepath, false) != null) {
log.info("cleanup ZK configuration: " + coreName);
for (String child : zkClient.getSolrZooKeeper().getChildren(basepath, false)) {
final String path = basepath + "/" + child;
log.debug("cleanup ZK file: " + path);
zkClient.delete(path, -1, true);
}
zkClient.delete(basepath, -1, true);
}
if (!zkClient.exists(basepath, true)) {
log.info("upload ZK configuration: " + coreName);
zkClient.makePath(basepath, true);
uploadConfiguration(zkClient, basepath, buildConfiguration(layout));
}
log.info("upload ZK configuration complete");
} catch (Exception e) {
throw new RuntimeException("unable to upload solr configuration", e);
}
}
private static void uploadConfiguration(final SolrZkClient zkClient, final String basePath,
final Map<String, byte[]> resources) throws KeeperException,
InterruptedException, IOException {
if (!zkClient.exists(basePath, true)) {
zkClient.makePath(basePath, true);
}
for (final Map.Entry<String, byte[]> e : resources.entrySet()) {
String path = basePath + "/" + e.getKey();
log.debug("upload ZK configuration: " + path);
zkClient.create(path, e.getValue(), CreateMode.PERSISTENT, true);
}
}
private static String loadFileInClassPath(final String aPath) {
log.debug("loading classpath resource: {}", aPath);
try {
return IOUtils
.toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(aPath)), Charset.defaultCharset());
} catch (IOException e) {
// resource missing or unreadable: return null and let the caller handle it
return null;
}
}
public static Map<String, String> getServiceProperties() throws IOException {
final String properties = loadFileInClassPath(CONF_BASE_PATH + "service_properties.json");
final ObjectMapper mapper = new ObjectMapper();
TypeFactory typeFactory = mapper.getTypeFactory();
MapType mapType = typeFactory.constructMapType(HashMap.class, String.class, String.class);
return mapper.readValue(properties, mapType);
}
public static String getConfig() throws Exception {
final Map<String, String> p = getServiceProperties();
final String st = loadFileInClassPath(CONF_BASE_PATH + "solrconfig.xml.st");
final ST solrConfig = new ST(st, DELIMITER, DELIMITER);
p.forEach(solrConfig::add);
return solrConfig.render();
}
public static NamedList<Object> createCollection(CloudSolrClient client, String name, int numShards,
int replicationFactor, int maxShardsPerNode, String configName) throws SolrServerException, IOException {
ModifiableSolrParams modParams = new ModifiableSolrParams();
modParams.set(CoreAdminParams.ACTION, CollectionParams.CollectionAction.CREATE.name());
modParams.set("name", name);
modParams.set("numShards", numShards);
modParams.set("replicationFactor", replicationFactor);
modParams.set("collection.configName", configName);
modParams.set("maxShardsPerNode", maxShardsPerNode);
QueryRequest request = new QueryRequest(modParams);
request.setPath("/admin/collections");
return client.request(request);
}
private static Map<String, byte[]> buildConfiguration(final String layout)
throws Exception {
Map<String, byte[]> res = new HashMap<>();
try {
log.debug("adding schema.xml to the resource map");
res.put("schema.xml", getSchemaXML(layout).getBytes());
res.put("solrconfig.xml", getConfig().getBytes());
log.debug("adding solrconfig.xml to the resource map");
String data = IOUtils
.toString(Objects.requireNonNull(SolrUtil.class.getResourceAsStream(LIST_FILE_BASE_PATH)));
Arrays.stream(data.split("\n")).forEach(s -> {
final String name = s.replace(CONF_BASE_PATH + "files/", "");
res
.put(
name,
Objects.requireNonNull(loadFileInClassPath(s)).getBytes(StandardCharsets.UTF_8));
});
return res;
} catch (Throwable e) {
throw new Exception("failed to build configuration", e);
}
}
public static String getSchemaXML(final String layout) throws Exception {
final Document fields = new SAXReader().read(new ByteArrayInputStream(layout.getBytes(StandardCharsets.UTF_8)));
Transformer transformer = TransformerFactory
.newInstance()
.newTransformer(
new DocumentSource(new SAXReader().read(SolrUtil.class.getResourceAsStream(SCHEMA_TEMPLATE_PATH))));
transformer.setParameter("textFieldType", "text_common");
final DocumentResult result = new DocumentResult();
transformer.transform(new DocumentSource(fields), result);
String xml = result.getDocument().asXML();
log.debug("new index schema:\n" + xml);
return xml;
}
}
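
The solrconfig.xml shipped to ZooKeeper is not a static file: getConfig() renders the bundled solrconfig.xml.st StringTemplate, using '$' as both delimiters and feeding it every key/value pair read from service_properties.json. A minimal sketch of that substitution, using a hypothetical one-line template fragment:

import org.stringtemplate.v4.ST;

public class SolrConfigTemplateExample {
    public static void main(String[] args) {
        // hypothetical template fragment; SolrUtil.getConfig() applies the same mechanism to the whole solrconfig.xml.st
        ST template = new ST("<luceneMatchVersion>$luceneMatchVersion$</luceneMatchVersion>", '$', '$');
        template.add("luceneMatchVersion", "7.5.0"); // value as found in service_properties.json
        System.out.println(template.render());       // <luceneMatchVersion>7.5.0</luceneMatchVersion>
    }
}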

View File

@@ -0,0 +1,121 @@
package eu.dnetlib.dhp.oa.provision.scholix;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.api.java.function.MapFunction;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import eu.dnetlib.dhp.schema.sx.scholix.*;
public class ScholixToSolr implements MapFunction<String, SolrInputDocument> {
final static ObjectMapper MAPPER = new ObjectMapper();
public static SerializableSolrInputDocument toSolrDocument(final String json) {
try {
final Scholix input = MAPPER.readValue(json, Scholix.class);
final SerializableSolrInputDocument output = new SerializableSolrInputDocument();
fillEntityField(output, input.getSource(), "source");
fillEntityField(output, input.getTarget(), "target");
final String cleanDate = GraphCleaningFunctions.cleanDate(input.getPublicationDate());
if (cleanDate != null)
output.addField("publication_date", GraphCleaningFunctions.normalizeDate(cleanDate));
if (input.getRelationship() != null && input.getRelationship().getName() != null)
output.addField("relation_name", input.getRelationship().getName());
else
return null;
if (input.getRelationship() != null && input.getRelationship().getInverse() != null)
output.addField("relation_inverse", input.getRelationship().getInverse());
if (input.getLinkprovider() != null) {
final List<String> linkProviders = input
.getLinkprovider()
.stream()
.map(ScholixEntityId::getName)
.filter(Objects::nonNull)
.collect(Collectors.toList());
output.addField("link_provider", linkProviders);
}
if (input.getPublisher() != null) {
final List<String> publishers = input
.getPublisher()
.stream()
.map(ScholixEntityId::getName)
.filter(Objects::nonNull)
.collect(Collectors.toList());
output.addField("publisher_name", publishers);
}
output.addField("__indexrecordidentifier", input.getIdentifier());
output.addField("__result", json);
return output;
} catch (IOException e) {
throw new RuntimeException("Error converting Scholix record", e);
}
}
private static void fillEntityField(final SerializableSolrInputDocument document, final ScholixResource resource,
final String prefix) {
document.addField(prefix + "_identifier", resource.getDnetIdentifier());
document.addField(prefix + "_type", resource.getObjectType());
document.addField(prefix + "_publication_date", resource.getPublicationDate());
document.addField(prefix + "_subtype", resource.getObjectSubType());
List<String> resourcePIDs = resource
.getIdentifier()
.stream()
.map(ScholixIdentifier::getIdentifier)
.collect(Collectors.toList());
document.addField(prefix + "_pid", resourcePIDs);
List<String> resourceSchemas = resource
.getIdentifier()
.stream()
.map(ScholixIdentifier::getSchema)
.collect(Collectors.toList());
document.addField(prefix + "_schema", resourceSchemas);
if (resource.getPublisher() != null) {
final List<String> publishers = resource
.getPublisher()
.stream()
.map(ScholixEntityId::getName)
.collect(Collectors.toList());
if (publishers.size() > 0)
document.addField(prefix + "_publisher", publishers);
}
if (resource.getCollectedFrom() != null) {
final List<String> collectedFrom = resource
.getCollectedFrom()
.stream()
.map(ScholixCollectedFrom::getProvider)
.filter(Objects::nonNull)
.map(ScholixEntityId::getName)
.collect(Collectors.toList());
if (collectedFrom.size() > 0)
document.addField(prefix + "_collected_from", collectedFrom);
}
}
@Override
public SerializableSolrInputDocument call(String s) throws Exception {
return toSolrDocument(s);
}
}
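
A minimal sketch of the mapping itself (illustrative only; the JSON assumes Jackson property names matching the Scholix bean getters). Note that toSolrDocument returns null when the relation name is missing:

import org.apache.solr.common.SolrInputDocument;

import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;

public class ScholixToSolrExample {
    public static void main(String[] args) {
        // hypothetical Scholix record containing only the fields the mapper reads
        final String json = "{"
            + "\"identifier\":\"scholix::0001\","
            + "\"publicationDate\":\"2016-04-12\","
            + "\"relationship\":{\"name\":\"references\",\"inverse\":\"IsReferencedBy\"},"
            + "\"source\":{\"dnetIdentifier\":\"60|doi::abc\",\"objectType\":\"publication\","
            + "\"identifier\":[{\"identifier\":\"10.1000/xyz\",\"schema\":\"doi\"}]},"
            + "\"target\":{\"dnetIdentifier\":\"50|doi::def\",\"objectType\":\"dataset\","
            + "\"identifier\":[{\"identifier\":\"10.1000/abc\",\"schema\":\"doi\"}]}"
            + "}";
        SolrInputDocument doc = ScholixToSolr.toSolrDocument(json);
        System.out.println(doc.getFieldValue("relation_name")); // references
        System.out.println(doc.getFieldValues("source_pid"));   // the source PID(s), here the DOI 10.1000/xyz
    }
}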

View File

@@ -0,0 +1,102 @@
package eu.dnetlib.dhp.sx.provision;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.lucidworks.spark.util.SolrSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.provision.ProvisionConstants;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
public class SparkIndexCollectionOnSOLR {
private static final Integer DEFAULT_BATCH_SIZE = 1000;
// logger for this Spark job
private static final Logger log = LoggerFactory.getLogger(SparkIndexCollectionOnSOLR.class);
public static void main(String[] args) throws IOException, ParseException {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
Objects
.requireNonNull(
SparkIndexCollectionOnSOLR.class
.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/index_solr_parameters.json"))));
parser.parseArgument(args);
final String cluster = parser.get("cluster");
log.info("Cluster is {}", cluster);
final String format = parser.get("format");
log.info("Index format name is {}", format);
final String isLookupUrl = parser.get("isURL");
log.info("isURL is {}", isLookupUrl);
final String inputPath = parser.get("inputPath");
log.info("inputPath: {}", inputPath);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
final Integer batchSize = Optional
.ofNullable(parser.get("batchSize"))
.map(Integer::valueOf)
.orElse(DEFAULT_BATCH_SIZE);
log.info("batchSize: {}", batchSize);
final SparkConf conf = new SparkConf();
conf.registerKryoClasses(new Class[] {
SerializableSolrInputDocument.class
});
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
final ISLookupClient isLookup = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
final String zkHost = isLookup.getZkHost();
log.info("zkHost: {}", zkHost);
final String collection = ProvisionConstants.getCollectionName(format);
log.info("collection: {}", collection);
feedScholixToSOLRIndex(spark, inputPath, collection, batchSize, zkHost);
});
}
public static void feedScholixToSOLRIndex(final SparkSession spark, final String inputPath, final String collection,
Integer batchSize, final String zkHost) {
final JavaRDD<SolrInputDocument> docs = spark
.read()
.text(inputPath)
.as(Encoders.STRING())
.map(new ScholixToSolr(), Encoders.kryo(SolrInputDocument.class))
.toJavaRDD();
SolrSupport.indexDocs(zkHost, collection, batchSize, docs.rdd());
}
}
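
The same feeding routine can also be exercised outside of Oozie, e.g. from a test, with a local Spark session. The sketch below uses hypothetical input path, collection name and ZooKeeper address, and it requires a reachable SolrCloud instance with the collection already created (e.g. via the CREATE action above):

import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.sx.provision.SparkIndexCollectionOnSOLR;

public class FeedScholixLocally {
    public static void main(String[] args) {
        SparkSession spark = SparkSession
            .builder()
            .master("local[*]")
            .appName("feedScholixToSOLRIndex")
            .getOrCreate();
        // hypothetical values: one Scholix JSON record per line under /tmp/scholix_dump
        SparkIndexCollectionOnSOLR
            .feedScholixToSOLRIndex(spark, "/tmp/scholix_dump", "SMF-index-scholix", 1000, "localhost:9983");
        spark.stop();
    }
}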

View File

@@ -0,0 +1,67 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
<currencyConfig version="1.0">
<rates>
<!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
<rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
<rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
<rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
<rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
<rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
<rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
<rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
<rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
<rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
<rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
<rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
<rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
<rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
<rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
<rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
<rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
<rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
<rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
<rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
<rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
<rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
<rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
<rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
<rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
<rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
<rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
<rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
<rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
<rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
<rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
<rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
<rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
<rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
<rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
<rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
<rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
<rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
<!-- Cross-rates for some common currencies -->
<rate from="EUR" to="GBP" rate="0.869914" />
<rate from="EUR" to="NOK" rate="7.800095" />
<rate from="GBP" to="NOK" rate="8.966508" />
</rates>
</currencyConfig>

View File

@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
See http://wiki.apache.org/solr/QueryElevationComponent for more info
-->
<elevate>
<!-- Query elevation examples
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>
for use with techproducts example
<query text="ipod">
<doc id="MA147LL/A" /> put the actual ipod at the top
<doc id="IW-02" exclude="true" /> exclude this cable
</query>
-->
</elevate>

View File

@@ -0,0 +1,6 @@
/eu/dnetlib/dhp/oa/provision/conf/files/currency.xml
/eu/dnetlib/dhp/oa/provision/conf/files/elevate.xml
/eu/dnetlib/dhp/oa/provision/conf/files/params.json
/eu/dnetlib/dhp/oa/provision/conf/files/protwords.txt
/eu/dnetlib/dhp/oa/provision/conf/files/stopwords.txt
/eu/dnetlib/dhp/oa/provision/conf/files/synonyms.txt

View File

@@ -0,0 +1,20 @@
{"params":{
"query":{
"defType":"edismax",
"q.alt":"*:*",
"rows":"10",
"fl":"*,score",
"":{"v":0}
},
"facets":{
"facet":"on",
"facet.mincount": "1",
"":{"v":0}
},
"velocity":{
"wt": "velocity",
"v.template":"browse",
"v.layout": "layout",
"":{"v":0}
}
}}

View File

@@ -0,0 +1,21 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.
# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
dontstems
zwhacky

View File

@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

View File

@@ -0,0 +1,29 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaafoo => aaabar
bbbfoo => bbbfoo bbbbar
cccfoo => cccbar cccbaz
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma

View File

@@ -0,0 +1,549 @@
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output omit-xml-declaration="yes" indent="yes"/>
<xsl:template match="//FIELDS">
<xsl:param name="textFieldType" select="string('text_common')"/>
<xsl:variable name="smallcase" select="'abcdefghijklmnopqrstuvwxyz'"/>
<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"/>
<!--
D-Net index schema template
CHANGELOG
0.1 : first release
0.2 : added preserveOriginal="1" for text field type in the index analyzer and catenateWords="1" for the query analyzer
0.3 : changed language for SnowballPorterFilterFactory to language="German2" (index/query) in the text field type
0.4 : added solr.ASCIIFoldingFilterFactory filter (index/query) in the text field type
0.5 : added long_keyword field type, to be used for objIdentifiers
0.6 : added field types for spellchecking
0.7 : added parameter for text field type
0.8 : added field _version_, needed by Solr 4.0.0 for the transaction log
0.9 : added type: text_en_splitting
0.91 : added type: ngramtext
0.92 : added schema optimizations, removing unnecessary stored fields
0.93 : added attribute preserveOriginal="1" to fieldtype ngramtext (query analysis) to improve matches
0.94 : updated and simplified ngramtext fieldtype
0.95 : update to solr 4.4, removed attribute "compress" from field definition, ngramfield doesn't support NGramFilterFactory anymore
0.96 : update to solr 4.9
0.97 : introduced field type string_ci supporting case insensitivity.
1.0 : updated to solr 6.6.0
-->
<schema name="dnet" version="1.0">
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a field type from the
fieldTypes section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
docValues: true if this field should have doc values. Doc values are
useful (required, if you are using *Point fields) for faceting,
grouping, sorting and function queries. Doc values will make the index
faster to load, more NRT-friendly and more memory-efficient.
They however come with some limitations: they are currently only
supported by StrField, UUIDField, all Trie*Fields and *PointFields,
and depending on the field type, they might require the field to be
single-valued, be required or have a default value (check the
documentation of the field type you're interested in for more information)
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
Norms are omitted for primitive (non-analyzed) types by default.
termVectors: [false] set to true to store the term vector for a
given field.
When using MoreLikeThis, fields used for similarity should be
stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
required: The field is required. It will throw an error if the
value does not exist
default: a value that should be used if no value is specified
when adding a document.
-->
<!-- field names should consist of alphanumeric or underscore characters only and
not start with a digit. This is not currently strictly enforced,
but other field names will not have first class support from all components
and back compatibility is not guaranteed. Names with both leading and
trailing underscores (e.g. _version_) are reserved.
-->
<xsl:for-each select="./FIELD">
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
<xsl:variable name="fieldtype">
<xsl:choose>
<xsl:when test="@type"><xsl:value-of select="@type"/></xsl:when>
<xsl:when test="@tokenizable='false'">string</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$textFieldType"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="isMultivalued">
<xsl:choose>
<xsl:when test="@multivalued='false'">false</xsl:when>
<xsl:otherwise>true</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="isStored">
<xsl:choose>
<xsl:when test="@stored='true'">true</xsl:when>
<xsl:otherwise>false</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<field name="{$fieldname}" type="{$fieldtype}" indexed="{@indexable}" stored="{normalize-space($isStored)}" multiValued="{normalize-space($isMultivalued)}"/>
</xsl:for-each>
<field name="__indexrecordidentifier" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="__deleted" type="boolean" indexed="true" stored="false" default="false" omitNorms="true" omitTermFreqAndPositions="true"/>
<field name="__dsid" type="string" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
<field name="__dsversion" type="pdate" indexed="true" stored="true" omitNorms="true" omitTermFreqAndPositions="true"/>
<field name="__result" type="string" indexed="false" stored="true" multiValued="false" docValues="false"/>
<field name="__all" type="{$textFieldType}" indexed="true" stored="false" multiValued="true"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false" />
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
<!-- field for ping -->
<field name="text" type="{$textFieldType}" indexed="false" stored="false"/>
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>__indexrecordidentifier</uniqueKey>
<xsl:for-each select="./FIELD[@copy = 'true']">
<xsl:variable name="fieldname" select="translate(@name, $uppercase, $smallcase)"/>
<copyField source="{$fieldname}" dest="__all"/>
</xsl:for-each>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching.
<copyField source="sourceFieldName" dest="destinationFieldName"/>
-->
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in a
standard package such as org.apache.solr.analysis
-->
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
It supports doc values but in that case the field needs to be
single-valued and either required or have a default value.
-->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<!-- sortMissingLast and sortMissingFirst attributes are optional attributes are
currently supported on types that are sorted internally as strings
and on numeric types.
This includes "string","boolean", "int", "float", "long", "date", "double",
including the "Trie" and "Point" variants.
- If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
- If sortMissingFirst="true", then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the
field first in an ascending sort and last in a descending sort.
-->
<!--
Numeric field types that index values using KD-trees. *Point fields are faster and more efficient than Trie* fields both, at
search time and at index time, but some features are still not supported.
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
-->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<!--
Default numeric field types. For faster range queries, consider *PointFields (pint/pfloat/plong/pdouble), or the
tint/tfloat/tlong/tdouble types.
-->
<fieldType name="int" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="ints" class="solr.TrieIntField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
<fieldType name="floats" class="solr.TrieFloatField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
<fieldType name="longs" class="solr.TrieLongField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
<fieldType name="doubles" class="solr.TrieDoubleField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
<!--
Numeric field types that index each value at various levels of precision
to accelerate range queries when the number of values between the range
endpoints is large. See the javadoc for NumericRangeQuery for internal
implementation details.
Smaller precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range queries.
A precisionStep of 0 disables indexing at different precision levels.
Consider using pint/pfloat/plong/pdouble instead of Trie* fields if possible
-->
<fieldType name="tint" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tints" class="solr.TrieIntField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tfloats" class="solr.TrieFloatField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tlongs" class="solr.TrieLongField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdoubles" class="solr.TrieDoubleField" docValues="true" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.
Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, ie...
NOW/HOUR
... Round to the start of the current hour
NOW-1DAY
... Exactly 1 day prior to now
NOW/DAY+6MONTHS+3DAYS
... 6 months and 3 days in the future from the start of
the current day
Consult the TrieDateField javadocs for more information.
-->
<!-- KD-tree versions of date fields -->
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
<fieldType name="date" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="dates" class="solr.TrieDateField" docValues="true" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdate" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0"/>
<fieldType name="tdates" class="solr.TrieDateField" docValues="true" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
<fieldType name="binary" class="solr.BinaryField"/>
<!-- The "RandomSortField" is not used to store or search any
data. You can declare fields of this type it in your schema
to generate pseudo-random orderings of your docs for sorting
or function purposes. The ordering is generated based on the field
name and the version of the index. As long as the index version
remains unchanged, and the same field name is reused,
the ordering of the docs will be consistent.
If you want different psuedo-random orderings of documents,
for the same version of the index, use a dynamicField and
change the field name in the request.
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
For more info on customizing your analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element.
Example:
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->
<!-- A text field that only splits on whitespace for exact matching of words -->
<!-- <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="ngramtext" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="25"/>
<filter class="solr.TrimFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="personName" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="personNamePrefix" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="30" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<!-- A general text field that has reasonable, generic
cross-language defaults: it tokenizes with StandardTokenizer,
removes stop words from case-insensitive "stopwords.txt"
(empty by default), and down cases. At query time only, it
also applies synonyms.
-->
<fieldType name="text_common" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- A text field with defaults appropriate for English, plus
aggressive word-splitting and autophrase features enabled.
This field is just like text_en, except it adds
WordDelimiterGraphFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and
non-alphanumeric chars. This means certain compound word
cases will work, for example query "wi fi" will match
document "WiFi" or "wi-fi".
-->
<!-- <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> -->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- Less flexible matching, but less false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<!-- <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> -->
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- Just like text_common except it reverses the characters of
each token, to enable more efficient leading wildcard queries.
-->
<!-- <dynamicField name="*_txt_rev" type="text_common_rev" indexed="true" stored="true"/> -->
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> -->
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
</analyzer>
</fieldType>
<fieldType name="string_ci" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!--
Example of using PathHierarchyTokenizerFactory at index time, so
queries for paths match documents at that path, or in descendent paths
-->
<!-- <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> -->
<fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
<!--
Example of using PathHierarchyTokenizerFactory at query time, so
queries for paths match documents at that path, or in ancestor paths
-->
<!-- <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> -->
<fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
<fieldType name="ignored" stored="false" indexed="false" docValues="false" multiValued="true" class="solr.StrField" />
<!-- This point type indexes the coordinates as separate fields (subFields)
If subFieldType is defined, it references a type, and a dynamic field
definition is created matching *___<typename>. Alternately, if
subFieldSuffix is defined, that is used to create the subFields.
Example: if subFieldType="double", then the coordinates would be
indexed in fields myloc_0___double,myloc_1___double.
Example: if subFieldSuffix="_d" then the coordinates would be indexed
in fields myloc_0_d,myloc_1_d
The subFields are an implementation detail of the fieldType, and end
users normally should not need to know about them.
-->
<!-- <dynamicField name="*_point" type="point" indexed="true" stored="true"/> -->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<!-- A specialized field for geospatial search filters and distance sorting. -->
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
<!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
For more information about this and other Spatial fields new to Solr 4, see:
http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
-->
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
</schema>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,14 @@
{
"id":"solr",
"address":"localhost:9983",
"port":"8983",
"webContext":"solr",
"numShards":"4",
"replicationFactor":"1",
"maxShardsPerNode":"4",
"host":"localhost",
"luceneMatchVersion":"7.5.0",
"feedingShutdownTolerance":"30000",
"feedingBufferFlushThreshold":"1000",
"feedingSimulationMode":"false"
}
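A minimal sketch, not part of the changeset, of how a cluster descriptor like the one above could be loaded with Jackson; the resource path and class name are assumptions made for the example.

import java.io.InputStream;
import java.util.Map;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

public class SolrClusterConfigReader {

	// Hypothetical resource name: adjust to wherever the JSON above is packaged.
	private static final String CONFIG_RESOURCE = "/eu/dnetlib/dhp/sx/provision/cluster.json";

	public static Map<String, String> load() throws Exception {
		try (InputStream is = SolrClusterConfigReader.class.getResourceAsStream(CONFIG_RESOURCE)) {
			// Every value in the file is a string, so a Map<String, String> is sufficient here.
			return new ObjectMapper().readValue(is, new TypeReference<Map<String, String>>() {});
		}
	}

	public static void main(String[] args) throws Exception {
		final Map<String, String> cfg = load();
		System.out.println("zk address: " + cfg.get("address"));
		System.out.println("shards: " + cfg.get("numShards"));
	}
}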

View File

@ -0,0 +1,23 @@
<FIELDS>
<!-- SOURCE FIELD -->
<FIELD indexable="true" name="source_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_type" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="false" name="source_publication_date" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_subType" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_pid" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_schema" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="source_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/><!-- TARGET FIELD -->
<FIELD indexable="true" name="target_identifier" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_type" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_subType" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_pid" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_schema" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_publisher" tokenizable="true" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="target_collected_from" tokenizable="true" stored="true" stat="false" xpath="None"/>
<FIELD indexable="false" name="target_publication_date" stored="true" stat="false" tokenizable="false" value="None"/><!-- RELATION FIELD -->
<FIELD indexable="true" name="publicationDate" multivalued="false" stored="true" stat="false" type="pdate" value="None"/>
<FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="publisher_name" tokenizable="ture" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="linkprovider" tokenizable="ture" stored="true" stat="false" xpath="None"/>
</FIELDS>

View File

@ -0,0 +1,32 @@
[
{
"paramName":"c",
"paramLongName":"cluster",
"paramDescription":"should be cluster1 or cluster2",
"paramRequired":true
},
{
"paramName":"is",
"paramLongName":"isURL",
"paramDescription":"the Information Service LookUp URL",
"paramRequired":true
},
{
"paramName":"ip",
"paramLongName":"inputPath",
"paramDescription":"the source input path",
"paramRequired":true
},
{
"paramName":"b",
"paramLongName":"batchSize",
"paramDescription":"the batch size param",
"paramRequired":false
},
{
"paramName":"f",
"paramLongName":"format",
"paramDescription":"index metadata format name",
"paramRequired":true
}
]
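As a purely illustrative sketch of how the long names declared above map onto "--name value" command-line arguments (the actual Spark job relies on the project's own argument-parsing utilities, not this code):

import java.util.HashMap;
import java.util.Map;

public class IndexArgsSketch {

	// Collects "--name value" pairs into a map, mirroring the long names declared above.
	static Map<String, String> parse(String[] args) {
		final Map<String, String> parsed = new HashMap<>();
		for (int i = 0; i < args.length - 1; i += 2) {
			if (args[i].startsWith("--")) {
				parsed.put(args[i].substring(2), args[i + 1]);
			}
		}
		return parsed;
	}

	public static void main(String[] args) {
		final Map<String, String> params = parse(args);
		// "cluster", "isURL", "inputPath" and "format" are required by the declaration above;
		// "batchSize" is optional and falls back to a default here.
		final String cluster = params.get("cluster");
		final String isURL = params.get("isURL");
		final String inputPath = params.get("inputPath");
		final String format = params.get("format");
		final int batchSize = Integer.parseInt(params.getOrDefault("batchSize", "1000"));
		System.out.printf("cluster=%s isURL=%s inputPath=%s format=%s batchSize=%d%n",
			cluster, isURL, inputPath, format, batchSize);
	}
}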

View File

@ -0,0 +1,14 @@
<configuration>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,113 @@
<workflow-app name="Index Scholexplorer Infospace" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>sourcePath</name>
<description>the sourcePath of the json RDDs</description>
</property>
<property>
<name>isLookupUrl</name>
<description>URL for the isLookup service</description>
</property>
<property>
<name>solrDeletionQuery</name>
<value>*:*</value>
<description>query used in the deleted by query operation</description>
</property>
<property>
<name>format</name>
<description>metadata format name (SMF)</description>
</property>
</parameters>
<start to="indexScholix"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="drop_solr_collection">
<java>
<configuration>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--format</arg><arg>${format}</arg>
<arg>--action</arg><arg>DELETE_BY_QUERY</arg>
<arg>--query</arg><arg>${solrDeletionQuery}</arg>
<arg>--commit</arg><arg>true</arg>
</java>
<ok to="create_solr_index"/>
<error to="Kill"/>
</action>
<action name="create_solr_index">
<java>
<configuration>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--format</arg><arg>${format}</arg>
<arg>--action</arg><arg>CREATE</arg>
</java>
<ok to="indexScholix"/>
<error to="Kill"/>
</action>
<action name="indexScholix">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Index summary</name>
<class>eu.dnetlib.dhp.sx.provision.SparkIndexCollectionOnSOLR</class>
<jar>dhp-graph-provision-${projectVersion}.jar</jar>
<spark-opts>
--conf spark.dynamicAllocation.enabled=true
--conf spark.shuffle.service.enabled=true
--executor-memory=${sparkExecutorMemory}
--conf spark.dynamicAllocation.maxExecutors="16"
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--cluster</arg><arg>yarn</arg>
<arg>--isURL</arg><arg>${isLookupUrl}</arg>
<arg>--inputPath</arg><arg>${sourcePath}</arg>
<arg>--format</arg><arg>${format}</arg>
</spark>
<ok to="commit_solr_collection"/>
<error to="Kill"/>
</action>
<action name="commit_solr_collection">
<java>
<configuration>
<property>
<name>oozie.launcher.mapreduce.user.classpath.first</name>
<value>true</value>
</property>
</configuration>
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--format</arg><arg>${format}</arg>
<arg>--action</arg><arg>COMMIT</arg>
</java>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,185 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.junit.jupiter.api.*;
import eu.dnetlib.dhp.oa.provision.scholix.ScholixToSolr;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class ScholixIndexingTest extends SolrTest {
private static final String LAYOUT_PATH = "/eu/dnetlib/dhp/oa/provision/SMF_layout.xml";
/**
* This test verifies that the schema is generated correctly
* by reading the profile of the metadataFormat and producing the Solr schema.xml.
* We expect every field declared in the metadataFormat to appear among the fields of the Solr schema.
* @throws Exception
*/
@Test
@Order(1)
void testSchemaCreation() throws Exception {
final String layout = loadSMFLayout();
assertNotNull(layout);
assertTrue(StringUtils.isNotBlank(layout));
final String scheme = SolrUtil.getSchemaXML(layout);
assertNotNull(scheme);
assertTrue(StringUtils.isNotBlank(scheme));
final Document fields = parseDocument(layout);
List<Node> params = fields.selectNodes("//FIELD");
final List<String> expectedFieldName = new ArrayList<>();
for (Node param : params) {
Element element = (Element) param;
String name = element.attributeValue("name");
expectedFieldName.add(name.toLowerCase());
}
assertTrue(expectedFieldName.size() > 0);
final Document parsedScheme = parseDocument(scheme);
params = parsedScheme.selectNodes("//field");
final List<String> createdFieldName = new ArrayList<>();
for (Node param : params) {
Element element = (Element) param;
String name = element.attributeValue("name");
createdFieldName.add(name.toLowerCase());
}
assertTrue(createdFieldName.size() > 0);
expectedFieldName.stream().map(createdFieldName::contains).forEach(Assertions::assertTrue);
}
/***
* Tests that the creation of the index works:
* we verify that all the configuration files are uploaded into
* the ZooKeeper instance of Solr under its
* collection name.
* @throws Exception
*/
@Test
@Order(2)
public void testCreateCollection() throws Exception {
final String collectionName = "SMF-index-scholix";
SolrUtil.uploadZookeperConfig(miniCluster.getZkClient(), collectionName, true, loadSMFLayout());
assertTrue(miniCluster.getZkClient().exists("/configs/" + collectionName, true));
List<String> items = miniCluster.getZkClient().getChildren("/configs/" + collectionName, null, true);
List<String> configurationFiles = Files
.list(
Paths
.get(
Objects.requireNonNull(getClass().getResource(SolrUtil.CONF_BASE_PATH + "files/")).getPath()))
.map(Path::getFileName)
.filter(p -> !p.getFileName().toString().equalsIgnoreCase("file_list"))
.map(Path::toString)
.collect(Collectors.toList());
configurationFiles.add("schema.xml");
configurationFiles.add("solrconfig.xml");
configurationFiles.forEach(s -> assertTrue(items.contains(s)));
SolrUtil.createCollection(miniCluster.getSolrClient(), "Scholix", 4, 1, 2, collectionName);
log.debug("Collection Created");
final Map<String, String> queryParamMap = new HashMap<>();
queryParamMap.put("q", "*:*");
MapSolrParams queryParams = new MapSolrParams(queryParamMap);
final QueryResponse response = miniCluster.getSolrClient().query("Scholix", queryParams);
final SolrDocumentList documents = response.getResults();
assertEquals(0, documents.getNumFound());
}
@Test
@Order(3)
public void testFeedingSolrDocument() throws Exception {
InputStream gzipStream = new GZIPInputStream(
Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/provision/scholix_records.gz")));
Reader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
BufferedReader buffered = new BufferedReader(decoder);
String line = buffered.readLine();
final CloudSolrClient client = miniCluster.getSolrClient();
client.setDefaultCollection("Scholix");
int added = 0;
while (line != null) {
final SolrInputDocument solrDocument = ScholixToSolr.toSolrDocument(line);
client.add(solrDocument);
added++;
line = buffered.readLine();
}
client.commit();
log.debug(String.format("Feed %d documents", added));
final SolrDocumentList documents = executeQuery("*:*");
assertEquals(added, documents.getNumFound());
documents.stream().map(s -> s.getFirstValue("source_pid").toString()).forEach(System.out::println);
SolrDocumentList source_pids = executeQuery("source_pid:\"10.15468/dl.u47azs\"");
System.out.println("source_pid.getNumFound() = " + source_pids.getNumFound());
source_pids.stream().map(s -> s.getFieldValue("source_pid")).forEach(System.out::println);
}
private SolrDocumentList executeQuery(final String query) throws SolrServerException, IOException {
final Map<String, String> queryParamMap = new HashMap<>();
queryParamMap.put("q", query);
MapSolrParams queryParams = new MapSolrParams(queryParamMap);
final QueryResponse response = miniCluster.getSolrClient().query("Scholix", queryParams);
return response.getResults();
}
/***
* Utility for parsing XML
* @param xml the XML string to parse
* @return Dom4J Document
* @throws DocumentException
*/
private Document parseDocument(final String xml) throws DocumentException {
return new SAXReader().read(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
}
private String loadSMFLayout() throws IOException {
return IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(LAYOUT_PATH)));
}
}

View File

@ -2,11 +2,9 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
class SolrAdminApplicationTest extends SolrTest {
@ -24,7 +22,7 @@ class SolrAdminApplicationTest extends SolrTest {
SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost());
UpdateResponse rsp = (UpdateResponse) admin
.execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false);
.execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false, null);
assertEquals(0, rsp.getStatus());
}
@ -38,5 +36,4 @@ class SolrAdminApplicationTest extends SolrTest {
assertEquals(0, rsp.getStatus());
}
}

View File

@ -0,0 +1,31 @@
<LAYOUT name="index">
<FIELDS>
<!-- SOURCE FIELD -->
<FIELD indexable="true" name="source_identifier" multivalued="false" stored="false" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_type" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="false" name="source_publication_date" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_subtype" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_pid" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_schema" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="source_publisher" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="source_collected_from" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
<!-- TARGET FIELD -->
<FIELD indexable="true" name="target_identifier" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_type" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="false" name="target_publication_date" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_subtype" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_pid" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_schema" multivalued="true" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="target_publisher" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="target_collected_from" multivalued="true" tokenizable="true" stored="true" stat="false" xpath="None"/>
<!-- RELATION FIELD -->
<FIELD indexable="true" name="publication_date" multivalued="false" stored="true" stat="false" type="date" value="None"/>
<FIELD indexable="true" name="relation_name" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="relation_inverse" multivalued="false" stored="true" stat="false" tokenizable="false" value="None"/>
<FIELD indexable="true" name="publisher_name" multivalued="true" tokenizable="ture" stored="true" stat="false" xpath="None"/>
<FIELD indexable="true" name="link_provider" multivalued="true" tokenizable="ture" stored="true" stat="false" xpath="None"/>
</FIELDS>
</LAYOUT>
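For orientation, a small stand-alone sketch showing how the FIELD declarations of a layout like this one can be enumerated with dom4j, the same library used in ScholixIndexingTest above; the class and method names are invented for the example.

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

public class LayoutFieldLister {

	// Prints the name and a couple of attributes of every FIELD declared in a layout
	// such as the one above; the layout XML string is expected to be supplied by the caller.
	public static void listFields(final String layoutXml) throws Exception {
		final Document doc = new SAXReader()
			.read(new ByteArrayInputStream(layoutXml.getBytes(StandardCharsets.UTF_8)));
		final List<Node> fields = doc.selectNodes("//FIELD");
		for (Node n : fields) {
			final Element e = (Element) n;
			System.out.printf("%s indexable=%s multivalued=%s%n",
				e.attributeValue("name"),
				e.attributeValue("indexable"),
				e.attributeValue("multivalued"));
		}
	}
}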

View File

@ -352,7 +352,9 @@
</goals>
<configuration>
<tasks>
<!--suppress UnresolvedMavenProperty -->
<property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}" />
<!--suppress UnresolvedMavenProperty -->
<unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources" />
</tasks>
</configuration>
@ -427,9 +429,12 @@
<configuration>
<executable>ssh</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
</arguments>
</configuration>
@ -443,9 +448,11 @@
<configuration>
<executable>scp</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>target/${oozie.package.file.name}.tar.gz</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
</arguments>
</configuration>
@ -460,11 +467,15 @@
<executable>ssh</executable>
<!-- <outputFile>target/redirected_upload.log</outputFile> -->
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>tar -zxf oozie-package.tar.gz; </argument>
<!--suppress UnresolvedMavenProperty -->
<argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/oozie-package.tar.gz; </argument>
<argument>./upload_workflow.sh</argument>
</arguments>
@ -495,9 +506,12 @@
<!-- this file will be used by test verification profile reading job identifier -->
<outputFile>${oozie.execution.log.file.location}</outputFile>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<!--suppress UnresolvedMavenProperty -->
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>./run_workflow.sh</argument>
</arguments>
@ -512,6 +526,7 @@
<configuration>
<executable>cat</executable>
<arguments>
<!--suppress UnresolvedMavenProperty -->
<argument>${oozie.execution.log.file.location}</argument>
</arguments>
</configuration>