dedup test implementation & graph drawing tools
This commit is contained in:
parent
30aeed3803
commit
5a52aed8e1
|
@ -1,2 +1,2 @@
|
|||
# Mon May 03 16:05:14 CEST 2021
|
||||
# Mon Sep 13 14:51:29 CEST 2021
|
||||
projectPropertyKey=projectPropertyValue
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
entitiesPath = /tmp/prod_provision/graph/01_graph_raw/publication
|
||||
workingPath = /user/michele.debonis/erf_test/workingdir
|
||||
#entitiesPath = /tmp/prod_provision/graph/01_graph_raw/publication
|
||||
entitiesPath = /tmp/publications_test_dump
|
||||
workingPath = /user/michele.debonis/erf_test/workingdirtree
|
||||
dedupConfPath = /user/michele.debonis/erf_test/pubs.tree.conf.json
|
||||
numPartitions = 20
|
||||
useTree = true
|
||||
useTree = false
|
|
@ -132,6 +132,17 @@
|
|||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
|
|
@ -1,34 +1,22 @@
|
|||
package eu.dnetlib;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.hash.Hashing;
|
||||
import eu.dnetlib.graph.GraphProcessor;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.config.WfConfig;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.model.MapDocumentComparator;
|
||||
import eu.dnetlib.pace.tree.JsonListMatch;
|
||||
import eu.dnetlib.pace.tree.LevensteinTitle;
|
||||
import eu.dnetlib.pace.tree.SizeMatch;
|
||||
import eu.dnetlib.pace.tree.TitleVersionMatch;
|
||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.util.BlockProcessorForTesting;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
import eu.dnetlib.pace.util.Reporter;
|
||||
import eu.dnetlib.pace.utils.Utility;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import eu.dnetlib.support.Block;
|
||||
import eu.dnetlib.support.ConnectedComponent;
|
||||
import eu.dnetlib.support.Relation;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.api.java.function.PairFlatMapFunction;
|
||||
import org.apache.spark.api.java.function.PairFunction;
|
||||
import org.apache.spark.graphx.Edge;
|
||||
import org.apache.spark.rdd.RDD;
|
||||
|
@ -39,7 +27,6 @@ import org.apache.spark.sql.SparkSession;
|
|||
import org.apache.spark.util.LongAccumulator;
|
||||
import scala.Serializable;
|
||||
import scala.Tuple2;
|
||||
import scala.math.Ordering;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.*;
|
||||
|
@ -98,15 +85,15 @@ public class Deduper implements Serializable {
|
|||
}
|
||||
|
||||
public static JavaRDD<Relation> computeRelations(
|
||||
JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config) {
|
||||
JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config, boolean useTree) {
|
||||
Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());
|
||||
|
||||
return blocks
|
||||
.flatMapToPair(
|
||||
it -> {
|
||||
final SparkReporter reporter = new SparkReporter(accumulators);
|
||||
new BlockProcessor(config)
|
||||
.processSortedBlock(it._1(), it._2().getDocuments(), reporter);
|
||||
new BlockProcessorForTesting(config)
|
||||
.processSortedBlock(it._1(), it._2().getDocuments(), reporter, useTree);
|
||||
return reporter.getRelations().iterator();
|
||||
})
|
||||
.mapToPair(it -> new Tuple2<>(it._1() + it._2(), new Relation(it._1(), it._2(), "simRel")))
|
||||
|
@ -114,138 +101,7 @@ public class Deduper implements Serializable {
|
|||
.map(Tuple2::_2);
|
||||
}
|
||||
|
||||
public static Queue<MapDocument> prepareQueue(final Iterable<MapDocument> documents, DedupConfig config) {
|
||||
final Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(config.getWf().getOrderField()));
|
||||
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final int queueMaxSize = config.getWf().getQueueMaxSize();
|
||||
|
||||
documents.forEach(doc -> {
|
||||
if (queue.size() <= queueMaxSize) {
|
||||
final String id = doc.getIdentifier();
|
||||
|
||||
if (!seen.contains(id)) {
|
||||
seen.add(id);
|
||||
queue.add(doc);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
public static JavaRDD<Relation> computePublicationRelations(
|
||||
JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config) {
|
||||
|
||||
return blocks.
|
||||
flatMapToPair((PairFlatMapFunction<Tuple2<String, Block>, String, String>)
|
||||
it -> {
|
||||
List<Tuple2<String,String>> relations = new ArrayList<>();
|
||||
|
||||
if (it._2().getDocuments().size()>1) {
|
||||
|
||||
Queue<MapDocument> queue = prepareQueue(it._2().getDocuments(), config);
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
|
||||
final MapDocument pivot = queue.remove();
|
||||
final String idPivot = pivot.getIdentifier();
|
||||
|
||||
WfConfig wf = config.getWf();
|
||||
final Field fieldsPivot = pivot.values(wf.getOrderField());
|
||||
final String fieldPivot = (fieldsPivot == null) || fieldsPivot.isEmpty() ? "" : fieldsPivot.stringValue();
|
||||
|
||||
if (fieldPivot != null) {
|
||||
int i = 0;
|
||||
for (final MapDocument curr : queue) {
|
||||
final String idCurr = curr.getIdentifier();
|
||||
|
||||
if (config.getWf().getSkipList().contains(StringUtils.substringBetween(idCurr, "|", "::"))) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (i > wf.getSlidingWindowSize()) {
|
||||
break;
|
||||
}
|
||||
|
||||
final Field fieldsCurr = curr.values(wf.getOrderField());
|
||||
final String fieldCurr = (fieldsCurr == null) || fieldsCurr.isEmpty() ? null : fieldsCurr.stringValue();
|
||||
|
||||
if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
|
||||
|
||||
double score = 0.0;
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("jpath_value", "$.value");
|
||||
params.put("jpath_classid", "$.qualifier.classid");
|
||||
JsonListMatch jsonListMatch = new JsonListMatch(params);
|
||||
double result = jsonListMatch.compare(pivot.getFieldMap().get("pid"), curr.getFieldMap().get("pid"), config);
|
||||
if (result > 0.5) //if the result of the comparison is greater than the threshold
|
||||
score += 10.0; //high score because it should match when the first condition is satisfied
|
||||
else
|
||||
score += 0.0;
|
||||
|
||||
TitleVersionMatch titleVersionMatch = new TitleVersionMatch(params);
|
||||
double result1 = titleVersionMatch.compare(pivot.getFieldMap().get("title"), curr.getFieldMap().get("title"), config);
|
||||
SizeMatch sizeMatch = new SizeMatch(params);
|
||||
double result2 = sizeMatch.compare(pivot.getFieldMap().get("authors"), curr.getFieldMap().get("authors"), config);
|
||||
if ((result1 == 1.0 && result2 == 1.0) || (result1 == -1.0 && result2 == 1.0) || (result1 == 1.0 && result2 == -1.0) || (result1 == -1.0 && result2 == -1.0))
|
||||
score += 0.0;
|
||||
else
|
||||
score -= 1.0;
|
||||
|
||||
LevensteinTitle levensteinTitle = new LevensteinTitle(params);
|
||||
double result3 = levensteinTitle.compare(pivot.getFieldMap().get("title"), curr.getFieldMap().get("title"), config);
|
||||
score += result3;
|
||||
|
||||
if (score >= 0.99) {
|
||||
relations.add(new Tuple2<>(idPivot, idCurr));
|
||||
relations.add(new Tuple2<>(idCurr, idPivot));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return relations.iterator();
|
||||
})
|
||||
.mapToPair(it -> new Tuple2<>(it._1() + it._2(), new Relation(it._1(), it._2(), "simRel")))
|
||||
.reduceByKey((a,b) -> a)
|
||||
.map(Tuple2::_2);
|
||||
}
|
||||
|
||||
public static boolean comparePublications(MapDocument a, MapDocument b, DedupConfig config){
|
||||
|
||||
double score = 0.0;
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("jpath_value", "$.value");
|
||||
params.put("jpath_classid", "$.qualifier.classid");
|
||||
JsonListMatch jsonListMatch = new JsonListMatch(params);
|
||||
double result = jsonListMatch.compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config);
|
||||
if (result > 0.5) //if the result of the comparison is greater than the threshold
|
||||
score += 1.0;
|
||||
else
|
||||
score += 0.0;
|
||||
|
||||
TitleVersionMatch titleVersionMatch = new TitleVersionMatch(params);
|
||||
double result1 = titleVersionMatch.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
|
||||
SizeMatch sizeMatch = new SizeMatch(params);
|
||||
double result2 = sizeMatch.compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
|
||||
if ((result1 == 1.0 && result2 == 1.0) || (result1 == -1.0 && result2 == 1.0) || (result1 == 1.0 && result2 == -1.0) || (result1 == -1.0 && result2 == -1.0))
|
||||
score += 0.0;
|
||||
else
|
||||
score -= 1.0;
|
||||
|
||||
LevensteinTitle levensteinTitle = new LevensteinTitle(params);
|
||||
double result3 = levensteinTitle.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
|
||||
score += result3;
|
||||
|
||||
return score >= 0.99;
|
||||
|
||||
}
|
||||
|
||||
public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath){
|
||||
public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath, boolean useTree){
|
||||
|
||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
|
@ -261,7 +117,7 @@ public class Deduper implements Serializable {
|
|||
JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConf);
|
||||
|
||||
// create relations by comparing only elements in the same group
|
||||
JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf);
|
||||
JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf, useTree);
|
||||
|
||||
// save the simrel in the workingdir
|
||||
spark
|
||||
|
|
|
@ -61,7 +61,7 @@ public class SparkCreateDedupEntity extends AbstractSparkJob {
|
|||
log.info("dedupConfPath: '{}'", dedupConfPath);
|
||||
log.info("numPartitions: '{}'", numPartitions);
|
||||
|
||||
DedupConfig dedupConf = DedupConfig.load(readResource("/jobs/parameters/createDedupEntity_parameters.json", SparkCreateDedupEntity.class));
|
||||
DedupConfig dedupConf = DedupConfig.load(readFileFromHDFS(dedupConfPath));
|
||||
|
||||
JavaPairRDD<String, String> entities = spark
|
||||
.read()
|
||||
|
|
|
@ -82,12 +82,7 @@ public class SparkCreateSimRels extends AbstractSparkJob {
|
|||
JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConfig);
|
||||
|
||||
// create relations by comparing only elements in the same group
|
||||
JavaRDD<Relation> relations;
|
||||
|
||||
if (useTree)
|
||||
relations = Deduper.computeRelations(sc, blocks, dedupConfig);
|
||||
else
|
||||
relations = Deduper.computePublicationRelations(sc, blocks, dedupConfig);
|
||||
JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConfig, useTree);
|
||||
|
||||
// save the simrel in the workingdir
|
||||
spark
|
||||
|
|
|
@ -131,7 +131,7 @@
|
|||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
--conf spark.dynamicAllocation.enabled=false
|
||||
--conf spark.dynamicAllocation.enabled=true
|
||||
</spark-opts>
|
||||
<arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,111 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import java.util.*;
|
||||
import java.awt.*;
|
||||
import java.awt.event.*;
|
||||
import javax.swing.*;
|
||||
|
||||
/**
 * Minimal Swing frame that draws a graph: every node is a labelled rounded box centred
 * on a pixel position and every edge is a straight line between two node centres.
 *
 * <p>Nodes are addressed by their insertion index (0-based) when adding edges.
 */
public class GraphDraw extends JFrame {

    /** Maximum number of characters of a node name shown inside its box. */
    static final int MAX_LABEL_LENGTH = 20;

    /** Minimum width, in pixels, of a node box. */
    int width;
    /** Minimum height, in pixels, of a node box. */
    int height;

    ArrayList<Node> nodes;
    ArrayList<edge> edges;

    /** Creates an untitled graph window. */
    public GraphDraw() {
        initGraphState();
    }

    /**
     * Creates a graph window with a title.
     *
     * @param name the window title
     */
    public GraphDraw(String name) {
        this.setTitle(name);
        initGraphState();
    }

    /** Shared constructor setup: close behavior, empty node/edge lists, default box size. */
    private void initGraphState() {
        this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        nodes = new ArrayList<Node>();
        edges = new ArrayList<edge>();
        width = 15;
        height = 15;
    }

    /** A labelled node centred on pixel ({@code x}, {@code y}). */
    class Node {
        int x, y;
        String name;
        Color color;

        /** Creates a node filled in white. */
        public Node(String myName, int myX, int myY) {
            this(myName, myX, myY, Color.white);
        }

        /** Creates a node with an explicit fill color. */
        public Node(String myName, int myX, int myY, Color myColor) {
            x = myX;
            y = myY;
            name = myName;
            color = myColor;
        }
    }

    /** An edge between the nodes stored at indices {@code i} and {@code j}. */
    class edge {
        int i, j;

        public edge(int ii, int jj) {
            i = ii;
            j = jj;
        }
    }

    /**
     * Adds a node centred on pixel ({@code x}, {@code y}) and requests a repaint.
     *
     * @param name the node label
     * @param x horizontal pixel position of the node centre
     * @param y vertical pixel position of the node centre
     */
    public void addNode(String name, int x, int y) {
        nodes.add(new Node(name, x, y));
        this.repaint();
    }

    /**
     * Adds an edge between the nodes at indices {@code i} and {@code j} and requests a repaint.
     *
     * @param i index of the first node
     * @param j index of the second node
     */
    public void addEdge(int i, int j) {
        edges.add(new edge(i, j));
        this.repaint();
    }

    /**
     * Returns the text drawn for a node name: the name itself when it has at most
     * {@link #MAX_LABEL_LENGTH} characters, otherwise its first {@link #MAX_LABEL_LENGTH}
     * characters. A {@code null} name yields an empty label.
     *
     * @param name the full node name, may be {@code null}
     * @return the label to draw, never {@code null}
     */
    static String abbreviate(String name) {
        if (name == null) {
            return "";
        }
        return name.length() > MAX_LABEL_LENGTH ? name.substring(0, MAX_LABEL_LENGTH) : name;
    }

    /** Tells whether {@code index} addresses an existing node. */
    private boolean isNodeIndex(int index) {
        return index >= 0 && index < nodes.size();
    }

    /**
     * Draws the edges first and the node boxes on top of them.
     *
     * @param g the graphics context to draw on
     */
    @Override
    public void paint(Graphics g) {
        // Let the frame paint its own background and children before drawing the graph.
        super.paint(g);

        FontMetrics f = g.getFontMetrics();
        int nodeHeight = Math.max(height, f.getHeight());

        g.setColor(Color.black);
        for (edge e : edges) {
            // An edge may reference a node that has not been added; skip it instead of failing.
            if (!isNodeIndex(e.i) || !isNodeIndex(e.j)) {
                continue;
            }
            Node from = nodes.get(e.i);
            Node to = nodes.get(e.j);
            g.drawLine(from.x, from.y, to.x, to.y);
        }

        for (Node n : nodes) {
            // Truncate for display only: the stored name is left untouched, and names
            // shorter than the limit no longer make substring throw.
            String label = abbreviate(n.name);
            int labelWidth = f.stringWidth(label);
            int nodeWidth = Math.max(width, labelWidth + width / 2);
            int left = n.x - nodeWidth / 2;
            int top = n.y - nodeHeight / 2;

            g.setColor(n.color);
            g.fillRoundRect(left, top, nodeWidth, nodeHeight, 2, 2);
            g.setColor(Color.black);
            g.drawRoundRect(left, top, nodeWidth, nodeHeight, 2, 2);

            g.drawString(label, n.x - labelWidth / 2, n.y + f.getHeight() / 5);
        }
    }
}
|
||||
|
||||
class testGraphDraw {
|
||||
//Here is some example syntax for the GraphDraw class
|
||||
public static void main(String[] args) {
|
||||
GraphDraw frame = new GraphDraw("Test Window");
|
||||
|
||||
frame.setSize(400,300);
|
||||
|
||||
frame.setVisible(true);
|
||||
|
||||
frame.addNode("a", 50,50);
|
||||
frame.addNode("b", 100,100);
|
||||
frame.addNode("longNode", 200,200);
|
||||
frame.addEdge(0,1);
|
||||
frame.addEdge(0,2);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
{
|
||||
"wf" : {
|
||||
"threshold" : "0.99",
|
||||
"dedupRun" : "001",
|
||||
"entityType" : "datasource",
|
||||
"orderField" : "name",
|
||||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "50",
|
||||
"slidingWindowSize" : "200",
|
||||
"idPath":"$.id",
|
||||
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
|
||||
"includeChildren" : "true",
|
||||
"maxIterations": "20"
|
||||
},
|
||||
"pace" : {
|
||||
"clustering" : [
|
||||
{ "name" : "sortedngrampairs", "fields" : [ "name" ], "params" : { "max" : 2, "ngramLen" : "3"} },
|
||||
{ "name" : "suffixprefix", "fields" : [ "name" ], "params" : { "max" : 1, "len" : "3" } },
|
||||
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
|
||||
],
|
||||
"decisionTree" : {
|
||||
"start": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "websiteurl",
|
||||
"comparator": "domainExactMatch",
|
||||
"weight": 1,
|
||||
"countIfUndefined": "false",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 1,
|
||||
"aggregation": "AVG",
|
||||
"positive": "layer2",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "layer2",
|
||||
"ignoreUndefined": "true"
|
||||
},
|
||||
"layer2": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "name",
|
||||
"comparator": "jaroWinkler",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "true",
|
||||
"params": {
|
||||
}
|
||||
}
|
||||
],
|
||||
"threshold": 0.9,
|
||||
"aggregation": "AVG",
|
||||
"positive": "MATCH",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "NO_MATCH",
|
||||
"ignoreUndefined": "true"
|
||||
}
|
||||
},
|
||||
"model" : [
|
||||
{ "name" : "name", "type" : "String", "path" : "$.name" },
|
||||
{ "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl" }
|
||||
],
|
||||
"blacklists" : {
|
||||
"legalname" : []
|
||||
},
|
||||
"synonyms": {}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,401 @@
|
|||
{
|
||||
"wf": {
|
||||
"threshold": "0.99",
|
||||
"dedupRun": "001",
|
||||
"entityType": "result",
|
||||
"subEntityType": "resulttype",
|
||||
"subEntityValue": "publication",
|
||||
"orderField": "title",
|
||||
"queueMaxSize": "200",
|
||||
"groupMaxSize": "100",
|
||||
"maxChildren": "100",
|
||||
"slidingWindowSize": "50",
|
||||
"rootBuilder": [
|
||||
"result",
|
||||
"resultProject_outcome_isProducedBy",
|
||||
"resultResult_publicationDataset_isRelatedTo",
|
||||
"resultResult_similarity_isAmongTopNSimilarDocuments",
|
||||
"resultResult_similarity_hasAmongTopNSimilarDocuments",
|
||||
"resultOrganization_affiliation_isAffiliatedWith",
|
||||
"resultResult_part_hasPart",
|
||||
"resultResult_part_isPartOf",
|
||||
"resultResult_supplement_isSupplementTo",
|
||||
"resultResult_supplement_isSupplementedBy",
|
||||
"resultResult_version_isVersionOf"
|
||||
],
|
||||
"includeChildren": "true",
|
||||
"maxIterations": 20,
|
||||
"idPath": "$.id"
|
||||
},
|
||||
"pace": {
|
||||
"clustering" : [
|
||||
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
|
||||
{ "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
|
||||
],
|
||||
"decisionTree": {
|
||||
"start": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "pid",
|
||||
"comparator": "jsonListMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {
|
||||
"jpath_value": "$.value",
|
||||
"jpath_classid": "$.qualifier.classid"
|
||||
}
|
||||
},
|
||||
{
|
||||
"field": "pid",
|
||||
"comparator": "jsonListMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {
|
||||
"jpath_value": "$.value",
|
||||
"jpath_classid": "$.qualifier.classid",
|
||||
"crossCompare": "alternateid"
|
||||
}
|
||||
}
|
||||
],
|
||||
"threshold": 0.5,
|
||||
"aggregation": "MAX",
|
||||
"positive": "layer1",
|
||||
"negative": "layer2",
|
||||
"undefined": "layer2",
|
||||
"ignoreUndefined": "true"
|
||||
},
|
||||
"layer1": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "levensteinTitle",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "true",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 0.9,
|
||||
"aggregation": "AVG",
|
||||
"positive": "MATCH",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "NO_MATCH",
|
||||
"ignoreUndefined": "true"
|
||||
},
|
||||
"layer2": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "titleVersionMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {}
|
||||
},
|
||||
{
|
||||
"field": "authors",
|
||||
"comparator": "sizeMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 1.0,
|
||||
"aggregation": "AND",
|
||||
"positive": "layer3",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "layer3",
|
||||
"ignoreUndefined": "false"
|
||||
},
|
||||
"layer3": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "levensteinTitle",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "true",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 0.99,
|
||||
"aggregation": "AVG",
|
||||
"positive": "MATCH",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "NO_MATCH",
|
||||
"ignoreUndefined": "true"
|
||||
}
|
||||
},
|
||||
"model": [
|
||||
{
|
||||
"name": "doi",
|
||||
"type": "String",
|
||||
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
|
||||
},
|
||||
{
|
||||
"name": "altdoi",
|
||||
"type": "String",
|
||||
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
|
||||
},
|
||||
{
|
||||
"name": "pid",
|
||||
"type": "JSON",
|
||||
"path": "$.instance[*].pid[*]",
|
||||
"overrideMatch": "true"
|
||||
},
|
||||
{
|
||||
"name": "alternateid",
|
||||
"type": "JSON",
|
||||
"path": "$.instance[*].alternateIdentifier[*]",
|
||||
"overrideMatch": "true"
|
||||
},
|
||||
{
|
||||
"name": "title",
|
||||
"type": "String",
|
||||
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
|
||||
"length": 250,
|
||||
"size": 5
|
||||
},
|
||||
{
|
||||
"name": "authors",
|
||||
"type": "List",
|
||||
"path": "$.author[*].fullname",
|
||||
"size": 200
|
||||
},
|
||||
{
|
||||
"name": "resulttype",
|
||||
"type": "String",
|
||||
"path": "$.resulttype.classid"
|
||||
}
|
||||
],
|
||||
"blacklists": {
|
||||
"title": [
|
||||
"(?i)^Data Management Plan",
|
||||
"^Inside Front Cover$",
|
||||
"(?i)^Poster presentations$",
|
||||
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
|
||||
"^Problems with perinatal pathology\\.?$",
|
||||
"(?i)^Cases? of Puerperal Convulsions$",
|
||||
"(?i)^Operative Gyna?ecology$",
|
||||
"(?i)^Mind the gap\\!?\\:?$",
|
||||
"^Chronic fatigue syndrome\\.?$",
|
||||
"^Cartas? ao editor Letters? to the Editor$",
|
||||
"^Note from the Editor$",
|
||||
"^Anesthesia Abstract$",
|
||||
"^Annual report$",
|
||||
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
|
||||
"(?i)^Graph and Table of Infectious Diseases?$",
|
||||
"^Presentation$",
|
||||
"(?i)^Reviews and Information on Publications$",
|
||||
"(?i)^PUBLIC HEALTH SERVICES?$",
|
||||
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
|
||||
"(?i)^Adrese autora$",
|
||||
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
|
||||
"(?i)^Acknowledgement to Referees$",
|
||||
"(?i)^Behçet's disease\\.?$",
|
||||
"(?i)^Isolation and identification of restriction endonuclease.*$",
|
||||
"(?i)^CEREBROVASCULAR DISEASES?.?$",
|
||||
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
|
||||
"^Event management$",
|
||||
"(?i)^Breakfast and Crohn's disease.*\\.?$",
|
||||
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
|
||||
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
|
||||
"^Gushi hakubutsugaku$",
|
||||
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
|
||||
"^Intestinal spirocha?etosis$",
|
||||
"^Treatment of Rodent Ulcer$",
|
||||
"(?i)^\\W*Cloud Computing\\W*$",
|
||||
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
|
||||
"^Free Communications, Poster Presentations: Session [A-F]$",
|
||||
"^“The Historical Aspects? of Quackery\\.?”$",
|
||||
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
|
||||
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
|
||||
"(?i)^Case Report$",
|
||||
"^Boletín Informativo$",
|
||||
"(?i)^Glioblastoma Multiforme$",
|
||||
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
|
||||
"^Zaměstnanecké výhody$",
|
||||
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
|
||||
"(?i)^Carotid body tumours?\\.?$",
|
||||
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
|
||||
"^Avant-propos$",
|
||||
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
|
||||
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
|
||||
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
|
||||
"^Viñetas de Cortázar$",
|
||||
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
|
||||
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
|
||||
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
|
||||
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
|
||||
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
|
||||
"^Aus der AGMB$",
|
||||
"^Znanstveno-stručni prilozi$",
|
||||
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
|
||||
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
|
||||
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
|
||||
"^Finanční analýza podniku$",
|
||||
"^Financial analysis( of business)?$",
|
||||
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
|
||||
"^Jikken nihon shūshinsho$",
|
||||
"(?i)^CORONER('|s)(s|') INQUESTS$",
|
||||
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
|
||||
"(?i)^Consultants' contract(s)?$",
|
||||
"(?i)^Upute autorima$",
|
||||
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
|
||||
"^Joshi shin kokubun$",
|
||||
"^Kōtō shōgaku dokuhon nōson'yō$",
|
||||
"^Jinjō shōgaku shōka$",
|
||||
"^Shōgaku shūjichō$",
|
||||
"^Nihon joshi dokuhon$",
|
||||
"^Joshi shin dokuhon$",
|
||||
"^Chūtō kanbun dokuhon$",
|
||||
"^Wabun dokuhon$",
|
||||
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
|
||||
"(?i)^cardiac rehabilitation$",
|
||||
"(?i)^Analytical summary$",
|
||||
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
|
||||
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
|
||||
"^Prikazi i osvrti$",
|
||||
"^Rodinný dům s provozovnou$",
|
||||
"^Family house with an establishment$",
|
||||
"^Shinsei chūtō shin kokugun$",
|
||||
"^Pulmonary alveolar proteinosis(\\.?)$",
|
||||
"^Shinshū kanbun$",
|
||||
"^Viñeta(s?) de Rodríguez$",
|
||||
"(?i)^RUBRIKA UREDNIKA$",
|
||||
"^A Matching Model of the Academic Publication Market$",
|
||||
"^Yōgaku kōyō$",
|
||||
"^Internetový marketing$",
|
||||
"^Internet marketing$",
|
||||
"^Chūtō kokugo dokuhon$",
|
||||
"^Kokugo dokuhon$",
|
||||
"^Antibiotic Cover for Dental Extraction(s?)$",
|
||||
"^Strategie podniku$",
|
||||
"^Strategy of an Enterprise$",
|
||||
"(?i)^respiratory disease(s?)(\\.?)$",
|
||||
"^Award(s?) for Gallantry in Civil Defence$",
|
||||
"^Podniková kultura$",
|
||||
"^Corporate Culture$",
|
||||
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
|
||||
"^Pracovní motivace$",
|
||||
"^Work Motivation$",
|
||||
"^Kaitei kōtō jogaku dokuhon$",
|
||||
"^Konsolidovaná účetní závěrka$",
|
||||
"^Consolidated Financial Statements$",
|
||||
"(?i)^intracranial tumour(s?)$",
|
||||
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
|
||||
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
|
||||
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
|
||||
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
|
||||
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
|
||||
"^The level of motivation process as a leadership$",
|
||||
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
|
||||
"(?i)^news and events$",
|
||||
"(?i)^NOVOSTI I DOGAĐAJI$",
|
||||
"^Sansū no gakushū$",
|
||||
"^Posouzení informačního systému firmy a návrh změn$",
|
||||
"^Information System Assessment and Proposal for ICT Modification$",
|
||||
"^Stresové zatížení pracovníků ve vybrané profesi$",
|
||||
"^Stress load in a specific job$",
|
||||
"^Sunday: Poster Sessions, Pt.*$",
|
||||
"^Monday: Poster Sessions, Pt.*$",
|
||||
"^Wednesday: Poster Sessions, Pt.*",
|
||||
"^Tuesday: Poster Sessions, Pt.*$",
|
||||
"^Analýza reklamy$",
|
||||
"^Analysis of advertising$",
|
||||
"^Shōgaku shūshinsho$",
|
||||
"^Shōgaku sansū$",
|
||||
"^Shintei joshi kokubun$",
|
||||
"^Taishō joshi kokubun dokuhon$",
|
||||
"^Joshi kokubun$",
|
||||
"^Účetní uzávěrka a účetní závěrka v ČR$",
|
||||
"(?i)^The \"?Causes\"? of Cancer$",
|
||||
"^Normas para la publicación de artículos$",
|
||||
"^Editor('|s)(s|') [Rr]eply$",
|
||||
"^Editor(’|s)(s|’) letter$",
|
||||
"^Redaktoriaus žodis$",
|
||||
"^DISCUSSION ON THE PRECEDING PAPER$",
|
||||
"^Kōtō shōgaku shūshinsho jidōyō$",
|
||||
"^Shōgaku nihon rekishi$",
|
||||
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
|
||||
"^Préface$",
|
||||
"^Occupational [Hh]ealth [Ss]ervices.$",
|
||||
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
|
||||
"^Účetní závěrka ve vybraném podniku.*$",
|
||||
"^Financial statements in selected company$",
|
||||
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
|
||||
"^Pseudomyxoma peritonei$",
|
||||
"^Kazalo autora$",
|
||||
"(?i)^uvodna riječ$",
|
||||
"^Motivace jako způsob vedení lidí$",
|
||||
"^Motivation as a leadership$",
|
||||
"^Polyfunkční dům$",
|
||||
"^Multi\\-funkcional building$",
|
||||
"^Podnikatelský plán$",
|
||||
"(?i)^Podnikatelský záměr$",
|
||||
"(?i)^Business Plan$",
|
||||
"^Oceňování nemovitostí$",
|
||||
"^Marketingová komunikace$",
|
||||
"^Marketing communication$",
|
||||
"^Sumario Analítico$",
|
||||
"^Riječ uredništva$",
|
||||
"^Savjetovanja i priredbe$",
|
||||
"^Índice$",
|
||||
"^(Starobosanski nadpisi).*$",
|
||||
"^Vzdělávání pracovníků v organizaci$",
|
||||
"^Staff training in organization$",
|
||||
"^(Life Histories of North American Geometridae).*$",
|
||||
"^Strategická analýza podniku$",
|
||||
"^Strategic Analysis of an Enterprise$",
|
||||
"^Sadržaj$",
|
||||
"^Upute suradnicima$",
|
||||
"^Rodinný dům$",
|
||||
"(?i)^Fami(l)?ly house$",
|
||||
"^Upute autorima$",
|
||||
"^Strategic Analysis$",
|
||||
"^Finanční analýza vybraného podniku$",
|
||||
"^Finanční analýza$",
|
||||
"^Riječ urednika$",
|
||||
"(?i)^Content(s?)$",
|
||||
"(?i)^Inhalt$",
|
||||
"^Jinjō shōgaku shūshinsho jidōyō$",
|
||||
"(?i)^Index$",
|
||||
"^Chūgaku kokubun kyōkasho$",
|
||||
"^Retrato de una mujer$",
|
||||
"^Retrato de un hombre$",
|
||||
"^Kōtō shōgaku dokuhon$",
|
||||
"^Shotōka kokugo$",
|
||||
"^Shōgaku dokuhon$",
|
||||
"^Jinjō shōgaku kokugo dokuhon$",
|
||||
"^Shinsei kokugo dokuhon$",
|
||||
"^Teikoku dokuhon$",
|
||||
"^Instructions to Authors$",
|
||||
"^KİTAP TAHLİLİ$",
|
||||
"^PRZEGLĄD PIŚMIENNICTWA$",
|
||||
"(?i)^Presentación$",
|
||||
"^İçindekiler$",
|
||||
"(?i)^Tabl?e of contents$",
|
||||
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
|
||||
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
|
||||
"^Editorial( Board)?$",
|
||||
"(?i)^Editorial \\(English\\)$",
|
||||
"^Editörden$",
|
||||
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
|
||||
"^(Kiri Karl Morgensternile).*$",
|
||||
"^(\\[Eksliibris Aleksandr).*\\]$",
|
||||
"^(\\[Eksliibris Aleksandr).*$",
|
||||
"^(Eksliibris Aleksandr).*$",
|
||||
"^(Kiri A\\. de Vignolles).*$",
|
||||
"^(2 kirja Karl Morgensternile).*$",
|
||||
"^(Pirita kloostri idaosa arheoloogilised).*$",
|
||||
"^(Kiri tundmatule).*$",
|
||||
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
|
||||
"^(Eksliibris Nikolai Birukovile).*$",
|
||||
"^(Eksliibris Nikolai Issakovile).*$",
|
||||
"^(WHP Cruise Summary Information of section).*$",
|
||||
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
|
||||
"^(Measurement of the spin\\-dependent structure function).*",
|
||||
"(?i)^.*authors['’′]? reply\\.?$",
|
||||
"(?i)^.*authors['’′]? response\\.?$"
|
||||
]
|
||||
},
|
||||
"synonyms": {}
|
||||
}
|
||||
}
|
|
@ -6,10 +6,10 @@
|
|||
"subEntityType": "resulttype",
|
||||
"subEntityValue": "publication",
|
||||
"orderField": "title",
|
||||
"queueMaxSize": "2000",
|
||||
"groupMaxSize": "100",
|
||||
"maxChildren": "100",
|
||||
"slidingWindowSize": "200",
|
||||
"queueMaxSize": "5000",
|
||||
"groupMaxSize": "2000",
|
||||
"maxChildren": "1000",
|
||||
"slidingWindowSize": "50",
|
||||
"rootBuilder": [
|
||||
"result",
|
||||
"resultProject_outcome_isProducedBy",
|
||||
|
@ -29,8 +29,7 @@
|
|||
},
|
||||
"pace": {
|
||||
"clustering" : [
|
||||
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
|
||||
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
|
||||
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
|
||||
{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
|
||||
],
|
||||
"decisionTree": {
|
||||
|
@ -129,6 +128,7 @@
|
|||
],
|
||||
"blacklists": {
|
||||
"title": [
|
||||
"(?i)^Data Management Plan",
|
||||
"^Inside Front Cover$",
|
||||
"(?i)^Poster presentations$",
|
||||
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -22,5 +22,11 @@
|
|||
"paramLongName": "dedupConfPath",
|
||||
"paramDescription": "path of the dedup configuration",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "ut",
|
||||
"paramLongName": "useTree",
|
||||
"paramDescription": "chose the tree configuration or not",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,113 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
|
||||
<component name="CodeStyleManager">
|
||||
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
|
||||
<option name="CODE_STYLE_SCHEME" value=""/>
|
||||
</component>
|
||||
<component name="libraryTable"/>
|
||||
<component name="CompilerConfiguration">
|
||||
<option name="DEFAULT_COMPILER" value="Javac"/>
|
||||
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
|
||||
<!--
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="${wildcardResourcePattern}"/>
|
||||
</wildcardResourcePatterns>
|
||||
-->
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="!?*.java"/>
|
||||
</wildcardResourcePatterns>
|
||||
</component>
|
||||
<component name="JavacSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
<option name="USE_GENERICS_COMPILER" value="false"/>
|
||||
</component>
|
||||
<component name="JikesSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
|
||||
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
|
||||
<option name="IS_INCREMENTAL_MODE" value="false"/>
|
||||
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
</component>
|
||||
<component name="AntConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="FILTER_TARGETS" value="false"/>
|
||||
</component>
|
||||
<component name="JavadocGenerationManager">
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
<option name="OPTION_SCOPE" value="protected"/>
|
||||
<option name="OPTION_HIERARCHY" value="false"/>
|
||||
<option name="OPTION_NAVIGATOR" value="false"/>
|
||||
<option name="OPTION_INDEX" value="false"/>
|
||||
<option name="OPTION_SEPARATE_INDEX" value="false"/>
|
||||
<option name="OPTION_USE_1_1" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
|
||||
<option name="OPTION_DEPRECATED_LIST" value="false"/>
|
||||
<option name="OTHER_OPTIONS"/>
|
||||
<option name="HEAP_SIZE"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
</component>
|
||||
<component name="JUnitProjectSettings">
|
||||
<option name="TEST_RUNNER" value="UI"/>
|
||||
</component>
|
||||
<component name="EntryPointsManager">
|
||||
<entry_points/>
|
||||
</component>
|
||||
<component name="DataSourceManager"/>
|
||||
<component name="ExportToHTMLSettings">
|
||||
<option name="PRINT_LINE_NUMBERS" value="false"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
</component>
|
||||
<component name="ImportConfiguration">
|
||||
<option name="VENDOR"/>
|
||||
<option name="RELEASE_TAG"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
|
||||
</component>
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dhp-build/dhp-code-style/dhp-code-style.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build-assembly-resources/dhp-build-assembly-resources.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build-properties-maven-plugin/dhp-build-properties-maven-plugin.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build.iml"/>
|
||||
</modules>
|
||||
</component>
|
||||
<UsedPathMacros>
|
||||
<!--<macro name="cargo"></macro>-->
|
||||
</UsedPathMacros>
|
||||
</project>
|
|
@ -0,0 +1,418 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="LvcsProjectConfiguration">
|
||||
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
|
||||
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="MemberChooser.copyJavadoc" value="false"/>
|
||||
<property name="GoToClass.includeLibraries" value="false"/>
|
||||
<property name="MemberChooser.showClasses" value="true"/>
|
||||
<property name="MemberChooser.sorted" value="false"/>
|
||||
<property name="GoToFile.includeJavaFiles" value="false"/>
|
||||
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
|
||||
<editor active="false"/>
|
||||
<layout>
|
||||
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
|
||||
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
|
||||
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
|
||||
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
|
||||
</layout>
|
||||
</component>
|
||||
<component name="ErrorTreeViewConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="HIDE_WARNINGS" value="false"/>
|
||||
</component>
|
||||
<component name="StructureViewFactory">
|
||||
<option name="SORT_MODE" value="0"/>
|
||||
<option name="GROUP_INHERITED" value="true"/>
|
||||
<option name="AUTOSCROLL_MODE" value="true"/>
|
||||
<option name="SHOW_FIELDS" value="true"/>
|
||||
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
|
||||
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
|
||||
<option name="SHOW_INHERITED" value="false"/>
|
||||
<option name="HIDE_NOT_PUBLIC" value="false"/>
|
||||
</component>
|
||||
<component name="ProjectViewSettings">
|
||||
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
|
||||
<view id="ProjectPane">
|
||||
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
|
||||
</view>
|
||||
<view id="SourcepathPane"/>
|
||||
<view id="ClasspathPane"/>
|
||||
</component>
|
||||
<component name="Commander">
|
||||
<leftPanel view="Project"/>
|
||||
<rightPanel view="Project"/>
|
||||
<splitter proportion="0.5"/>
|
||||
</component>
|
||||
<component name="AspectsView"/>
|
||||
<component name="SelectInManager"/>
|
||||
<component name="HierarchyBrowserManager">
|
||||
<option name="SHOW_PACKAGES" value="false"/>
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SORT_ALPHABETICALLY" value="false"/>
|
||||
</component>
|
||||
<component name="TodoView" selected-index="0">
|
||||
<todo-panel id="selected-file">
|
||||
<are-packages-shown value="false"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
<todo-panel id="all">
|
||||
<are-packages-shown value="true"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
</component>
|
||||
<component name="editorManager"/>
|
||||
<component name="editorHistoryManager"/>
|
||||
<component name="DaemonCodeAnalyzer">
|
||||
<disable_hints/>
|
||||
</component>
|
||||
<component name="InspectionManager">
|
||||
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SPLITTER_PROPORTION" value="0.5"/>
|
||||
<profile name="Default"/>
|
||||
</component>
|
||||
<component name="BookmarkManager"/>
|
||||
<component name="DebuggerManager">
|
||||
<line_breakpoints/>
|
||||
<exception_breakpoints>
|
||||
<breakpoint_any>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="ENABLED" value="false"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</breakpoint_any>
|
||||
</exception_breakpoints>
|
||||
<field_breakpoints/>
|
||||
<method_breakpoints/>
|
||||
</component>
|
||||
<component name="DebuggerSettings">
|
||||
<option name="TRACING_FILTERS_ENABLED" value="true"/>
|
||||
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
|
||||
<option name="VALUE_LOOKUP_DELAY" value="700"/>
|
||||
<option name="DEBUGGER_TRANSPORT" value="0"/>
|
||||
<option name="FORCE_CLASSIC_VM" value="true"/>
|
||||
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
|
||||
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
|
||||
<option name="SKIP_CONSTRUCTORS" value="false"/>
|
||||
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
|
||||
<default_breakpoint_settings>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="WATCH_MODIFICATION" value="true"/>
|
||||
<option name="WATCH_ACCESS" value="true"/>
|
||||
<option name="WATCH_ENTRY" value="true"/>
|
||||
<option name="WATCH_EXIT" value="true"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</default_breakpoint_settings>
|
||||
<filter>
|
||||
<option name="PATTERN" value="com.sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="java.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="javax.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="org.omg.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="junit.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
</component>
|
||||
<component name="CompilerWorkspaceConfiguration">
|
||||
<option name="COMPILE_IN_BACKGROUND" value="false"/>
|
||||
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<activeType name="Application"/>
|
||||
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
|
||||
<module name=""/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="HTML_FILE_NAME"/>
|
||||
<option name="HTML_USED" value="false"/>
|
||||
<option name="WIDTH" value="400"/>
|
||||
<option name="HEIGHT" value="300"/>
|
||||
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
|
||||
<option name="USE_SOCKET_TRANSPORT" value="true"/>
|
||||
<option name="SERVER_MODE" value="false"/>
|
||||
<option name="SHMEM_ADDRESS" value="javadebug"/>
|
||||
<option name="HOST" value="localhost"/>
|
||||
<option name="PORT" value="5005"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Application" factoryName="Application">
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PROGRAM_PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<module name=""/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
|
||||
<module name=""/>
|
||||
<option name="PACKAGE_NAME"/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="METHOD_NAME"/>
|
||||
<option name="TEST_OBJECT" value="class"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<option name="ADDITIONAL_CLASS_PATH"/>
|
||||
<option name="TEST_SEARCH_SCOPE">
|
||||
<value defaultName="wholeProject"/>
|
||||
</option>
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="VcsManagerConfiguration">
|
||||
<option name="ACTIVE_VCS_NAME" value="git"/>
|
||||
<option name="STATE" value="0"/>
|
||||
</component>
|
||||
<component name="VssConfiguration">
|
||||
<CheckoutOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
|
||||
<option name="REPLACE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckoutOptions>
|
||||
<CheckinOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="KEEP_CHECKED_OUT" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckinOptions>
|
||||
<AddOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
|
||||
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
|
||||
<option name="FILE_TYPE" value="0"/>
|
||||
</AddOptions>
|
||||
<UndocheckoutOptions>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="REPLACE_LOCAL_COPY" value="0"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</UndocheckoutOptions>
|
||||
<DiffOptions>
|
||||
<option name="IGNORE_WHITE_SPACE" value="false"/>
|
||||
<option name="IGNORE_CASE" value="false"/>
|
||||
</DiffOptions>
|
||||
<GetOptions>
|
||||
<option name="REPLACE_WRITABLE" value="0"/>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</GetOptions>
|
||||
<option name="CLIENT_PATH" value=""/>
|
||||
<option name="SRCSAFEINI_PATH" value=""/>
|
||||
<option name="USER_NAME" value=""/>
|
||||
<option name="PWD" value=""/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_DIFF_OPTIONS" value="true"/>
|
||||
<option name="SHOW_GET_OPTIONS" value="true"/>
|
||||
<option name="USE_EXTERNAL_DIFF" value="false"/>
|
||||
<option name="EXTERNAL_DIFF_PATH" value=""/>
|
||||
<option name="REUSE_LAST_COMMENT" value="false"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="CheckinPanelState"/>
|
||||
<component name="WebViewSettings">
|
||||
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="EjbViewSettings">
|
||||
<EjbView showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="AppServerRunManager"/>
|
||||
<component name="StarteamConfiguration">
|
||||
<option name="SERVER" value=""/>
|
||||
<option name="PORT" value="49201"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="PROJECT" value=""/>
|
||||
<option name="VIEW" value=""/>
|
||||
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="Cvs2Configuration">
|
||||
<option name="ON_FILE_ADDING" value="0"/>
|
||||
<option name="ON_FILE_REMOVING" value="0"/>
|
||||
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
|
||||
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
|
||||
<option name="MERGING_MODE" value="0"/>
|
||||
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
|
||||
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
|
||||
<option name="RESET_STICKY" value="false"/>
|
||||
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
|
||||
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
|
||||
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
|
||||
<option name="PROCESS_DELETED_FILES" value="false"/>
|
||||
<option name="SHOW_EDIT_DIALOG" value="true"/>
|
||||
<option name="RESERVED_EDIT" value="false"/>
|
||||
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_CHANGES_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_OUTPUT" value="false"/>
|
||||
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
|
||||
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
|
||||
<option name="ADD_WATCH_INDEX" value="0"/>
|
||||
<option name="REMOVE_WATCH_INDEX" value="0"/>
|
||||
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
|
||||
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
|
||||
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
<component name="CvsTabbedWindow"/>
|
||||
<component name="SvnConfiguration">
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="AUTO_ADD_FILES" value="0"/>
|
||||
<option name="AUTO_DEL_FILES" value="0"/>
|
||||
</component>
|
||||
<component name="PerforceConfiguration">
|
||||
<option name="PORT" value="magic:1666"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="CLIENT" value=""/>
|
||||
<option name="TRACE" value="false"/>
|
||||
<option name="PERFORCE_STATUS" value="true"/>
|
||||
<option name="CHANGELIST_OPTION" value="false"/>
|
||||
<option name="SYSTEMROOT" value=""/>
|
||||
<option name="P4_EXECUTABLE" value="p4"/>
|
||||
<option name="SHOW_BRANCH_HISTORY" value="false"/>
|
||||
<option name="GENERATE_COMMENT" value="false"/>
|
||||
<option name="SYNC_OPTION" value="Sync"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,232 @@
|
|||
package eu.dnetlib.pace.util;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.pace.clustering.NGramUtils;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.config.WfConfig;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.model.MapDocumentComparator;
|
||||
import eu.dnetlib.pace.tree.JsonListMatch;
|
||||
import eu.dnetlib.pace.tree.LevensteinTitle;
|
||||
import eu.dnetlib.pace.tree.SizeMatch;
|
||||
import eu.dnetlib.pace.tree.TitleVersionMatch;
|
||||
import eu.dnetlib.pace.tree.support.FieldStats;
|
||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class BlockProcessorForTesting {
|
||||
|
||||
// Counter names of the form "<entityType>::<label>", appended by constructAccumulator(DedupConfig).
public static final List<String> accumulators= new ArrayList<>();
|
||||
|
||||
// Commons-logging logger bound to this class.
private static final Log log = LogFactory.getLog(eu.dnetlib.pace.util.BlockProcessorForTesting.class);
|
||||
|
||||
// Dedup configuration assigned in the constructor; its getWf() settings (entity type, order field,
// queue max size) are read by the processing methods of this class.
private DedupConfig dedupConf;
|
||||
|
||||
public static void constructAccumulator( final DedupConfig dedupConf) {
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "records per hash key = 1"));
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField()));
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), String.format("Skipped records for count(%s) >= %s", dedupConf.getWf().getOrderField(), dedupConf.getWf().getGroupMaxSize())));
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "skip list"));
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)"));
|
||||
accumulators.add(String.format("%s::%s",dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold()));
|
||||
}
|
||||
|
||||
/**
 * Creates a processor bound to the given dedup configuration.
 *
 * @param dedupConf configuration read by the processing methods of this instance
 */
public BlockProcessorForTesting(final DedupConfig dedupConf) {
    this.dedupConf = dedupConf;
}
|
||||
|
||||
public void processSortedBlock(final String key, final List<MapDocument> documents, final Reporter context, boolean useTree) {
|
||||
if (documents.size() > 1) {
|
||||
// log.info("reducing key: '" + key + "' records: " + q.size());
|
||||
process(prepare(documents), context, useTree);
|
||||
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void process(final String key, final Iterable<MapDocument> documents, final Reporter context, boolean useTree) {
|
||||
|
||||
final Queue<MapDocument> q = prepare(documents);
|
||||
|
||||
if (q.size() > 1) {
|
||||
// log.info("reducing key: '" + key + "' records: " + q.size());
|
||||
process(simplifyQueue(q, key, context), context, useTree);
|
||||
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1);
|
||||
}
|
||||
}
|
||||
|
||||
private Queue<MapDocument> prepare(final Iterable<MapDocument> documents) {
|
||||
final Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(dedupConf.getWf().getOrderField()));
|
||||
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final int queueMaxSize = dedupConf.getWf().getQueueMaxSize();
|
||||
|
||||
documents.forEach(doc -> {
|
||||
if (queue.size() <= queueMaxSize) {
|
||||
final String id = doc.getIdentifier();
|
||||
|
||||
if (!seen.contains(id)) {
|
||||
seen.add(id);
|
||||
queue.add(doc);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return queue;
|
||||
}
|
||||
|
||||
private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue, final String ngram, final Reporter context) {
|
||||
final Queue<MapDocument> q = new LinkedList<>();
|
||||
|
||||
String fieldRef = "";
|
||||
final List<MapDocument> tempResults = Lists.newArrayList();
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
final MapDocument result = queue.remove();
|
||||
|
||||
final String orderFieldName = dedupConf.getWf().getOrderField();
|
||||
final Field orderFieldValue = result.values(orderFieldName);
|
||||
if (!orderFieldValue.isEmpty()) {
|
||||
final String field = NGramUtils.cleanupForOrdering(orderFieldValue.stringValue());
|
||||
if (field.equals(fieldRef)) {
|
||||
tempResults.add(result);
|
||||
} else {
|
||||
populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram);
|
||||
tempResults.clear();
|
||||
tempResults.add(result);
|
||||
fieldRef = field;
|
||||
}
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "missing " + dedupConf.getWf().getOrderField(), 1);
|
||||
}
|
||||
}
|
||||
populateSimplifiedQueue(q, tempResults, context, fieldRef, ngram);
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
private void populateSimplifiedQueue(final Queue<MapDocument> q,
|
||||
final List<MapDocument> tempResults,
|
||||
final Reporter context,
|
||||
final String fieldRef,
|
||||
final String ngram) {
|
||||
WfConfig wf = dedupConf.getWf();
|
||||
if (tempResults.size() < wf.getGroupMaxSize()) {
|
||||
q.addAll(tempResults);
|
||||
} else {
|
||||
context.incrementCounter(wf.getEntityType(), String.format("Skipped records for count(%s) >= %s", wf.getOrderField(), wf.getGroupMaxSize()), tempResults.size());
|
||||
// log.info("Skipped field: " + fieldRef + " - size: " + tempResults.size() + " - ngram: " + ngram);
|
||||
}
|
||||
}
|
||||
|
||||
private void process(final Queue<MapDocument> queue, final Reporter context, boolean useTree) {
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
|
||||
final MapDocument pivot = queue.remove();
|
||||
final String idPivot = pivot.getIdentifier();
|
||||
|
||||
WfConfig wf = dedupConf.getWf();
|
||||
final Field fieldsPivot = pivot.values(wf.getOrderField());
|
||||
final String fieldPivot = (fieldsPivot == null) || fieldsPivot.isEmpty() ? "" : fieldsPivot.stringValue();
|
||||
|
||||
if (fieldPivot != null) {
|
||||
int i = 0;
|
||||
for (final MapDocument curr : queue) {
|
||||
final String idCurr = curr.getIdentifier();
|
||||
|
||||
if (mustSkip(idCurr)) {
|
||||
|
||||
context.incrementCounter(wf.getEntityType(), "skip list", 1);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (i > wf.getSlidingWindowSize()) {
|
||||
break;
|
||||
}
|
||||
|
||||
final Field fieldsCurr = curr.values(wf.getOrderField());
|
||||
final String fieldCurr = (fieldsCurr == null) || fieldsCurr.isEmpty() ? null : fieldsCurr.stringValue();
|
||||
|
||||
if (!idCurr.equals(idPivot) && (fieldCurr != null)) {
|
||||
|
||||
// if (new TreeProcessor(dedupConf).compare(pivot, curr) == true && publicationCompare(pivot, curr, dedupConf) == false)
|
||||
// emitOutput(true, idPivot, idCurr, context);
|
||||
//
|
||||
if(useTree)
|
||||
emitOutput(new TreeProcessor(dedupConf).compare(pivot, curr), idPivot, idCurr, context);
|
||||
else
|
||||
emitOutput(publicationCompare(pivot, curr, dedupConf), idPivot, idCurr, context);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean publicationCompare(MapDocument a, MapDocument b, DedupConfig config) {
|
||||
|
||||
double score = 0.0;
|
||||
//LAYER 1 - comparison of the PIDs json lists
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("jpath_value", "$.value");
|
||||
params.put("jpath_classid", "$.qualifier.classid");
|
||||
JsonListMatch jsonListMatch = new JsonListMatch(params);
|
||||
double result = jsonListMatch.compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config);
|
||||
if (result >= 0.5) //if the result of the comparison is greater than the threshold
|
||||
score += 10.0; //high score because it should match when the first condition is satisfied
|
||||
else
|
||||
score += 0.0;
|
||||
|
||||
//LAYER 2 - comparison of the title version and the size of the authors lists
|
||||
TitleVersionMatch titleVersionMatch = new TitleVersionMatch(params);
|
||||
double result1 = titleVersionMatch.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
|
||||
SizeMatch sizeMatch = new SizeMatch(params);
|
||||
double result2 = sizeMatch.compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
|
||||
if (Math.min(result1, result2) != 0)
|
||||
score+=0;
|
||||
else
|
||||
score-=2;
|
||||
|
||||
//LAYER 3 - computation of levenshtein on titles
|
||||
LevensteinTitle levensteinTitle = new LevensteinTitle(params);
|
||||
double result3 = levensteinTitle.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
|
||||
score += Double.isNaN(result3)?0.0:result3;
|
||||
|
||||
return score >= 0.99;
|
||||
}
|
||||
|
||||
private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context) {
|
||||
|
||||
if (result) {
|
||||
writeSimilarity(context, idPivot, idCurr);
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "dedupSimilarity (x2)", 1);
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean mustSkip(final String idPivot) {
|
||||
return dedupConf.getWf().getSkipList().contains(getNsPrefix(idPivot));
|
||||
}
|
||||
|
||||
private String getNsPrefix(final String id) {
|
||||
return StringUtils.substringBetween(id, "|", "::");
|
||||
}
|
||||
|
||||
private void writeSimilarity(final Reporter context, final String from, final String to) {
|
||||
final String type = dedupConf.getWf().getEntityType();
|
||||
|
||||
context.emit(type, from, to);
|
||||
context.emit(type, to, from);
|
||||
}
|
||||
}
|
|
@ -47,7 +47,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
|
|||
@Test
|
||||
public void testNgramPairs() {
|
||||
params.put("ngramLen", 3);
|
||||
params.put("max", 1);
|
||||
params.put("max", 2);
|
||||
|
||||
final ClusteringFunction np = new NgramPairs(params);
|
||||
|
||||
|
@ -59,7 +59,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
|
|||
@Test
|
||||
public void testSortedNgramPairs() {
|
||||
params.put("ngramLen", 3);
|
||||
params.put("max", 1);
|
||||
params.put("max", 2);
|
||||
|
||||
final ClusteringFunction np = new SortedNgramPairs(params);
|
||||
|
||||
|
@ -70,6 +70,11 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
|
|||
final String s2 = "Pisa University";
|
||||
System.out.println(s2);
|
||||
System.out.println(np.apply(conf, Lists.newArrayList(title(s2))));
|
||||
|
||||
final String s3 = "Parco Tecnologico Agroalimentare Umbria";
|
||||
System.out.println(s3);
|
||||
System.out.println(np.apply(conf, Lists.newArrayList(title(s3))));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -132,6 +137,14 @@ public class ClusteringFunctionTest extends AbstractPaceTest {
|
|||
System.out.println(s);
|
||||
System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
|
||||
|
||||
s = "JRC Open Power Plants Database (JRC-PPDB-OPEN)";
|
||||
System.out.println(s);
|
||||
System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
|
||||
|
||||
s = "JRC Open Power Plants Database";
|
||||
System.out.println(s);
|
||||
System.out.println(sp.apply(conf, Lists.newArrayList(title(s))));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -14,7 +14,7 @@ import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
|
||||
public class ComparatorTest extends AbstractPaceFunctions {
|
||||
|
||||
private Map<String, String> params;
|
||||
|
@ -119,6 +119,42 @@ public class ComparatorTest extends AbstractPaceFunctions {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void jaroWinklerTest() {
|
||||
|
||||
final JaroWinkler jaroWinkler = new JaroWinkler(params);
|
||||
|
||||
double result = jaroWinkler.distance("Sofia", "Sofìa", conf);
|
||||
System.out.println("result = " + result);
|
||||
|
||||
result = jaroWinkler.distance("University of Victoria Dataverse", "University of Windsor Dataverse", conf);
|
||||
System.out.println("result = " + result);
|
||||
|
||||
result = jaroWinkler.distance("Victoria Dataverse", "Windsor Dataverse", conf);
|
||||
System.out.println("result = " + result);
|
||||
|
||||
final Levenstein levenstein = new Levenstein(params);
|
||||
|
||||
result = levenstein.distance("Victoria", "Windsor", conf);
|
||||
System.out.println("result = " + result);
|
||||
|
||||
//University of Victoria Dataverse
|
||||
//University of British Columbia Dataverse
|
||||
//University of Windsor Dataverse
|
||||
//University of Waterloo Dataverse
|
||||
//University of Toronto Dataverse
|
||||
//University of Ottawa Dataverse
|
||||
}
|
||||
|
||||
@Test
|
||||
public void levensteinTitleTest() {
|
||||
|
||||
final LevensteinTitle levensteinTitle = new LevensteinTitle(params);
|
||||
double result = levensteinTitle.distance("JRC: Open Power Plants Database", "JRC Open Power Plants Database (JRC-PPDB-OPEN)", conf);
|
||||
|
||||
System.out.println("result = " + result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void jsonListMatchTest(){
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package eu.dnetlib.pace.config;
|
|||
|
||||
|
||||
import eu.dnetlib.pace.AbstractPaceTest;
|
||||
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
|
||||
import eu.dnetlib.pace.clustering.ClusteringClass;
|
||||
import eu.dnetlib.pace.clustering.ClusteringCombiner;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
|
@ -128,6 +129,23 @@ public class ConfigTest extends AbstractPaceTest {
|
|||
assertEquals("doi", combine[2].split(":")[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterAndCombineTest() {
|
||||
|
||||
DedupConfig dedupConf = DedupConfig.load(readFromClasspath("pub.prod.conf.json"));
|
||||
|
||||
final String json = readFromClasspath("publication.example.json");
|
||||
|
||||
final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, json);
|
||||
|
||||
Collection<String> strings = BlacklistAwareClusteringCombiner.filterAndCombine(mapDocument, dedupConf);
|
||||
|
||||
for (String s: strings) {
|
||||
System.out.println("s = " + s);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void crossCompareTest() {
|
||||
|
||||
|
|
|
@ -0,0 +1,402 @@
|
|||
|
||||
{
|
||||
"wf": {
|
||||
"threshold": "0.99",
|
||||
"dedupRun": "001",
|
||||
"entityType": "result",
|
||||
"subEntityType": "resulttype",
|
||||
"subEntityValue": "publication",
|
||||
"orderField": "title",
|
||||
"queueMaxSize": "200",
|
||||
"groupMaxSize": "100",
|
||||
"maxChildren": "100",
|
||||
"slidingWindowSize": "50",
|
||||
"rootBuilder": [
|
||||
"result",
|
||||
"resultProject_outcome_isProducedBy",
|
||||
"resultResult_publicationDataset_isRelatedTo",
|
||||
"resultResult_similarity_isAmongTopNSimilarDocuments",
|
||||
"resultResult_similarity_hasAmongTopNSimilarDocuments",
|
||||
"resultOrganization_affiliation_isAffiliatedWith",
|
||||
"resultResult_part_hasPart",
|
||||
"resultResult_part_isPartOf",
|
||||
"resultResult_supplement_isSupplementTo",
|
||||
"resultResult_supplement_isSupplementedBy",
|
||||
"resultResult_version_isVersionOf"
|
||||
],
|
||||
"includeChildren": "true",
|
||||
"maxIterations": 20,
|
||||
"idPath": "$.id"
|
||||
},
|
||||
"pace": {
|
||||
"clustering" : [
|
||||
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
|
||||
{ "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
|
||||
],
|
||||
"decisionTree": {
|
||||
"start": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "pid",
|
||||
"comparator": "jsonListMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {
|
||||
"jpath_value": "$.value",
|
||||
"jpath_classid": "$.qualifier.classid"
|
||||
}
|
||||
},
|
||||
{
|
||||
"field": "pid",
|
||||
"comparator": "jsonListMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {
|
||||
"jpath_value": "$.value",
|
||||
"jpath_classid": "$.qualifier.classid",
|
||||
"crossCompare": "alternateid"
|
||||
}
|
||||
}
|
||||
],
|
||||
"threshold": 0.5,
|
||||
"aggregation": "MAX",
|
||||
"positive": "layer1",
|
||||
"negative": "layer2",
|
||||
"undefined": "layer2",
|
||||
"ignoreUndefined": "true"
|
||||
},
|
||||
"layer1": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "levensteinTitle",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "true",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 0.9,
|
||||
"aggregation": "AVG",
|
||||
"positive": "MATCH",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "NO_MATCH",
|
||||
"ignoreUndefined": "true"
|
||||
},
|
||||
"layer2": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "titleVersionMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {}
|
||||
},
|
||||
{
|
||||
"field": "authors",
|
||||
"comparator": "sizeMatch",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "false",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 1.0,
|
||||
"aggregation": "AND",
|
||||
"positive": "layer3",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "layer3",
|
||||
"ignoreUndefined": "false"
|
||||
},
|
||||
"layer3": {
|
||||
"fields": [
|
||||
{
|
||||
"field": "title",
|
||||
"comparator": "levensteinTitle",
|
||||
"weight": 1.0,
|
||||
"countIfUndefined": "true",
|
||||
"params": {}
|
||||
}
|
||||
],
|
||||
"threshold": 0.99,
|
||||
"aggregation": "AVG",
|
||||
"positive": "MATCH",
|
||||
"negative": "NO_MATCH",
|
||||
"undefined": "NO_MATCH",
|
||||
"ignoreUndefined": "true"
|
||||
}
|
||||
},
|
||||
"model": [
|
||||
{
|
||||
"name": "doi",
|
||||
"type": "String",
|
||||
"path": "$.instance.pid[?(@.qualifier.classid == 'doi')].value"
|
||||
},
|
||||
{
|
||||
"name": "altdoi",
|
||||
"type": "String",
|
||||
"path": "$.instance.alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
|
||||
},
|
||||
{
|
||||
"name": "pid",
|
||||
"type": "JSON",
|
||||
"path": "$.instance.pid",
|
||||
"overrideMatch": "true"
|
||||
},
|
||||
{
|
||||
"name": "alternateid",
|
||||
"type": "JSON",
|
||||
"path": "$.instance.alternateIdentifier",
|
||||
"overrideMatch": "true"
|
||||
},
|
||||
{
|
||||
"name": "title",
|
||||
"type": "String",
|
||||
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
|
||||
"length": 250,
|
||||
"size": 5
|
||||
},
|
||||
{
|
||||
"name": "authors",
|
||||
"type": "List",
|
||||
"path": "$.author[*].fullname",
|
||||
"size": 200
|
||||
},
|
||||
{
|
||||
"name": "resulttype",
|
||||
"type": "String",
|
||||
"path": "$.resulttype.classid"
|
||||
}
|
||||
],
|
||||
"blacklists": {
|
||||
"title": [
|
||||
"(?i)^Data Management Plan",
|
||||
"^Inside Front Cover$",
|
||||
"(?i)^Poster presentations$",
|
||||
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
|
||||
"^Problems with perinatal pathology\\.?$",
|
||||
"(?i)^Cases? of Puerperal Convulsions$",
|
||||
"(?i)^Operative Gyna?ecology$",
|
||||
"(?i)^Mind the gap\\!?\\:?$",
|
||||
"^Chronic fatigue syndrome\\.?$",
|
||||
"^Cartas? ao editor Letters? to the Editor$",
|
||||
"^Note from the Editor$",
|
||||
"^Anesthesia Abstract$",
|
||||
"^Annual report$",
|
||||
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
|
||||
"(?i)^Graph and Table of Infectious Diseases?$",
|
||||
"^Presentation$",
|
||||
"(?i)^Reviews and Information on Publications$",
|
||||
"(?i)^PUBLIC HEALTH SERVICES?$",
|
||||
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
|
||||
"(?i)^Adrese autora$",
|
||||
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
|
||||
"(?i)^Acknowledgement to Referees$",
|
||||
"(?i)^Behçet's disease\\.?$",
|
||||
"(?i)^Isolation and identification of restriction endonuclease.*$",
|
||||
"(?i)^CEREBROVASCULAR DISEASES?.?$",
|
||||
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
|
||||
"^Event management$",
|
||||
"(?i)^Breakfast and Crohn's disease.*\\.?$",
|
||||
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
|
||||
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
|
||||
"^Gushi hakubutsugaku$",
|
||||
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
|
||||
"^Intestinal spirocha?etosis$",
|
||||
"^Treatment of Rodent Ulcer$",
|
||||
"(?i)^\\W*Cloud Computing\\W*$",
|
||||
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
|
||||
"^Free Communications, Poster Presentations: Session [A-F]$",
|
||||
"^“The Historical Aspects? of Quackery\\.?”$",
|
||||
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
|
||||
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
|
||||
"(?i)^Case Report$",
|
||||
"^Boletín Informativo$",
|
||||
"(?i)^Glioblastoma Multiforme$",
|
||||
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
|
||||
"^Zaměstnanecké výhody$",
|
||||
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
|
||||
"(?i)^Carotid body tumours?\\.?$",
|
||||
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
|
||||
"^Avant-propos$",
|
||||
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
|
||||
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
|
||||
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
|
||||
"^Viñetas de Cortázar$",
|
||||
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
|
||||
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
|
||||
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
|
||||
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
|
||||
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
|
||||
"^Aus der AGMB$",
|
||||
"^Znanstveno-stručni prilozi$",
|
||||
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
|
||||
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
|
||||
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
|
||||
"^Finanční analýza podniku$",
|
||||
"^Financial analysis( of business)?$",
|
||||
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
|
||||
"^Jikken nihon shūshinsho$",
|
||||
"(?i)^CORONER('|s)(s|') INQUESTS$",
|
||||
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
|
||||
"(?i)^Consultants' contract(s)?$",
|
||||
"(?i)^Upute autorima$",
|
||||
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
|
||||
"^Joshi shin kokubun$",
|
||||
"^Kōtō shōgaku dokuhon nōson'yō$",
|
||||
"^Jinjō shōgaku shōka$",
|
||||
"^Shōgaku shūjichō$",
|
||||
"^Nihon joshi dokuhon$",
|
||||
"^Joshi shin dokuhon$",
|
||||
"^Chūtō kanbun dokuhon$",
|
||||
"^Wabun dokuhon$",
|
||||
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
|
||||
"(?i)^cardiac rehabilitation$",
|
||||
"(?i)^Analytical summary$",
|
||||
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
|
||||
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
|
||||
"^Prikazi i osvrti$",
|
||||
"^Rodinný dům s provozovnou$",
|
||||
"^Family house with an establishment$",
|
||||
"^Shinsei chūtō shin kokugun$",
|
||||
"^Pulmonary alveolar proteinosis(\\.?)$",
|
||||
"^Shinshū kanbun$",
|
||||
"^Viñeta(s?) de Rodríguez$",
|
||||
"(?i)^RUBRIKA UREDNIKA$",
|
||||
"^A Matching Model of the Academic Publication Market$",
|
||||
"^Yōgaku kōyō$",
|
||||
"^Internetový marketing$",
|
||||
"^Internet marketing$",
|
||||
"^Chūtō kokugo dokuhon$",
|
||||
"^Kokugo dokuhon$",
|
||||
"^Antibiotic Cover for Dental Extraction(s?)$",
|
||||
"^Strategie podniku$",
|
||||
"^Strategy of an Enterprise$",
|
||||
"(?i)^respiratory disease(s?)(\\.?)$",
|
||||
"^Award(s?) for Gallantry in Civil Defence$",
|
||||
"^Podniková kultura$",
|
||||
"^Corporate Culture$",
|
||||
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
|
||||
"^Pracovní motivace$",
|
||||
"^Work Motivation$",
|
||||
"^Kaitei kōtō jogaku dokuhon$",
|
||||
"^Konsolidovaná účetní závěrka$",
|
||||
"^Consolidated Financial Statements$",
|
||||
"(?i)^intracranial tumour(s?)$",
|
||||
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
|
||||
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
|
||||
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
|
||||
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
|
||||
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
|
||||
"^The level of motivation process as a leadership$",
|
||||
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
|
||||
"(?i)^news and events$",
|
||||
"(?i)^NOVOSTI I DOGAĐAJI$",
|
||||
"^Sansū no gakushū$",
|
||||
"^Posouzení informačního systému firmy a návrh změn$",
|
||||
"^Information System Assessment and Proposal for ICT Modification$",
|
||||
"^Stresové zatížení pracovníků ve vybrané profesi$",
|
||||
"^Stress load in a specific job$",
|
||||
"^Sunday: Poster Sessions, Pt.*$",
|
||||
"^Monday: Poster Sessions, Pt.*$",
|
||||
"^Wednesday: Poster Sessions, Pt.*",
|
||||
"^Tuesday: Poster Sessions, Pt.*$",
|
||||
"^Analýza reklamy$",
|
||||
"^Analysis of advertising$",
|
||||
"^Shōgaku shūshinsho$",
|
||||
"^Shōgaku sansū$",
|
||||
"^Shintei joshi kokubun$",
|
||||
"^Taishō joshi kokubun dokuhon$",
|
||||
"^Joshi kokubun$",
|
||||
"^Účetní uzávěrka a účetní závěrka v ČR$",
|
||||
"(?i)^The \"?Causes\"? of Cancer$",
|
||||
"^Normas para la publicación de artículos$",
|
||||
"^Editor('|s)(s|') [Rr]eply$",
|
||||
"^Editor(’|s)(s|’) letter$",
|
||||
"^Redaktoriaus žodis$",
|
||||
"^DISCUSSION ON THE PRECEDING PAPER$",
|
||||
"^Kōtō shōgaku shūshinsho jidōyō$",
|
||||
"^Shōgaku nihon rekishi$",
|
||||
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
|
||||
"^Préface$",
|
||||
"^Occupational [Hh]ealth [Ss]ervices.$",
|
||||
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
|
||||
"^Účetní závěrka ve vybraném podniku.*$",
|
||||
"^Financial statements in selected company$",
|
||||
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
|
||||
"^Pseudomyxoma peritonei$",
|
||||
"^Kazalo autora$",
|
||||
"(?i)^uvodna riječ$",
|
||||
"^Motivace jako způsob vedení lidí$",
|
||||
"^Motivation as a leadership$",
|
||||
"^Polyfunkční dům$",
|
||||
"^Multi\\-funkcional building$",
|
||||
"^Podnikatelský plán$",
|
||||
"(?i)^Podnikatelský záměr$",
|
||||
"(?i)^Business Plan$",
|
||||
"^Oceňování nemovitostí$",
|
||||
"^Marketingová komunikace$",
|
||||
"^Marketing communication$",
|
||||
"^Sumario Analítico$",
|
||||
"^Riječ uredništva$",
|
||||
"^Savjetovanja i priredbe$",
|
||||
"^Índice$",
|
||||
"^(Starobosanski nadpisi).*$",
|
||||
"^Vzdělávání pracovníků v organizaci$",
|
||||
"^Staff training in organization$",
|
||||
"^(Life Histories of North American Geometridae).*$",
|
||||
"^Strategická analýza podniku$",
|
||||
"^Strategic Analysis of an Enterprise$",
|
||||
"^Sadržaj$",
|
||||
"^Upute suradnicima$",
|
||||
"^Rodinný dům$",
|
||||
"(?i)^Fami(l)?ly house$",
|
||||
"^Upute autorima$",
|
||||
"^Strategic Analysis$",
|
||||
"^Finanční analýza vybraného podniku$",
|
||||
"^Finanční analýza$",
|
||||
"^Riječ urednika$",
|
||||
"(?i)^Content(s?)$",
|
||||
"(?i)^Inhalt$",
|
||||
"^Jinjō shōgaku shūshinsho jidōyō$",
|
||||
"(?i)^Index$",
|
||||
"^Chūgaku kokubun kyōkasho$",
|
||||
"^Retrato de una mujer$",
|
||||
"^Retrato de un hombre$",
|
||||
"^Kōtō shōgaku dokuhon$",
|
||||
"^Shotōka kokugo$",
|
||||
"^Shōgaku dokuhon$",
|
||||
"^Jinjō shōgaku kokugo dokuhon$",
|
||||
"^Shinsei kokugo dokuhon$",
|
||||
"^Teikoku dokuhon$",
|
||||
"^Instructions to Authors$",
|
||||
"^KİTAP TAHLİLİ$",
|
||||
"^PRZEGLĄD PIŚMIENNICTWA$",
|
||||
"(?i)^Presentación$",
|
||||
"^İçindekiler$",
|
||||
"(?i)^Tabl?e of contents$",
|
||||
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
|
||||
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
|
||||
"^Editorial( Board)?$",
|
||||
"(?i)^Editorial \\(English\\)$",
|
||||
"^Editörden$",
|
||||
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
|
||||
"^(Kiri Karl Morgensternile).*$",
|
||||
"^(\\[Eksliibris Aleksandr).*\\]$",
|
||||
"^(\\[Eksliibris Aleksandr).*$",
|
||||
"^(Eksliibris Aleksandr).*$",
|
||||
"^(Kiri A\\. de Vignolles).*$",
|
||||
"^(2 kirja Karl Morgensternile).*$",
|
||||
"^(Pirita kloostri idaosa arheoloogilised).*$",
|
||||
"^(Kiri tundmatule).*$",
|
||||
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
|
||||
"^(Eksliibris Nikolai Birukovile).*$",
|
||||
"^(Eksliibris Nikolai Issakovile).*$",
|
||||
"^(WHP Cruise Summary Information of section).*$",
|
||||
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
|
||||
"^(Measurement of the spin\\-dependent structure function).*",
|
||||
"(?i)^.*authors['’′]? reply\\.?$",
|
||||
"(?i)^.*authors['’′]? response\\.?$"
|
||||
]
|
||||
},
|
||||
"synonyms": {}
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "pid": [], "oaiprovenance": {"originDescription": {"metadataNamespace": "", "harvestDate": "2021-06-10T10:03:36.091Z", "baseURL": "file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fsygma%2Fnew_ingestion%2Fcrossref", "datestamp": "", "altered": true, "identifier": ""}}, "relevantdate": [], "contributor": [], "id": "50|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446", "subject": [], "lastupdatetimestamp": 1628684944004, "author": [{"surname": "Pan", "fullname": "Pan, Mengwu", "pid": [], "name": "Mengwu", "rank": 1}, {"surname": "Blattner", "fullname": "Blattner, Christine", "pid": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "orcid_pending", "classname": "Open Researcher and Contributor ID", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "0000-0002-7250-5273"}], "name": "Christine", "rank": 2}], "collectedfrom": [{"value": "Sygma", "key": "10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}], "instance": [{"refereed": {"classid": "UNKNOWN", "classname": "Unknown", "schemename": "dnet:review_levels", "schemeid": "dnet:review_levels"}, "hostedby": {"value": "Cancers", "key": "10|issn__online::69ba871b903253074dcf4054e619afff"}, "license": {"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": 
"dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "value": "https://creativecommons.org/licenses/by/4.0/"}, "url": ["http://dx.doi.org/10.3390/cancers13040745"], "pid": [], "distributionlocation": "", "alternateIdentifier": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemename": "dnet:pid_types", "schemeid": "dnet:pid_types"}, "value": "10.3390/cancers13040745"}], "collectedfrom": {"value": "Sygma", "key": "10|openaire____::a8db6f6b2ce4fe72e8b2314a9a93e7d9"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "instancetype": {"classid": "0001", "classname": "Article", "schemename": "dnet:publication_resource", "schemeid": "dnet:publication_resource"}}], "resulttype": {"classid": "publication", "classname": "publication", "schemename": "dnet:result_typologies", "schemeid": "dnet:result_typologies"}, "dateofcollection": "2021-06-10T10:03:36.091Z", "fulltext": [], "dateoftransformation": "2021-07-20T16:59:21.682Z", "description": [], "format": [], "journal": {"issnPrinted": "", "vol": "13", "dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "name": "Cancers", "iss": "4", "sp": "745", "edition": "", "issnOnline": "2072-6694", "ep": "", "issnLinking": ""}, "coverage": [], "externalReference": [], "language": {"classid": "eng", 
"classname": "English", "schemename": "dnet:languages", "schemeid": "dnet:languages"}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemename": "dnet:access_modes", "schemeid": "dnet:access_modes"}, "country": [], "extraInfo": [], "originalId": ["10.3390/cancers13040745", "50|sygma_______::3bbb03e6ec8df0d219b2d2165ea1d446"], "source": [], "context": [], "title": [{"dataInfo": {"deletedbyinference": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "sysimport:crosswalk:repository", "schemename": "dnet:provenanceActions", "schemeid": "dnet:provenanceActions"}, "inferred": false, "inferenceprovenance": "", "invisible": false, "trust": "0.9"}, "qualifier": {"classid": "main title", "classname": "main title", "schemename": "dnet:dataCite_title", "schemeid": "dnet:dataCite_title"}, "value": "Regulation of p53 by E3s"}]}
|
18
pom.xml
18
pom.xml
|
@ -228,7 +228,8 @@
|
|||
<google.guava.version>15.0</google.guava.version>
|
||||
|
||||
<spark.version>2.2.0</spark.version>
|
||||
<jackson.version>2.9.6</jackson.version>
|
||||
<!--<jackson.version>2.9.6</jackson.version>-->
|
||||
<jackson.version>2.6.5</jackson.version>
|
||||
<mockito-core.version>3.3.3</mockito-core.version>
|
||||
|
||||
<commons.lang.version>3.5</commons.lang.version>
|
||||
|
@ -260,7 +261,7 @@
|
|||
<oozie.use.system.libpath>true</oozie.use.system.libpath>
|
||||
<properties.maven.plugin.version>2.0.1</properties.maven.plugin.version>
|
||||
<junit-jupiter.version>5.6.1</junit-jupiter.version>
|
||||
<maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>../dhp-build/dhp-build-assembly-resources/target/dhp-build-assembly-resources-4.0.6-SNAPSHOT.jar</maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>
|
||||
<maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>../dhp-build/dhp-build-assembly-resources/target/dhp-build-assembly-resources-4.1.8-SNAPSHOT.jar</maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path>
|
||||
|
||||
</properties>
|
||||
|
||||
|
@ -409,6 +410,19 @@
|
|||
<version>2.4.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
    <groupId>org.mockito</groupId>
    <artifactId>mockito-core</artifactId>
    <!-- Use the mockito-core.version property declared in <properties> so the version is defined in one place. -->
    <version>${mockito-core.version}</version>
    <scope>test</scope>
</dependency>
|
||||
|
||||
<dependency>
    <groupId>org.mockito</groupId>
    <artifactId>mockito-junit-jupiter</artifactId>
    <!-- Kept on the same version as mockito-core via the shared mockito-core.version property. -->
    <version>${mockito-core.version}</version>
    <scope>test</scope>
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
#release configuration
|
||||
#Tue Sep 29 12:04:49 CEST 2020
|
||||
scm.tagNameFormat=@{project.artifactId}-@{project.version}
|
||||
pushChanges=true
|
||||
scm.url=scm\:git\:https\://code-repo.d4science.org/D-Net/dnet-dedup.git
|
||||
preparationGoals=clean verify
|
||||
projectVersionPolicyId=default
|
||||
remoteTagging=true
|
||||
scm.commentPrefix=[maven-release-plugin]
|
||||
exec.snapshotReleasePluginAllowed=false
|
||||
completedPhase=check-poms
|
Loading…
Reference in New Issue