merged JqMapping branch into tree2
This commit is contained in:
commit
d09193a094
|
@ -1,43 +1,21 @@
|
|||
*~
|
||||
|
||||
# Compiled class file
|
||||
*.class
|
||||
|
||||
# Log file
|
||||
*.log
|
||||
|
||||
# BlueJ files
|
||||
*.ctxt
|
||||
|
||||
# Mobile Tools for Java (J2ME)
|
||||
.mtj.tmp/
|
||||
|
||||
|
||||
|
||||
*target
|
||||
|
||||
# Package Files #
|
||||
*.jar
|
||||
*.war
|
||||
*.nar
|
||||
*.ear
|
||||
*.zip
|
||||
*.tar.gz
|
||||
*.rar
|
||||
|
||||
|
||||
*.idea
|
||||
*.iml
|
||||
|
||||
.DS_Store
|
||||
**/.DS_Store
|
||||
|
||||
.project
|
||||
.idea
|
||||
*.iml
|
||||
*~
|
||||
.classpath
|
||||
/*/.classpath
|
||||
/*/*/.classpath
|
||||
.metadata
|
||||
/*/.metadata
|
||||
/*/*/.metadata
|
||||
.project
|
||||
.log
|
||||
.settings
|
||||
**/.project
|
||||
**/.classpath
|
||||
**/.settings
|
||||
|
||||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||
hs_err_pid*
|
||||
/*/*/target
|
||||
/*/target
|
||||
/target
|
||||
/*/*/build
|
||||
/*/build
|
||||
/build
|
||||
spark-warehouse
|
||||
/dhp-workflows/dhp-graph-mapper/job-override.properties
|
||||
|
|
|
@ -15,32 +15,31 @@
|
|||
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>2.4.3</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<filters>
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<!-- <plugin>-->
|
||||
<!-- <groupId>org.apache.maven.plugins</groupId>-->
|
||||
<!-- <artifactId>maven-shade-plugin</artifactId>-->
|
||||
<!-- <version>2.4.3</version>-->
|
||||
<!-- <executions>-->
|
||||
<!-- <execution>-->
|
||||
<!-- <phase>package</phase>-->
|
||||
<!-- <goals>-->
|
||||
<!-- <goal>shade</goal>-->
|
||||
<!-- </goals>-->
|
||||
<!-- <configuration>-->
|
||||
<!-- <filters>-->
|
||||
<!-- <filter>-->
|
||||
<!-- <artifact>*:*</artifact>-->
|
||||
<!-- <excludes>-->
|
||||
<!-- <exclude>META-INF/*.SF</exclude>-->
|
||||
<!-- <exclude>META-INF/*.DSA</exclude>-->
|
||||
<!-- <exclude>META-INF/*.RSA</exclude>-->
|
||||
<!-- </excludes>-->
|
||||
<!-- </filter>-->
|
||||
<!-- </filters>-->
|
||||
<!-- </configuration>-->
|
||||
<!-- </execution>-->
|
||||
<!-- </executions>-->
|
||||
<!-- </plugin>-->
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
|
@ -114,10 +113,6 @@
|
|||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-data-protos</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
@ -133,12 +128,6 @@
|
|||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
|
||||
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
|
@ -150,12 +139,6 @@
|
|||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.oozie</groupId>
|
||||
<artifactId>oozie-client</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
|
|
|
@ -4,7 +4,7 @@ import eu.dnetlib.graph.GraphProcessor;
|
|||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.BlockProcessor;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
import eu.dnetlib.pace.utils.Utility;
|
||||
import eu.dnetlib.reporter.SparkReporter;
|
||||
import eu.dnetlib.support.ConnectedComponent;
|
||||
|
@ -99,7 +99,7 @@ public class Deduper implements Serializable {
|
|||
*/
|
||||
public static JavaPairRDD<String, MapDocument> mapToVertexes(JavaSparkContext context, JavaRDD<String> entities, DedupConfig config){
|
||||
return entities.mapToPair(it -> {
|
||||
MapDocument mapDocument = PaceUtils.asMapDocument(config, it);
|
||||
MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(config, it);
|
||||
return new Tuple2<>(mapDocument.getIdentifier(), mapDocument);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,92 +0,0 @@
|
|||
package eu.dnetlib.pace.utils;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.googlecode.protobuf.format.JsonFormat;
|
||||
import eu.dnetlib.data.proto.OafProtos;
|
||||
import eu.dnetlib.data.proto.ResultProtos;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
|
||||
import org.apache.commons.lang3.RandomStringUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static eu.dnetlib.proto.utils.OAFProtoUtils.*;
|
||||
import static eu.dnetlib.proto.utils.OAFProtoUtils.author;
|
||||
import static eu.dnetlib.proto.utils.OAFProtoUtils.sp;
|
||||
|
||||
public class PaceUtils implements Serializable {
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title) {
|
||||
return result(config, id, title, null, new ArrayList<>(), null);
|
||||
}
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title, final String date) {
|
||||
return result(config, id, title, date, new ArrayList<>(), null);
|
||||
}
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) {
|
||||
return result(config, id, title, date, pid, null);
|
||||
}
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
|
||||
return result(config, id, title, date, pid, null);
|
||||
}
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
|
||||
return result(config, id, title, date, Lists.newArrayList(pid), authors);
|
||||
}
|
||||
|
||||
public static MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) {
|
||||
final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
|
||||
if (!StringUtils.isBlank(title)) {
|
||||
metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
|
||||
metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
|
||||
}
|
||||
if (!StringUtils.isBlank(date)) {
|
||||
metadata.setDateofacceptance(sf(date));
|
||||
}
|
||||
|
||||
final OafProtos.OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
|
||||
final ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder().setMetadata(metadata);
|
||||
|
||||
if (authors != null) {
|
||||
result.getMetadataBuilder().addAllAuthor(
|
||||
IntStream.range(0, authors.size())
|
||||
.mapToObj(i -> author(authors.get(i), i))
|
||||
.collect(Collectors.toCollection(LinkedList::new)));
|
||||
}
|
||||
|
||||
entity.setResult(result);
|
||||
|
||||
if (pid != null) {
|
||||
for (String p : pid) {
|
||||
if (!StringUtils.isBlank(p)) {
|
||||
entity.addPid(sp(p, "doi"));
|
||||
//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final OafProtos.OafEntity build = entity.build();
|
||||
return ProtoDocumentBuilder.newInstance(id, build, config.model());
|
||||
}
|
||||
|
||||
public static MapDocument asMapDocument(DedupConfig conf, final String json) {
|
||||
OafProtos.OafEntity.Builder b = OafProtos.OafEntity.newBuilder();
|
||||
try {
|
||||
JsonFormat.merge(json, b);
|
||||
} catch (JsonFormat.ParseException e) {
|
||||
System.out.println("**************************** " + json);
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
return ProtoDocumentBuilder.newInstance(b.getId(), b.build(), conf.getPace().getModel());
|
||||
}
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
package eu.dnetlib.proto.utils;
|
||||
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos;
|
||||
import eu.dnetlib.data.proto.OafProtos;
|
||||
|
||||
public class OAFProtoUtils {
|
||||
|
||||
|
||||
public static FieldTypeProtos.Author author(final String s, int rank) {
|
||||
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(s, false);
|
||||
final FieldTypeProtos.Author.Builder author = FieldTypeProtos.Author.newBuilder();
|
||||
if (p.isAccurate()) {
|
||||
author.setName(p.getNormalisedFirstName());
|
||||
author.setSurname(p.getNormalisedSurname());
|
||||
}
|
||||
author.setFullname(p.getNormalisedFullname());
|
||||
author.setRank(rank);
|
||||
|
||||
return author.build();
|
||||
}
|
||||
|
||||
public static FieldTypeProtos.StructuredProperty sp(final String pid, final String type) {
|
||||
FieldTypeProtos.StructuredProperty.Builder pidSp = FieldTypeProtos.StructuredProperty.newBuilder().setValue(pid)
|
||||
.setQualifier(FieldTypeProtos.Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types"));
|
||||
return pidSp.build();
|
||||
}
|
||||
|
||||
public static FieldTypeProtos.StringField.Builder sf(final String s) { return FieldTypeProtos.StringField.newBuilder().setValue(s); }
|
||||
|
||||
public static FieldTypeProtos.StructuredProperty.Builder getStruct(final String value, final FieldTypeProtos.Qualifier.Builder qualifier) {
|
||||
return FieldTypeProtos.StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
|
||||
}
|
||||
|
||||
public static FieldTypeProtos.Qualifier.Builder getQualifier(final String classname, final String schemename) {
|
||||
return FieldTypeProtos.Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename);
|
||||
}
|
||||
|
||||
public static OafProtos.OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
|
||||
final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setId(id).setType(type);
|
||||
return entity;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,208 +0,0 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
|
||||
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
|
||||
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
||||
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
||||
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
|
||||
import eu.dnetlib.data.proto.ResultProtos.Result;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.*;
|
||||
import eu.dnetlib.pace.model.gt.GTAuthor;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang.RandomStringUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.RandomUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public abstract class AbstractProtoPaceTest extends OafTest {
|
||||
|
||||
protected DedupConfig getOrganizationCurrentConf() {
|
||||
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf"));
|
||||
}
|
||||
|
||||
protected DedupConfig getOrganizationTestConf() {
|
||||
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.test.conf"));
|
||||
}
|
||||
|
||||
protected MapDocument author(final Config conf, final String id, final Oaf oaf) {
|
||||
return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model());
|
||||
}
|
||||
|
||||
protected GTAuthor getGTAuthor(final String path) {
|
||||
|
||||
final Gson gson = new Gson();
|
||||
|
||||
final String json = readFromClasspath(path);
|
||||
|
||||
final GTAuthor gta = gson.fromJson(json, GTAuthor.class);
|
||||
|
||||
return gta;
|
||||
}
|
||||
|
||||
protected String readFromClasspath(final String filename) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
try {
|
||||
IOUtils.copy(getClass().getResourceAsStream(filename), sw);
|
||||
return sw.toString();
|
||||
} catch (final IOException e) {
|
||||
throw new RuntimeException("cannot load resource from classpath: " + filename);
|
||||
}
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title) {
|
||||
return result(config, id, title, null, new ArrayList<>(), null);
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title, final String date) {
|
||||
return result(config, id, title, date, new ArrayList<>(), null);
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) {
|
||||
return result(config, id, title, date, pid, null);
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
|
||||
return result(config, id, title, date, pid, null);
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
|
||||
return result(config, id, title, date, Lists.newArrayList(pid), authors);
|
||||
}
|
||||
|
||||
protected MapDocument author(final String identifier, final String area, final String firstname, final String lastname, final String fullname, final Double[] topics, final String pubID, final String pubDOI, final int rank, final String orcid, final List<String> coauthors) {
|
||||
Map<String, Field> fieldMap = new HashMap<>();
|
||||
|
||||
fieldMap.put("area", new FieldValueImpl(Type.String, "area", area));
|
||||
fieldMap.put("firstname", new FieldValueImpl(Type.String, "firstname", firstname));
|
||||
fieldMap.put("lastname", new FieldValueImpl(Type.String, "lastname", lastname));
|
||||
fieldMap.put("fullname", new FieldValueImpl(Type.String, "fullname", fullname));
|
||||
fieldMap.put("pubID", new FieldValueImpl(Type.String, "pubID", pubID));
|
||||
fieldMap.put("pubDOI", new FieldValueImpl(Type.String, "pubDOI", pubDOI));
|
||||
fieldMap.put("rank", new FieldValueImpl(Type.Int, "rank", rank));
|
||||
fieldMap.put("orcid", new FieldValueImpl(Type.String, "orcid", orcid));
|
||||
|
||||
FieldListImpl ca = new FieldListImpl("coauthors", Type.String);
|
||||
ca.addAll(coauthors.stream().map(s -> new FieldValueImpl(Type.String, "coauthors", s)).collect(Collectors.toList()));
|
||||
fieldMap.put("coauthors", ca);
|
||||
|
||||
FieldListImpl t = new FieldListImpl("topics", Type.String);
|
||||
t.addAll(Arrays.asList(topics).stream().map(d -> new FieldValueImpl(Type.String, "topics", d.toString())).collect(Collectors.toList()));
|
||||
fieldMap.put("topics", t);
|
||||
|
||||
return new MapDocument(identifier, fieldMap);
|
||||
}
|
||||
|
||||
static List<String> pidTypes = Lists.newArrayList();
|
||||
static {
|
||||
pidTypes.add("doi");
|
||||
//pidTypes.add("oai");
|
||||
//pidTypes.add("pmid");
|
||||
}
|
||||
|
||||
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) {
|
||||
final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
|
||||
if (!StringUtils.isBlank(title)) {
|
||||
metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
|
||||
metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
|
||||
}
|
||||
if (!StringUtils.isBlank(date)) {
|
||||
metadata.setDateofacceptance(sf(date));
|
||||
}
|
||||
|
||||
final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
|
||||
final Result.Builder result = Result.newBuilder().setMetadata(metadata);
|
||||
|
||||
if (authors != null) {
|
||||
result.getMetadataBuilder().addAllAuthor(
|
||||
IntStream.range(0, authors.size())
|
||||
.mapToObj(i -> author(authors.get(i), i))
|
||||
.collect(Collectors.toCollection(LinkedList::new)));
|
||||
}
|
||||
|
||||
entity.setResult(result);
|
||||
|
||||
if (pid != null) {
|
||||
for(String p : pid) {
|
||||
if (!StringUtils.isBlank(p)) {
|
||||
entity.addPid(sp(p, pidTypes.get(RandomUtils.nextInt(0, pidTypes.size() - 1))));
|
||||
//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final OafEntity build = entity.build();
|
||||
return ProtoDocumentBuilder.newInstance(id, build, config.model());
|
||||
}
|
||||
|
||||
private Author author(final String s, int rank) {
|
||||
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(s, false);
|
||||
final Author.Builder author = Author.newBuilder();
|
||||
if (p.isAccurate()) {
|
||||
author.setName(p.getNormalisedFirstName());
|
||||
author.setSurname(p.getNormalisedSurname());
|
||||
}
|
||||
author.setFullname(p.getNormalisedFullname());
|
||||
author.setRank(rank);
|
||||
|
||||
return author.build();
|
||||
}
|
||||
|
||||
private OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
|
||||
final OafEntity.Builder entity = OafEntity.newBuilder().setId(id).setType(type);
|
||||
return entity;
|
||||
}
|
||||
|
||||
protected MapDocument organization(final Config config, final String id, final String legalName) {
|
||||
return organization(config, id, legalName, null);
|
||||
}
|
||||
|
||||
protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
|
||||
final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder();
|
||||
if (legalName != null) {
|
||||
metadata.setLegalname(sf(legalName));
|
||||
}
|
||||
if (legalShortName != null) {
|
||||
metadata.setLegalshortname(sf(legalShortName));
|
||||
}
|
||||
|
||||
final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
|
||||
entity.setOrganization(Organization.newBuilder().setMetadata(metadata));
|
||||
|
||||
return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
|
||||
}
|
||||
|
||||
private StructuredProperty sp(final String pid, final String type) {
|
||||
final Builder pidSp =
|
||||
StructuredProperty.newBuilder().setValue(pid)
|
||||
.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types"));
|
||||
return pidSp.build();
|
||||
}
|
||||
|
||||
protected Field title(final String s) {
|
||||
return new FieldValueImpl(Type.String, "title", s);
|
||||
}
|
||||
|
||||
protected static Builder getStruct(final String value, final Qualifier.Builder qualifier) {
|
||||
return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
|
||||
}
|
||||
|
||||
/*
|
||||
* protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); }
|
||||
*
|
||||
* protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return
|
||||
* Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); }
|
||||
*/
|
||||
|
||||
}
|
|
@ -5,7 +5,7 @@ import eu.dnetlib.pace.config.DedupConfig;
|
|||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.tree.support.TreeProcessor;
|
||||
import eu.dnetlib.pace.tree.support.TreeStats;
|
||||
import eu.dnetlib.pace.utils.PaceUtils;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
import eu.dnetlib.pace.utils.Utility;
|
||||
import eu.dnetlib.support.ConnectedComponent;
|
||||
import org.apache.spark.api.java.JavaPairRDD;
|
||||
|
@ -13,7 +13,6 @@ import org.apache.spark.api.java.JavaRDD;
|
|||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.Before;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import scala.Tuple2;
|
||||
|
||||
|
@ -31,7 +30,7 @@ public class DedupLocalTest extends DedupTestUtils {
|
|||
@Before
|
||||
public void setup() {
|
||||
|
||||
config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/config/organization.strict.conf", DedupLocalTest.class));
|
||||
config = DedupConfig.load(Utility.readFromClasspath("/eu/dnetlib/pace/config/organization.strict.conf.json", DedupLocalTest.class));
|
||||
treeProcessor = new TreeProcessor(config);
|
||||
|
||||
final SparkSession spark = SparkSession
|
||||
|
@ -45,7 +44,6 @@ public class DedupLocalTest extends DedupTestUtils {
|
|||
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void dedupTest(){
|
||||
|
||||
|
@ -59,7 +57,6 @@ public class DedupLocalTest extends DedupTestUtils {
|
|||
|
||||
}
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void relationsTest() {
|
||||
|
||||
|
@ -115,15 +112,15 @@ public class DedupLocalTest extends DedupTestUtils {
|
|||
|
||||
}
|
||||
|
||||
@Ignore
|
||||
|
||||
@Test
|
||||
public void matchTest(){
|
||||
|
||||
String JSONEntity1 = "{\"dateoftransformation\":\"2018-06-04\",\"originalId\":[\"opendoar____::Universiti_Sains_Malaysia\"],\"collectedfrom\":[{\"value\":\"OpenDOAR\",\"key\":\"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb\"}],\"organization\":{\"metadata\":{\"eclegalbody\":{\"value\":\"false\"},\"eclegalperson\":{\"value\":\"false\"},\"ecinternationalorganization\":{\"value\":\"false\"},\"ecresearchorganization\":{\"value\":\"false\"},\"ecnonprofit\":{\"value\":\"false\"},\"ecenterprise\":{\"value\":\"false\"},\"websiteurl\":{\"value\":\"http://www.usm.my/my/\"},\"ecnutscode\":{\"value\":\"false\"},\"ecinternationalorganizationeurinterests\":{\"value\":\"false\"},\"legalname\":{\"value\":\"Universiti Sains Malaysia\"},\"country\":{\"classid\":\"MY\",\"classname\":\"Malaysia\",\"schemename\":\"dnet:countries\",\"schemeid\":\"dnet:countries\"},\"echighereducation\":{\"value\":\"false\"},\"ecsmevalidated\":{\"value\":\"false\"}}},\"dateofcollection\":\"2015-08-24\",\"type\":20,\"id\":\"20|opendoar____::04315c25b0eb56eacb967901557f86b1\"}";
|
||||
String JSONEntity2 = "{\"dateoftransformation\":\"2019-10-07\",\"originalId\":[\"corda_______::997941627\"],\"collectedfrom\":[{\"value\":\"CORDA - COmmon Research DAta Warehouse\",\"key\":\"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f\"}],\"organization\":{\"metadata\":{\"eclegalbody\":{\"value\":\"true\"},\"eclegalperson\":{\"value\":\"true\"},\"ecinternationalorganization\":{\"value\":\"false\"},\"legalshortname\":{\"value\":\"USM\"},\"ecresearchorganization\":{\"value\":\"true\"},\"ecnonprofit\":{\"value\":\"true\"},\"ecenterprise\":{\"value\":\"false\"},\"websiteurl\":{\"value\":\"http://www.usm.my/my\"},\"ecnutscode\":{\"value\":\"false\"},\"ecinternationalorganizationeurinterests\":{\"value\":\"false\"},\"legalname\":{\"value\":\"UNIVERSITI SAINS MALAYSIA*\"},\"country\":{\"classid\":\"MY\",\"classname\":\"Malaysia\",\"schemename\":\"dnet:countries\",\"schemeid\":\"dnet:countries\"},\"echighereducation\":{\"value\":\"true\"}}},\"dateofcollection\":\"2015-09-10\",\"type\":20,\"id\":\"20|corda_______::1fb0c86ddf389377454d5520d2796dad\"}";
|
||||
|
||||
MapDocument mapDoc1 = PaceUtils.asMapDocument(config, JSONEntity1);
|
||||
MapDocument mapDoc2 = PaceUtils.asMapDocument(config, JSONEntity2);
|
||||
MapDocument mapDoc1 = MapDocumentUtil.asMapDocumentWithJPath(config, JSONEntity1);
|
||||
MapDocument mapDoc2 = MapDocumentUtil.asMapDocumentWithJPath(config, JSONEntity2);
|
||||
|
||||
TreeStats treeStats = treeProcessor.evaluateTree(mapDoc1, mapDoc2);
|
||||
|
||||
|
@ -131,12 +128,12 @@ public class DedupLocalTest extends DedupTestUtils {
|
|||
|
||||
}
|
||||
|
||||
@Ignore
|
||||
|
||||
@Test
|
||||
public void parseJSONEntityTest(){
|
||||
String jsonEntity = "{\"dateoftransformation\":\"2018-09-19\",\"originalId\":[\"doajarticles::Sociedade_Brasileira_de_Reumatologia\"],\"collectedfrom\":[{\"value\":\"DOAJ-Articles\",\"key\":\"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824\"}],\"organization\":{\"metadata\":{\"eclegalbody\":{\"value\":\"false\"},\"eclegalperson\":{\"value\":\"false\"},\"ecinternationalorganization\":{\"value\":\"false\"},\"legalshortname\":{\"value\":\"Sociedade Brasileira de Reumatologia\"},\"ecresearchorganization\":{\"value\":\"false\"},\"ecnonprofit\":{\"value\":\"false\"},\"ecenterprise\":{\"value\":\"false\"},\"ecnutscode\":{\"value\":\"false\"},\"ecinternationalorganizationeurinterests\":{\"value\":\"false\"},\"legalname\":{\"value\":\"Sociedade Brasileira de Reumatologia\"},\"country\":{\"classid\":\"BR\",\"classname\":\"Brazil\",\"schemename\":\"dnet:countries\",\"schemeid\":\"dnet:countries\"},\"echighereducation\":{\"value\":\"false\"},\"ecsmevalidated\":{\"value\":\"false\"}}},\"dateofcollection\":\"2018-09-19\",\"type\":20,\"id\":\"20|doajarticles::0019ba7a22c5bc733c3206bde28ff568\"}";
|
||||
|
||||
MapDocument mapDocument = PaceUtils.asMapDocument(config, jsonEntity);
|
||||
MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(config, jsonEntity);
|
||||
|
||||
System.out.println("mapDocument = " + mapDocument);
|
||||
}
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
package eu.dnetlib.pace;
|
||||
|
||||
import org.apache.oozie.client.OozieClient;
|
||||
import org.apache.oozie.client.OozieClientException;
|
||||
import org.apache.oozie.client.WorkflowJob;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class DedupTestIT {
|
||||
|
||||
@Ignore
|
||||
@Test
|
||||
public void deduplicationTest() throws OozieClientException, InterruptedException {
|
||||
|
||||
//read properties to use in the oozie workflow
|
||||
Properties prop = readProperties("/eu/dnetlib/test/properties/config.properties");
|
||||
|
||||
/*OOZIE WORKFLOW CREATION AND LAUNCH*/
|
||||
// get a OozieClient for local Oozie
|
||||
OozieClient wc = new OozieClient("http://hadoop-edge3.garr-pa1.d4science.org:11000/oozie");
|
||||
|
||||
// create a workflow job configuration and set the workflow application path
|
||||
Properties conf = wc.createConfiguration();
|
||||
conf.setProperty(OozieClient.APP_PATH, "hdfs://hadoop-rm1.garr-pa1.d4science.org:8020/user/michele.debonis/oozieJob/workflow.xml");
|
||||
conf.setProperty(OozieClient.USER_NAME, "michele.debonis");
|
||||
conf.setProperty("oozie.action.sharelib.for.spark", "spark2");
|
||||
conf.setProperty("oozie.use.system.libpath", "true");
|
||||
|
||||
// setting workflow parameters
|
||||
conf.setProperty("jobTracker", "hadoop-rm3.garr-pa1.d4science.org:8032");
|
||||
conf.setProperty("nameNode", "hdfs://hadoop-rm1.garr-pa1.d4science.org:8020");
|
||||
conf.setProperty("dedupConfiguration", prop.getProperty("dedup.configuration"));
|
||||
conf.setProperty("inputSpace", prop.getProperty("input.space"));
|
||||
conf.setProperty("outputPath", prop.getProperty("output"));
|
||||
conf.setProperty("statisticsPath", prop.getProperty("dedup.statistics"));
|
||||
|
||||
// submit and start the workflow job
|
||||
String jobId = wc.run(conf);
|
||||
System.out.println("Workflow job submitted");
|
||||
|
||||
// wait until the workflow job finishes printing the status every 10 seconds
|
||||
while (wc.getJobInfo(jobId).getStatus() == WorkflowJob.Status.RUNNING) {
|
||||
System.out.println(wc.getJobInfo(jobId));;
|
||||
Thread.sleep(10 * 1000);
|
||||
}
|
||||
|
||||
// print the final status of the workflow job
|
||||
System.out.println(wc.getJobInfo(jobId));
|
||||
// System.out.println("JOB LOG = " + wc.getJobLog(jobId));
|
||||
|
||||
assertEquals(WorkflowJob.Status.SUCCEEDED, wc.getJobInfo(jobId).getStatus());
|
||||
|
||||
}
|
||||
|
||||
static Properties readProperties(final String propFile) {
|
||||
|
||||
Properties prop = new Properties();
|
||||
try {
|
||||
prop.load(DedupTestIT.class.getResourceAsStream(propFile));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return prop;
|
||||
}
|
||||
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -1,41 +1,39 @@
|
|||
package eu.dnetlib.pace.clustering;
|
||||
|
||||
import eu.dnetlib.pace.AbstractProtoPaceTest;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
import eu.dnetlib.pace.model.FieldValueImpl;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class ClusteringCombinerTest extends AbstractProtoPaceTest {
|
||||
public class ClusteringCombinerTest {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ClusteringCombinerTest.class);
|
||||
|
||||
private Config config;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
config = getOrganizationTestConf();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombine() {
|
||||
|
||||
final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
|
||||
log.info("University of Turin");
|
||||
log.info(ClusteringCombiner.combine(organization, config));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombineBlacklistAware() {
|
||||
|
||||
final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
|
||||
log.info("University of Turin");
|
||||
log.info(BlacklistAwareClusteringCombiner.filterAndCombine(organization, config));
|
||||
}
|
||||
// TODO RE IMPLEMENT Tests with the new configuration
|
||||
// private static final Log log = LogFactory.getLog(ClusteringCombinerTest.class);
|
||||
//
|
||||
// private Config config;
|
||||
//
|
||||
// @Before
|
||||
// public void setUp() {
|
||||
// config = getOrganizationTestConf();
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testCombine() {
|
||||
//
|
||||
// final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
|
||||
// log.info("University of Turin");
|
||||
// log.info(ClusteringCombiner.combine(organization, config));
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testCombineBlacklistAware() {
|
||||
//
|
||||
// final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
|
||||
// log.info("University of Turin");
|
||||
// log.info(BlacklistAwareClusteringCombiner.filterAndCombine(organization, config));
|
||||
// }
|
||||
|
||||
}
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.collect.Sets.SetView;
|
||||
import eu.dnetlib.pace.AbstractProtoPaceTest;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ProtoDocumentBuilderTest.class);
|
||||
|
||||
@Test
|
||||
public void test_serialise1() {
|
||||
|
||||
final String id = "12345";
|
||||
|
||||
final Config config = getOrganizationTestConf();
|
||||
|
||||
final MapDocument document = ProtoDocumentBuilder.newInstance(id, getOrganization(id), config.model());
|
||||
|
||||
assertFalse(document.fieldNames().isEmpty());
|
||||
assertFalse(Iterables.isEmpty(document.fields()));
|
||||
|
||||
log.info("original:\n" + document);
|
||||
|
||||
final String stringDoc = MapDocumentSerializer.toString(document);
|
||||
|
||||
log.info("serialization:\n" + stringDoc);
|
||||
|
||||
final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes());
|
||||
|
||||
final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames());
|
||||
|
||||
assertTrue(diff.isEmpty());
|
||||
|
||||
log.info("decoded:\n" + decoded);
|
||||
}
|
||||
|
||||
}
|
|
@ -7,6 +7,7 @@
|
|||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "50",
|
||||
"slidingWindowSize" : "200",
|
||||
"idPath":"$.id",
|
||||
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
|
||||
"includeChildren" : "true",
|
||||
"maxIterations": "20"
|
||||
|
@ -185,12 +186,12 @@
|
|||
}
|
||||
},
|
||||
"model" : [
|
||||
{ "name" : "country", "type" : "String", "path" : "organization/metadata/country/classid"},
|
||||
{ "name" : "legalshortname", "type" : "String", "path" : "organization/metadata/legalshortname/value"},
|
||||
{ "name" : "legalname", "type" : "String", "path" : "organization/metadata/legalname/value" },
|
||||
{ "name" : "websiteurl", "type" : "URL", "path" : "organization/metadata/websiteurl/value" },
|
||||
{ "name" : "gridid", "type" : "String", "path" : "pid[qualifier#classid = {grid}]/value"},
|
||||
{ "name" : "originalId", "type" : "String", "path" : "id" }
|
||||
{ "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"},
|
||||
{ "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
|
||||
{ "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
|
||||
{ "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
|
||||
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid.ac')].value"},
|
||||
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
|
||||
],
|
||||
"blacklists" : {
|
||||
"legalname" : []
|
||||
|
@ -301,7 +302,7 @@
|
|||
"key::103": ["forschungsgemeinschaft","comunita ricerca","research community","research foundation","research association"],
|
||||
"key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"],
|
||||
"key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"],
|
||||
"key::106" : ["seminary", "seminario", "seminaire", "seminar"]
|
||||
"key::106" : ["seminary", "seminario", "seminaire", "seminar"],
|
||||
"key::107" : ["agricultural forestry", "af", "a f", "a&f"],
|
||||
"key::108" : ["agricultural mechanical", "am", "a m", "a&m"]
|
||||
}
|
|
@ -7,6 +7,7 @@
|
|||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "50",
|
||||
"slidingWindowSize" : "200",
|
||||
"idPath":"$.id",
|
||||
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
|
||||
"includeChildren" : "true",
|
||||
"maxIterations": "20"
|
||||
|
@ -24,11 +25,13 @@
|
|||
"layer3": {"fields": [{"field":"legalname", "comparator":"jaroWinklerNormalizedName", "weight":0.9, "countIfUndefined":"false", "params":{"windowSize" : 4, "threshold" : 0.7}}, {"field":"legalshortname", "comparator":"jaroWinklerNormalizedName", "weight":0.1, "countIfUndefined":"true", "params":{}}], "threshold": 0.9, "aggregation": "W_MEAN", "positive":"MATCH", "negative":"NO_MATCH", "undefined":"NO_MATCH", "ignoreUndefined": "true"}
|
||||
},
|
||||
"model" : [
|
||||
{ "name" : "country", "type" : "String", "path" : "organization/metadata/country/classid"},
|
||||
{ "name" : "legalshortname", "type" : "String", "path" : "organization/metadata/legalshortname/value"},
|
||||
{ "name" : "legalname", "type" : "String", "path" : "organization/metadata/legalname/value" },
|
||||
{ "name" : "websiteurl", "type" : "URL", "path" : "organization/metadata/websiteurl/value" },
|
||||
{ "name" : "gridid", "type" : "String", "path" : "pid[qualifier#classid = {grid}]/value"}
|
||||
|
||||
{ "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : ".organization.metadata.country.classid" },
|
||||
{ "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "true", "path" : ".organization.metadata.legalshortname.value" },
|
||||
{ "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : ".organization.metadata.legalname.value", "params" : {"windowSize" : 4, "threshold" : 0.7} },
|
||||
{ "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : ".organization.metadata.websiteurl.value", "params" : { "host" : 0.5, "path" : 0.5 } },
|
||||
{ "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : ".pid[] | select(.qualifier.classid == \"grid\") | .value" }
|
||||
|
||||
],
|
||||
"blacklists" : {
|
||||
"legalname" : ["University of Turin"]
|
||||
|
|
109
dnet-dedup.ipr
109
dnet-dedup.ipr
|
@ -1,109 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
|
||||
<component name="CodeStyleManager">
|
||||
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
|
||||
<option name="CODE_STYLE_SCHEME" value=""/>
|
||||
</component>
|
||||
<component name="libraryTable"/>
|
||||
<component name="CompilerConfiguration">
|
||||
<option name="DEFAULT_COMPILER" value="Javac"/>
|
||||
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
|
||||
<!--
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="${wildcardResourcePattern}"/>
|
||||
</wildcardResourcePatterns>
|
||||
-->
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="!?*.java"/>
|
||||
</wildcardResourcePatterns>
|
||||
</component>
|
||||
<component name="JavacSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
<option name="USE_GENERICS_COMPILER" value="false"/>
|
||||
</component>
|
||||
<component name="JikesSettings">
|
||||
<option name="DEBUGGING_INFO" value="true"/>
|
||||
<option name="DEPRECATION" value="true"/>
|
||||
<option name="GENERATE_NO_WARNINGS" value="false"/>
|
||||
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
|
||||
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
|
||||
<option name="IS_INCREMENTAL_MODE" value="false"/>
|
||||
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
|
||||
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
|
||||
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
|
||||
</component>
|
||||
<component name="AntConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="FILTER_TARGETS" value="false"/>
|
||||
</component>
|
||||
<component name="JavadocGenerationManager">
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
<option name="OPTION_SCOPE" value="protected"/>
|
||||
<option name="OPTION_HIERARCHY" value="false"/>
|
||||
<option name="OPTION_NAVIGATOR" value="false"/>
|
||||
<option name="OPTION_INDEX" value="false"/>
|
||||
<option name="OPTION_SEPARATE_INDEX" value="false"/>
|
||||
<option name="OPTION_USE_1_1" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
|
||||
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
|
||||
<option name="OPTION_DEPRECATED_LIST" value="false"/>
|
||||
<option name="OTHER_OPTIONS"/>
|
||||
<option name="HEAP_SIZE"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
</component>
|
||||
<component name="JUnitProjectSettings">
|
||||
<option name="TEST_RUNNER" value="UI"/>
|
||||
</component>
|
||||
<component name="EntryPointsManager">
|
||||
<entry_points/>
|
||||
</component>
|
||||
<component name="DataSourceManager"/>
|
||||
<component name="ExportToHTMLSettings">
|
||||
<option name="PRINT_LINE_NUMBERS" value="false"/>
|
||||
<option name="OPEN_IN_BROWSER" value="false"/>
|
||||
<option name="OUTPUT_DIRECTORY"/>
|
||||
</component>
|
||||
<component name="ImportConfiguration">
|
||||
<option name="VENDOR"/>
|
||||
<option name="RELEASE_TAG"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
|
||||
</component>
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
|
||||
<module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
|
||||
</modules>
|
||||
</component>
|
||||
<UsedPathMacros>
|
||||
<!--<macro name="cargo"></macro>-->
|
||||
</UsedPathMacros>
|
||||
</project>
|
418
dnet-dedup.iws
418
dnet-dedup.iws
|
@ -1,418 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project version="4" relativePaths="false">
|
||||
<component name="LvcsProjectConfiguration">
|
||||
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
|
||||
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
|
||||
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
|
||||
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="MemberChooser.copyJavadoc" value="false"/>
|
||||
<property name="GoToClass.includeLibraries" value="false"/>
|
||||
<property name="MemberChooser.showClasses" value="true"/>
|
||||
<property name="MemberChooser.sorted" value="false"/>
|
||||
<property name="GoToFile.includeJavaFiles" value="false"/>
|
||||
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
|
||||
<editor active="false"/>
|
||||
<layout>
|
||||
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
|
||||
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
|
||||
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
|
||||
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
|
||||
</layout>
|
||||
</component>
|
||||
<component name="ErrorTreeViewConfiguration">
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="HIDE_WARNINGS" value="false"/>
|
||||
</component>
|
||||
<component name="StructureViewFactory">
|
||||
<option name="SORT_MODE" value="0"/>
|
||||
<option name="GROUP_INHERITED" value="true"/>
|
||||
<option name="AUTOSCROLL_MODE" value="true"/>
|
||||
<option name="SHOW_FIELDS" value="true"/>
|
||||
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
|
||||
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
|
||||
<option name="SHOW_INHERITED" value="false"/>
|
||||
<option name="HIDE_NOT_PUBLIC" value="false"/>
|
||||
</component>
|
||||
<component name="ProjectViewSettings">
|
||||
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
|
||||
<view id="ProjectPane">
|
||||
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
|
||||
</view>
|
||||
<view id="SourcepathPane"/>
|
||||
<view id="ClasspathPane"/>
|
||||
</component>
|
||||
<component name="Commander">
|
||||
<leftPanel view="Project"/>
|
||||
<rightPanel view="Project"/>
|
||||
<splitter proportion="0.5"/>
|
||||
</component>
|
||||
<component name="AspectsView"/>
|
||||
<component name="SelectInManager"/>
|
||||
<component name="HierarchyBrowserManager">
|
||||
<option name="SHOW_PACKAGES" value="false"/>
|
||||
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SORT_ALPHABETICALLY" value="false"/>
|
||||
</component>
|
||||
<component name="TodoView" selected-index="0">
|
||||
<todo-panel id="selected-file">
|
||||
<are-packages-shown value="false"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
<todo-panel id="all">
|
||||
<are-packages-shown value="true"/>
|
||||
<flatten-packages value="false"/>
|
||||
<is-autoscroll-to-source value="true"/>
|
||||
</todo-panel>
|
||||
</component>
|
||||
<component name="editorManager"/>
|
||||
<component name="editorHistoryManager"/>
|
||||
<component name="DaemonCodeAnalyzer">
|
||||
<disable_hints/>
|
||||
</component>
|
||||
<component name="InspectionManager">
|
||||
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
|
||||
<option name="SPLITTER_PROPORTION" value="0.5"/>
|
||||
<profile name="Default"/>
|
||||
</component>
|
||||
<component name="BookmarkManager"/>
|
||||
<component name="DebuggerManager">
|
||||
<line_breakpoints/>
|
||||
<exception_breakpoints>
|
||||
<breakpoint_any>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="ENABLED" value="false"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</breakpoint_any>
|
||||
</exception_breakpoints>
|
||||
<field_breakpoints/>
|
||||
<method_breakpoints/>
|
||||
</component>
|
||||
<component name="DebuggerSettings">
|
||||
<option name="TRACING_FILTERS_ENABLED" value="true"/>
|
||||
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
|
||||
<option name="VALUE_LOOKUP_DELAY" value="700"/>
|
||||
<option name="DEBUGGER_TRANSPORT" value="0"/>
|
||||
<option name="FORCE_CLASSIC_VM" value="true"/>
|
||||
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
|
||||
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
|
||||
<option name="SKIP_CONSTRUCTORS" value="false"/>
|
||||
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
|
||||
<default_breakpoint_settings>
|
||||
<option name="NOTIFY_CAUGHT" value="true"/>
|
||||
<option name="NOTIFY_UNCAUGHT" value="true"/>
|
||||
<option name="WATCH_MODIFICATION" value="true"/>
|
||||
<option name="WATCH_ACCESS" value="true"/>
|
||||
<option name="WATCH_ENTRY" value="true"/>
|
||||
<option name="WATCH_EXIT" value="true"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
<option name="SUSPEND_VM" value="true"/>
|
||||
<option name="COUNT_FILTER_ENABLED" value="false"/>
|
||||
<option name="COUNT_FILTER" value="0"/>
|
||||
<option name="CONDITION_ENABLED" value="false"/>
|
||||
<option name="CONDITION"/>
|
||||
<option name="LOG_ENABLED" value="false"/>
|
||||
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
|
||||
<option name="LOG_MESSAGE"/>
|
||||
<option name="CLASS_FILTERS_ENABLED" value="false"/>
|
||||
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
|
||||
<option name="SUSPEND_POLICY" value="SuspendAll"/>
|
||||
</default_breakpoint_settings>
|
||||
<filter>
|
||||
<option name="PATTERN" value="com.sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="java.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="javax.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="org.omg.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="sun.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
<filter>
|
||||
<option name="PATTERN" value="junit.*"/>
|
||||
<option name="ENABLED" value="true"/>
|
||||
</filter>
|
||||
</component>
|
||||
<component name="CompilerWorkspaceConfiguration">
|
||||
<option name="COMPILE_IN_BACKGROUND" value="false"/>
|
||||
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<activeType name="Application"/>
|
||||
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
|
||||
<module name=""/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="HTML_FILE_NAME"/>
|
||||
<option name="HTML_USED" value="false"/>
|
||||
<option name="WIDTH" value="400"/>
|
||||
<option name="HEIGHT" value="300"/>
|
||||
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
|
||||
<option name="USE_SOCKET_TRANSPORT" value="true"/>
|
||||
<option name="SERVER_MODE" value="false"/>
|
||||
<option name="SHMEM_ADDRESS" value="javadebug"/>
|
||||
<option name="HOST" value="localhost"/>
|
||||
<option name="PORT" value="5005"/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="Application" factoryName="Application">
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PROGRAM_PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<module name=""/>
|
||||
</configuration>
|
||||
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
|
||||
<module name=""/>
|
||||
<option name="PACKAGE_NAME"/>
|
||||
<option name="MAIN_CLASS_NAME"/>
|
||||
<option name="METHOD_NAME"/>
|
||||
<option name="TEST_OBJECT" value="class"/>
|
||||
<option name="VM_PARAMETERS"/>
|
||||
<option name="PARAMETERS"/>
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
|
||||
<option name="ADDITIONAL_CLASS_PATH"/>
|
||||
<option name="TEST_SEARCH_SCOPE">
|
||||
<value defaultName="wholeProject"/>
|
||||
</option>
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="VcsManagerConfiguration">
|
||||
<option name="ACTIVE_VCS_NAME" value="git"/>
|
||||
<option name="STATE" value="0"/>
|
||||
</component>
|
||||
<component name="VssConfiguration">
|
||||
<CheckoutOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
|
||||
<option name="REPLACE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckoutOptions>
|
||||
<CheckinOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="KEEP_CHECKED_OUT" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</CheckinOptions>
|
||||
<AddOptions>
|
||||
<option name="COMMENT" value=""/>
|
||||
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
|
||||
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
|
||||
<option name="FILE_TYPE" value="0"/>
|
||||
</AddOptions>
|
||||
<UndocheckoutOptions>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="REPLACE_LOCAL_COPY" value="0"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</UndocheckoutOptions>
|
||||
<DiffOptions>
|
||||
<option name="IGNORE_WHITE_SPACE" value="false"/>
|
||||
<option name="IGNORE_CASE" value="false"/>
|
||||
</DiffOptions>
|
||||
<GetOptions>
|
||||
<option name="REPLACE_WRITABLE" value="0"/>
|
||||
<option name="MAKE_WRITABLE" value="false"/>
|
||||
<option name="RECURSIVE" value="false"/>
|
||||
</GetOptions>
|
||||
<option name="CLIENT_PATH" value=""/>
|
||||
<option name="SRCSAFEINI_PATH" value=""/>
|
||||
<option name="USER_NAME" value=""/>
|
||||
<option name="PWD" value=""/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="SHOW_DIFF_OPTIONS" value="true"/>
|
||||
<option name="SHOW_GET_OPTIONS" value="true"/>
|
||||
<option name="USE_EXTERNAL_DIFF" value="false"/>
|
||||
<option name="EXTERNAL_DIFF_PATH" value=""/>
|
||||
<option name="REUSE_LAST_COMMENT" value="false"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="CheckinPanelState"/>
|
||||
<component name="WebViewSettings">
|
||||
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="EjbViewSettings">
|
||||
<EjbView showMembers="false" autoscrollToSource="false"/>
|
||||
</component>
|
||||
<component name="AppServerRunManager"/>
|
||||
<component name="StarteamConfiguration">
|
||||
<option name="SERVER" value=""/>
|
||||
<option name="PORT" value="49201"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="PROJECT" value=""/>
|
||||
<option name="VIEW" value=""/>
|
||||
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
</component>
|
||||
<component name="Cvs2Configuration">
|
||||
<option name="ON_FILE_ADDING" value="0"/>
|
||||
<option name="ON_FILE_REMOVING" value="0"/>
|
||||
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
|
||||
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
|
||||
<option name="SHOW_ADD_OPTIONS" value="true"/>
|
||||
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
|
||||
<option name="MERGING_MODE" value="0"/>
|
||||
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
|
||||
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
|
||||
<option name="RESET_STICKY" value="false"/>
|
||||
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
|
||||
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
|
||||
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
|
||||
<option name="PROCESS_DELETED_FILES" value="false"/>
|
||||
<option name="SHOW_EDIT_DIALOG" value="true"/>
|
||||
<option name="RESERVED_EDIT" value="false"/>
|
||||
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
|
||||
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
|
||||
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_CHANGES_REVISION_SETTINGS">
|
||||
<value>
|
||||
<option name="BRANCH" value=""/>
|
||||
<option name="DATE" value=""/>
|
||||
<option name="USE_BRANCH" value="false"/>
|
||||
<option name="USE_DATE" value="false"/>
|
||||
</value>
|
||||
</option>
|
||||
<option name="SHOW_OUTPUT" value="false"/>
|
||||
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
|
||||
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
|
||||
<option name="ADD_WATCH_INDEX" value="0"/>
|
||||
<option name="REMOVE_WATCH_INDEX" value="0"/>
|
||||
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
|
||||
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
|
||||
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
|
||||
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
<component name="CvsTabbedWindow"/>
|
||||
<component name="SvnConfiguration">
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="AUTO_ADD_FILES" value="0"/>
|
||||
<option name="AUTO_DEL_FILES" value="0"/>
|
||||
</component>
|
||||
<component name="PerforceConfiguration">
|
||||
<option name="PORT" value="magic:1666"/>
|
||||
<option name="USER" value=""/>
|
||||
<option name="PASSWORD" value=""/>
|
||||
<option name="CLIENT" value=""/>
|
||||
<option name="TRACE" value="false"/>
|
||||
<option name="PERFORCE_STATUS" value="true"/>
|
||||
<option name="CHANGELIST_OPTION" value="false"/>
|
||||
<option name="SYSTEMROOT" value=""/>
|
||||
<option name="P4_EXECUTABLE" value="p4"/>
|
||||
<option name="SHOW_BRANCH_HISTORY" value="false"/>
|
||||
<option name="GENERATE_COMMENT" value="false"/>
|
||||
<option name="SYNC_OPTION" value="Sync"/>
|
||||
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
|
||||
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
|
||||
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
|
||||
<option name="LAST_COMMIT_MESSAGE" value=""/>
|
||||
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
|
||||
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
|
||||
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
|
||||
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
|
||||
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
|
||||
</component>
|
||||
</project>
|
|
@ -38,10 +38,6 @@
|
|||
<groupId>commons-collections</groupId>
|
||||
<artifactId>commons-collections</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.googlecode.protobuf-java-format</groupId>
|
||||
<artifactId>protobuf-java-format</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
|
@ -59,22 +55,22 @@
|
|||
<groupId>org.reflections</groupId>
|
||||
<artifactId>reflections</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-math3</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -1,25 +1,25 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.pace.model.ClusteringDef;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.antlr.stringtemplate.StringTemplate;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
|
||||
import eu.dnetlib.pace.tree.support.TreeNodeDef;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.antlr.stringtemplate.StringTemplate;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.pace.model.ClusteringDef;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class DedupConfig implements Config, Serializable {
|
||||
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.pace.common.AbstractPaceFunctions;
|
||||
import eu.dnetlib.pace.model.ClusteringDef;
|
||||
import eu.dnetlib.pace.model.FieldDef;
|
||||
import eu.dnetlib.pace.tree.support.TreeNodeDef;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
import org.codehaus.jackson.annotate.JsonIgnore;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.GsonBuilder;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class WfConfig implements Serializable {
|
||||
|
||||
|
@ -76,12 +76,17 @@ public class WfConfig implements Serializable {
|
|||
/** Maximum number of allowed children. */
|
||||
private int maxChildren = MAX_CHILDREN;
|
||||
|
||||
|
||||
/** Default maximum number of iterations. */
|
||||
private final static int MAX_ITERATIONS = 20;
|
||||
|
||||
/** Maximum number of iterations */
|
||||
private int maxIterations = MAX_ITERATIONS;
|
||||
|
||||
/** The Jquery path to retrieve the identifier */
|
||||
private String idPath = "$.id";
|
||||
|
||||
|
||||
public WfConfig() {}
|
||||
|
||||
/**
|
||||
|
@ -252,6 +257,7 @@ public class WfConfig implements Serializable {
|
|||
this.maxChildren = maxChildren;
|
||||
}
|
||||
|
||||
|
||||
public int getMaxIterations() {
|
||||
return maxIterations;
|
||||
}
|
||||
|
@ -260,6 +266,15 @@ public class WfConfig implements Serializable {
|
|||
this.maxIterations = maxIterations;
|
||||
}
|
||||
|
||||
public String getIdPath() {
|
||||
return idPath;
|
||||
}
|
||||
|
||||
public void setIdPath(String idPath) {
|
||||
this.idPath = idPath;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
|
|
|
@ -1,19 +1,15 @@
|
|||
package eu.dnetlib.pace.model;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.pace.clustering.ClusteringFunction;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import eu.dnetlib.pace.clustering.*;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import eu.dnetlib.pace.util.PaceResolver;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class ClusteringDef implements Serializable {
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.pace.config.Config;
|
||||
import eu.dnetlib.pace.config.PaceConfig;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package eu.dnetlib.pace.tree.support;
|
||||
|
||||
import eu.dnetlib.pace.util.PaceException;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
|
|
|
@ -36,6 +36,18 @@ public class BlockProcessor {
|
|||
this.dedupConf = dedupConf;
|
||||
}
|
||||
|
||||
|
||||
public void processSortedBlock(final String key, final List<MapDocument> documents, final Reporter context) {
|
||||
if (documents.size() > 1) {
|
||||
// log.info("reducing key: '" + key + "' records: " + q.size());
|
||||
//process(q, context);
|
||||
process(prepare(documents), context);
|
||||
|
||||
} else {
|
||||
context.incrementCounter(dedupConf.getWf().getEntityType(), "records per hash key = 1", 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void process(final String key, final Iterable<MapDocument> documents, final Reporter context) {
|
||||
|
||||
final Queue<MapDocument> q = prepare(documents);
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
package eu.dnetlib.pace.util;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
import eu.dnetlib.pace.config.DedupConfig;
|
||||
import eu.dnetlib.pace.config.Type;
|
||||
import eu.dnetlib.pace.model.Field;
|
||||
import eu.dnetlib.pace.model.FieldListImpl;
|
||||
import eu.dnetlib.pace.model.FieldValueImpl;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import net.minidev.json.JSONArray;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public class MapDocumentUtil {
|
||||
|
||||
|
||||
private static final ObjectMapper mapper = new ObjectMapper();
|
||||
public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
|
||||
public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
|
||||
|
||||
|
||||
|
||||
public static MapDocument asMapDocumentWithJPath(DedupConfig conf, final String json) {
|
||||
MapDocument m = new MapDocument();
|
||||
m.setIdentifier(getJPathString(conf.getWf().getIdPath(), json));
|
||||
Map<String, Field> stringField = new HashMap<>();
|
||||
conf.getPace().getModel().forEach(fdef -> {
|
||||
switch (fdef.getType()) {
|
||||
case String:
|
||||
case Int:
|
||||
stringField.put(fdef.getName(), new FieldValueImpl(fdef.getType(), fdef.getName(), getJPathString(fdef.getPath(), json)));
|
||||
break;
|
||||
case URL:
|
||||
String uv = getJPathString(fdef.getPath(), json);
|
||||
if (!urlFilter.test(uv)) uv = "";
|
||||
stringField.put(fdef.getName(), new FieldValueImpl(fdef.getType(), fdef.getName(), uv));
|
||||
break;
|
||||
case List:
|
||||
case JSON:
|
||||
FieldListImpl fi = new FieldListImpl(fdef.getName(), fdef.getType());
|
||||
getJPathList(fdef.getPath(), json, fdef.getType())
|
||||
.stream()
|
||||
.map(item -> new FieldValueImpl(fdef.getType(), fdef.getName(), item))
|
||||
.forEach(fi::add);
|
||||
stringField.put(fdef.getName(), fi);
|
||||
break;
|
||||
}
|
||||
});
|
||||
m.setFieldMap(stringField);
|
||||
return m;
|
||||
}
|
||||
|
||||
public static List<String> getJPathList(String path, String json, Type type) {
|
||||
if (type == Type.List)
|
||||
return JsonPath.read(json, path);
|
||||
Object jresult;
|
||||
List<String> result = new ArrayList<>();
|
||||
try {
|
||||
jresult = JsonPath.read(json, path);
|
||||
} catch (Throwable e) {
|
||||
return result;
|
||||
}
|
||||
if (jresult instanceof JSONArray) {
|
||||
|
||||
((JSONArray) jresult).forEach(it -> {
|
||||
|
||||
try {
|
||||
result.add(new ObjectMapper().writeValueAsString(it));
|
||||
} catch (JsonProcessingException e) {
|
||||
|
||||
}
|
||||
}
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
if (jresult instanceof LinkedHashMap) {
|
||||
try {
|
||||
result.add(new ObjectMapper().writeValueAsString(jresult));
|
||||
} catch (JsonProcessingException e) {
|
||||
|
||||
}
|
||||
return result;
|
||||
}
|
||||
if (jresult instanceof String) {
|
||||
result.add((String) jresult);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public static String getJPathString(final String jsonPath, final String json) {
|
||||
try {
|
||||
Object o = JsonPath.read(json, jsonPath);
|
||||
if (o instanceof String)
|
||||
return (String)o;
|
||||
if (o instanceof JSONArray && ((JSONArray)o).size()>0)
|
||||
return (String)((JSONArray)o).get(0);
|
||||
return "";
|
||||
} catch (Exception e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,6 +1,10 @@
|
|||
package eu.dnetlib.pace.config;
|
||||
|
||||
|
||||
import eu.dnetlib.pace.AbstractPaceTest;
|
||||
import eu.dnetlib.pace.model.MapDocument;
|
||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Map;
|
||||
|
@ -57,4 +61,27 @@ public class ConfigTest extends AbstractPaceTest {
|
|||
assertEquals(0, load.getPace().translationMap().keySet().size());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testAsMapDocumentJPath() throws Exception {
|
||||
|
||||
DedupConfig load = DedupConfig.load(readFromClasspath("result.pace.conf_jpath.json"));
|
||||
|
||||
|
||||
System.out.println(load.getWf().getIdPath());
|
||||
|
||||
final String result =IOUtils.toString(this.getClass().getResourceAsStream("result.json"));
|
||||
|
||||
System.out.println(result);
|
||||
final MapDocument mapDocument = MapDocumentUtil.asMapDocumentWithJPath(load, result);
|
||||
|
||||
System.out.println(mapDocument.getFieldMap());
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,48 @@
|
|||
{
|
||||
"wf" : {
|
||||
"threshold" : "0.99",
|
||||
"dedupRun" : "001",
|
||||
"entityType" : "result",
|
||||
"orderField" : "title",
|
||||
"queueMaxSize" : "2000",
|
||||
"groupMaxSize" : "10",
|
||||
"slidingWindowSize" : "200",
|
||||
"idPath": "$.entity.id",
|
||||
"rootBuilder" : [ "result" ],
|
||||
"includeChildren" : "true"
|
||||
},
|
||||
"pace" : {
|
||||
"clustering" : [
|
||||
{ "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} },
|
||||
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
|
||||
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }
|
||||
],
|
||||
"decisionTree": {},
|
||||
"model" : [
|
||||
{ "name" : "pid", "type" : "JSON", "path" : "$.entity.pid"},
|
||||
{ "name" : "dateofacceptance", "type" : "String", "path" : "$.entity.result.metadata.dateofacceptance.value"},
|
||||
{ "name" : "title", "type" : "String","path" : "$.entity.result.metadata.title[?(@.qualifier.classid ==\"main title\")].value" },
|
||||
{ "name" : "authors", "type" : "List", "path" : "$.entity.result.metadata.author[*].fullname" }
|
||||
],
|
||||
"blacklists" : {
|
||||
"title" : [
|
||||
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
|
||||
"^(Kiri Karl Morgensternile).*$",
|
||||
"^(\\[Eksliibris Aleksandr).*\\]$",
|
||||
"^(\\[Eksliibris Aleksandr).*$",
|
||||
"^(Eksliibris Aleksandr).*$",
|
||||
"^(Kiri A\\. de Vignolles).*$",
|
||||
"^(2 kirja Karl Morgensternile).*$",
|
||||
"^(Pirita kloostri idaosa arheoloogilised).*$",
|
||||
"^(Kiri tundmatule).*$",
|
||||
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
|
||||
"^(Eksliibris Nikolai Birukovile).*$",
|
||||
"^(Eksliibris Nikolai Issakovile).*$",
|
||||
"^(WHP Cruise Summary Information of section).*$",
|
||||
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
|
||||
"^(Measurement of the spin\\-dependent structure function).*"
|
||||
] } ,
|
||||
"synonyms": {}
|
||||
}
|
||||
|
||||
}
|
44
pom.xml
44
pom.xml
|
@ -84,6 +84,16 @@
|
|||
</snapshots>
|
||||
</repository>
|
||||
|
||||
|
||||
<repository>
|
||||
<id>central</id>
|
||||
<name>Central Repository</name>
|
||||
<url>http://repo.maven.apache.org/maven2</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
</repository>
|
||||
|
||||
</repositories>
|
||||
|
||||
<build>
|
||||
|
@ -246,21 +256,6 @@
|
|||
<artifactId>stringtemplate</artifactId>
|
||||
<version>3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.googlecode.protobuf-java-format</groupId>
|
||||
<artifactId>protobuf-java-format</artifactId>
|
||||
<version>1.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaire-data-protos</artifactId>
|
||||
<version>3.9.3-proto250</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>eu.dnetlib</groupId>
|
||||
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
|
||||
<version>6.2.21</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
|
@ -269,10 +264,17 @@
|
|||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
<version>1.9.13</version>
|
||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||
<artifactId>jackson-dataformat-xml</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-jsonSchema</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
@ -351,6 +353,12 @@
|
|||
<artifactId>oozie-client</artifactId>
|
||||
<version>5.1.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jayway.jsonpath</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
<version>2.4.0</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
|
Loading…
Reference in New Issue