Compare commits

...

56 Commits

Author SHA1 Message Date
Claudio Atzori f04f9dd6c1 Merge pull request 'Precompile blacklists patterns before evaluating clustering criteria' (#1) from optimized-clustering into master
Reviewed-on: #1
2023-06-19 12:43:49 +02:00
Giambattista Bloisi d2d173773e Precompile blacklists patterns before evaluating clustering criteria
Enable Junit 5 tests in maven builds
Make path comparisons platform-independent
Read String resource files assuming they are encoded in UTF-8
Fix a few test conditions
2023-06-16 09:41:11 +02:00
Michele De Bonis 7e2e7dcdcd implementation of the support for authors deduplication: cosinesimilarity comparator and double array json parser 2023-04-17 11:06:27 +02:00
Michele De Bonis b5584f084a minor change in the author match which now can compute count and percentage 2023-04-04 17:10:37 +02:00
Michele De Bonis b4b6a61576 configuration updated for testing 2023-02-02 12:05:06 +01:00
Michele De Bonis 66472ce408 implementation of author dedup configuration and lnfi clustering function 2023-01-31 11:53:10 +01:00
Michele De Bonis 00466512ea implementation of the new software configuration 2022-11-22 17:48:34 +01:00
Michele De Bonis 42cff050e7 minor changes 2022-11-21 14:35:46 +01:00
miconis 5aebe63f22 implementation of new configuration for datasource deduplication 2022-04-26 11:30:40 +02:00
miconis fb2eed9f0e implementation of the java version of the graph processor 2022-04-19 15:29:29 +02:00
miconis 6c47fb0e67 implementation of comparators and clustering function for the author deduplication 2022-04-19 10:18:09 +02:00
miconis 9618e889bd test implementation for the new fdup version 2022-04-13 09:48:56 +02:00
miconis 661818da9e bug fix in test 2022-03-21 14:43:55 +01:00
miconis 66b64937ed [maven-release-plugin] prepare for next development iteration 2022-03-15 15:06:18 +01:00
miconis 5156950a3c [maven-release-plugin] prepare release dnet-dedup-4.1.12 2022-03-15 15:06:12 +01:00
miconis b2cbc09fda bug fix in the normalization of a legalname, city map updated and transliteration support added 2022-03-15 14:59:13 +01:00
miconis 0553103486 [maven-release-plugin] prepare for next development iteration 2022-03-09 13:43:48 +01:00
miconis 674123f695 [maven-release-plugin] prepare release dnet-dedup-4.1.11 2022-03-09 13:43:44 +01:00
miconis cb72ce0a22 bug fix in the AuthorMatch, implementation of the concat function in the model creation with jpath query 2022-03-09 12:53:09 +01:00
miconis 613b32fcf7 implementation of the size threshold on authors list match 2022-03-08 16:49:28 +01:00
miconis 321cba82d0 minor change to remove local machine references 2022-02-02 10:42:11 +01:00
miconis aae4180142 [maven-release-plugin] prepare for next development iteration 2022-01-13 17:22:16 +01:00
miconis 277bedaec8 [maven-release-plugin] prepare release dnet-dedup-4.1.10 2022-01-13 17:22:12 +01:00
miconis de66199001 minor change 2022-01-13 17:20:20 +01:00
miconis 62219e2f46 [maven-release-plugin] prepare for next development iteration 2022-01-13 15:15:05 +01:00
miconis e6f66faeaa [maven-release-plugin] prepare release dnet-dedup-4.1.9 2022-01-13 15:15:00 +01:00
miconis e168d95ec0 bug fix in the authormatch comparator, implementation of tests 2022-01-13 11:58:28 +01:00
miconis 6eb0730188 [maven-release-plugin] prepare for next development iteration 2021-12-30 13:11:57 +01:00
miconis 6c238291bb [maven-release-plugin] prepare release dnet-dedup-4.1.8 2021-12-30 13:11:52 +01:00
miconis 5e8757a457 implementation of new comparators for publication dedup configuration update 2021-12-27 17:35:02 +01:00
miconis 451114418d implementation of the instance type comparator and its tests 2021-11-04 15:20:57 +01:00
miconis 2c1488b91f implementation of the test to see dedup results 2021-10-22 11:21:09 +02:00
miconis fb314e3441 implementation of the graph visualization tool 2021-09-17 10:33:29 +02:00
miconis a9ee4dbe61 bug fix in graph drawer 2021-09-14 10:02:14 +02:00
miconis 5a52aed8e1 dedup test implementation & graph drawing tools 2021-09-13 14:53:19 +02:00
miconis 30aeed3803 [maven-release-plugin] prepare for next development iteration 2021-05-03 16:09:56 +02:00
miconis 4c3b1125d0 [maven-release-plugin] prepare release dnet-dedup-4.1.7 2021-05-03 16:09:08 +02:00
miconis 67a0f965e7 minor change: version updated 2021-05-03 16:05:39 +02:00
miconis 249d25a593 Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-dedup 2021-05-03 15:38:00 +02:00
miconis fad803bd46 implementation of cross comparison for different fields, addition of clustering mechanism to collapse keys from different clustering functions on the same cluster 2021-05-03 15:37:41 +02:00
Claudio Atzori 88e18c3196 [maven-release-plugin] prepare for next development iteration 2021-04-12 16:12:15 +02:00
Claudio Atzori aee3d38783 [maven-release-plugin] prepare release dnet-dedup-4.0.6 2021-04-12 16:12:10 +02:00
Claudio Atzori 1a35598a41 removed references to the old maven repository 2021-04-12 16:11:27 +02:00
Claudio Atzori b318b0d1dc Merge branch 'decisionTreeDedup' 2021-03-22 11:01:16 +01:00
miconis e65526848a implementation of the wf to dedup entities, addition of the module to run the wf on the cluster 2020-12-04 15:41:31 +01:00
Claudio Atzori 8ed0f70907 added description in README 2020-12-02 14:13:13 +01:00
miconis 135c30923f [maven-release-plugin] prepare for next development iteration 2020-09-29 12:06:38 +02:00
miconis cc19511e4e [maven-release-plugin] prepare release dnet-dedup-4.0.5 2020-09-29 12:06:35 +02:00
miconis 14787ca516 minor changes 2020-09-29 12:05:50 +02:00
miconis 5021e5048f fixed error in the treeprocessor. it used th=-1 as default value, now it use th=1 2020-09-29 12:01:25 +02:00
miconis 9e8ea8f6ee fixed error in the block processor: entities with orderField=null were not considered 2020-09-19 17:43:41 +02:00
Sandro La Bruzzo eea8e87b25 fixed NPE 2020-08-06 10:27:05 +02:00
miconis dcb09aaf0d [maven-release-plugin] prepare for next development iteration 2020-07-16 18:59:25 +02:00
miconis 48e263fc8f [maven-release-plugin] prepare release dnet-dedup-4.0.4 2020-07-16 18:59:22 +02:00
miconis 7188648bdc implementation of the clustering function for the suffixprefix chain 2020-07-16 18:57:55 +02:00
Claudio Atzori 63cea67788 [maven-release-plugin] prepare for next development iteration 2020-07-15 17:57:09 +02:00
130 changed files with 19217 additions and 2525 deletions

2
.gitignore vendored
View File

@ -19,3 +19,5 @@
/build
spark-warehouse
/dhp-workflows/dhp-graph-mapper/job-override.properties
test.properties

View File

@ -1,4 +1,6 @@
## Dnet-Dedup
## Dnet-Dedup (gDup)
The GDup Software enables an integrated, scalable, general-purpose system for entity deduplication over big information graphs. GDup supports practitioners with the functionalities needed to realize a fully-fledged entity deduplication workflow over a generic input graph, inclusive of Ground Truth support, end-user feedback, and strategies for identifying and merging duplicates to obtain an output disambiguated graph. GDup is today one of the core components of the OpenAIRE infrastructure production system, monitoring Open Science trends on behalf of the European Commission.
# Decision Tree for authors deduplication

View File

@ -0,0 +1,7 @@
Module utilized by `dhp-workflows`.
Contains all resources required by this parent module:
* assembly XML definitions
* build shell scripts
* oozie package commands for uploading, running and monitoring oozie workflows

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-build</artifactId>
<version>4.1.13-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>
<packaging>jar</packaging>
<description>This module contains a set of scripts supporting the build lifecycle for the dnet-hadoop project</description>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.plugin.version}</version>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,32 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>oozie-installer</id>
<formats>
<format>dir</format>
</formats>
<fileSets>
<fileSet>
<filtered>true</filtered>
<directory>${project.build.directory}/assembly-resources/commands</directory>
<!--
works for (local module resources):
<directory>src/main/resources</directory>
does not work for:
<directory>classpath:/commands</directory>
<directory>commands</directory>
<directory>classpath/src/main/resources</directory>
-->
<outputDirectory>/</outputDirectory>
<includes>
<include>**/*</include>
</includes>
<fileMode>0755</fileMode>
<lineEnding>unix</lineEnding>
</fileSet>
</fileSets>
<baseDirectory>/</baseDirectory>
</assembly>

View File

@ -0,0 +1,24 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>tests</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${project.build.testOutputDirectory}
</directory>
<outputDirectory />
</fileSet>
</fileSets>
<!-- <dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
</dependencySet>
</dependencySets>-->
</assembly>

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Fetches the workflow working directory from HDFS via `hadoop fs -get`.
# NOTE(review): the workingDir placeholder below is presumably substituted by
# Maven resource filtering when the package is assembled - confirm.
hadoop fs -get ${workingDir}

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Prints a short header followed by the HDFS listing of the workflow working directory.
# NOTE(review): the workingDir placeholder below is presumably substituted by
# Maven resource filtering when the package is assembled - confirm.
echo ""
echo "---->Contents of the working directory"
hadoop fs -ls ${workingDir}

View File

@ -0,0 +1,5 @@
Execute the scripts in the following order:
1. `upload_workflow.sh`
2. `run_workflow.sh`
3. `print_working_dir.sh` or `get_working_dir.sh`

View File

@ -0,0 +1,10 @@
#!/bin/bash
# Submits and starts the Oozie workflow described by a job.properties file.
#
# Usage: run_workflow.sh [package_dir]
#   package_dir  optional directory containing job.properties; when omitted,
#                job.properties is read from the current directory.
#
# NOTE(review): the oozieServiceLoc placeholder is presumably substituted by
# Maven resource filtering when the package is assembled - confirm.
if [ $# -eq 0 ] ; then
    oozie job -oozie ${oozieServiceLoc} -config job.properties -run
else
    # Quote the argument so directories containing spaces are passed as one word.
    oozie job -oozie ${oozieServiceLoc} -config "$1/job.properties" -run
fi

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Uploads the Oozie application directory to the HDFS sandbox.
#
# Usage: upload_workflow.sh [package_dir]
#   package_dir  optional directory containing the Oozie application; when
#                omitted, the current directory is used.
#
# NOTE(review): the oozieAppDir, sandboxDir and workingDir placeholders are
# presumably substituted by Maven resource filtering at build time - confirm.

# Route the xtrace output to a copy of stdout (fd 3) instead of stderr.
exec 3>&1
BASH_XTRACEFD=3
set -x ## print every executed command
if [ $# -eq 0 ] ; then
    target_dir_root="$(pwd)"'/${oozieAppDir}'
else
    # Quote the argument so directories containing spaces survive word splitting.
    target_dir_root="$(readlink -f "$1")"'/${oozieAppDir}'
fi
# initial phase, creating symbolic links to jars in all subworkflows
# currently disabled
#libDir=$target_dir_root'/lib'
#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
#for dir in $dirs
#do
# if [ -f $dir/workflow.xml ]
# then
# echo "creating symbolic links to jars in directory: $dir/lib"
# if [ ! -d "$dir/lib" ]; then
# mkdir $dir/lib
# fi
# find $libDir -type f -exec ln -s \{\} $dir/lib \;
# fi
#done
#uploading
# The sandbox is removed and recreated, so every upload starts from a clean state.
hadoop fs -rm -r ${sandboxDir}
hadoop fs -mkdir -p ${sandboxDir}
hadoop fs -mkdir -p ${workingDir}
hadoop fs -put "$target_dir_root" ${sandboxDir}

View File

@ -0,0 +1,7 @@
#sandboxName when not provided explicitly will be generated
sandboxName=${sandboxName}
sandboxDir=/user/${dhp.hadoop.frontend.user.name}/${sandboxName}
workingDir=${sandboxDir}/working_dir
oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
oozieTopWfApplicationPath = ${oozie.wf.application.path}

View File

@ -0,0 +1,6 @@
Maven plugin module utilized by `dhp-workflows` for proper `job.properties` file building.
It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with:
* handling of the `includePropertyKeysFromFiles` property, which allows writing only the properties listed in the given property files
As a final outcome, only properties listed in the `<include>` element or listed as keys in the files from the `<includePropertyKeysFromFiles>` element will be written to the output file.

View File

@ -0,0 +1,132 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-build</artifactId>
<version>4.1.13-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<version>4.1.13-SNAPSHOT</version>
<packaging>maven-plugin</packaging>
<description>This module is a maven plugin implementing custom properties substitutions in the build lifecycle</description>
<dependencies>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>3.6.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-project</artifactId>
<version>2.2.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-artifact</artifactId>
<version>2.2.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>${properties.maven.plugin.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>annotations</artifactId>
<version>3.0.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<version>3.0.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>${mockito-core.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<version>${mockito-core.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<directory>target</directory>
<outputDirectory>target/classes</outputDirectory>
<finalName>${project.artifactId}-${project.version}</finalName>
<testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.plugin.version}</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<detectLinks>true</detectLinks>
<!--<doclint>none</doclint>-->
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-plugin-plugin</artifactId>
<version>3.2</version>
<configuration>
<skipErrorNoDescriptorsFound>true</skipErrorNoDescriptorsFound>
</configuration>
<executions>
<execution>
<id>mojo-descriptor</id>
<phase>process-classes</phase>
<goals>
<goal>descriptor</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -0,0 +1,76 @@
package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
/**
* Generates oozie properties which were not provided from commandline.
*
* @author mhorst
* @goal generate-properties
*/
public class GenerateOoziePropertiesMojo extends AbstractMojo {

	/** Name of the system property holding the workflow source directory. */
	public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";

	/** Name of the system property that receives the generated sandbox name. */
	public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";

	/** Directory names at which the backwards walk over the source path stops. */
	private final String[] limiters = {
		"dhp", "dnetlib", "eu"
	};

	/**
	 * Sets the {@value #PROPERTY_NAME_SANDBOX_NAME} system property from the
	 * {@value #PROPERTY_NAME_WF_SOURCE_DIR} system property, but only when the source directory is
	 * defined and no sandbox name was provided. When no name can be derived, the property is left
	 * unset and a warning is written to the Mojo log.
	 */
	@Override
	public void execute() throws MojoExecutionException, MojoFailureException {
		boolean sourceDirProvided = System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR);
		if (!sourceDirProvided || System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
			// nothing to derive from, or an explicitly provided name must not be overridden
			return;
		}

		String wfSourceDir = System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR);
		String generatedSandboxName = generateSandboxName(wfSourceDir);
		if (generatedSandboxName != null) {
			System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
		} else {
			// report through the Mojo logger rather than raw stdout
			getLog().warn("unable to generate sandbox name from path: " + wfSourceDir);
		}
	}

	/**
	 * Generates sandbox name from workflow source directory.
	 *
	 * <p>The path is split on {@link File#separatorChar} and walked from its last element towards
	 * its first; elements are collected until one of the limiters is met. The collected elements
	 * are returned in their original order, joined with {@link File#separator}.
	 *
	 * @param wfSourceDir workflow source directory
	 * @return generated sandbox name, or {@code null} when the path yields no elements or its last
	 *         element is itself a limiter
	 */
	private String generateSandboxName(String wfSourceDir) {
		String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
		if (tokens == null || tokens.length == 0) {
			return null;
		}
		// visit the deepest directory first
		ArrayUtils.reverse(tokens);

		List<String> sandboxNameParts = new ArrayList<String>();
		for (String token : tokens) {
			if (isLimiter(token)) {
				break;
			}
			// prepend, so the parts end up in their original path order
			sandboxNameParts.add(0, token);
		}
		return sandboxNameParts.isEmpty() ? null : String.join(File.separator, sandboxNameParts);
	}

	/** Tells whether the given path element is one of the limiters. */
	private boolean isLimiter(String token) {
		for (String limiter : limiters) {
			if (limiter.equals(token)) {
				return true;
			}
		}
		return false;
	}
}

View File

@ -0,0 +1,447 @@
/**
* Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the License at
*
* <p>http://www.opensource.org/licenses/ecl2.php
*
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
import org.apache.maven.project.MavenProject;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files. Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/2.0.1/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties
*/
public class WritePredefinedProjectProperties extends AbstractMojo {
// Literal characters that the symbolic escape tokens "cr", "lf" and "tab" resolve to.
private static final String CR = "\r";
private static final String LF = "\n";
private static final String TAB = "\t";
// Prefix prepended to every environment variable name when it is exposed as a property.
protected static final String PROPERTY_PREFIX_ENV = "env.";
// Charset name used when writing the output properties file.
private static final String ENCODING_UTF8 = "utf8";
/** @parameter property="properties.includePropertyKeysFromFiles" */
private String[] includePropertyKeysFromFiles;
/**
* @parameter default-value="${project}"
* @required
* @readonly
*/
protected MavenProject project;
/**
* The file that properties will be written to
*
* @parameter property="properties.outputFile"
* default-value="${project.build.directory}/properties/project.properties";
* @required
*/
protected File outputFile;
/**
* If true, the plugin will silently ignore any non-existent properties files, and the build will continue
*
* @parameter property="properties.quiet" default-value="true"
*/
private boolean quiet;
/**
* Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
* tab=tab. Any other values are taken literally.
*
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
*/
private String escapeChars;
/**
* If true, the plugin will include system properties when writing the properties file. System properties override
* both environment variables and project properties.
*
* @parameter default-value="false" property="properties.includeSystemProperties"
*/
private boolean includeSystemProperties;
/**
* If true, the plugin will include environment variables when writing the properties file. Environment variables
* are prefixed with "env". Environment variables override project properties.
*
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
*/
private boolean includeEnvironmentVariables;
/**
* Comma separated set of properties to exclude when writing the properties file
*
* @parameter property="properties.exclude"
*/
private String exclude;
/**
* Comma separated set of properties to write to the properties file. If provided, only the properties matching
* those supplied here will be written to the properties file.
*
* @parameter property="properties.include"
*/
private String include;
/*
 * Gathers the properties to write, filters them and stores them in the output file.
 * (non-Javadoc)
 * @see org.apache.maven.plugin.AbstractMojo#execute()
 */
@Override
@SuppressFBWarnings({
	"NP_UNWRITTEN_FIELD", "UWF_UNWRITTEN_FIELD"
})
public void execute() throws MojoExecutionException, MojoFailureException {
	// Sources are layered: each later putAll overrides equal keys of the earlier ones.
	Properties collected = new Properties();
	collected.putAll(project.getProperties());
	if (includeEnvironmentVariables) {
		collected.putAll(getEnvironmentVariables());
	}
	if (includeSystemProperties) {
		collected.putAll(System.getProperties());
	}

	// Apply the exclude/include filters in place.
	trim(collected, exclude, include);

	String header = "# " + new Date() + "\n";
	List<String> tokensToEscape = getEscapeChars(escapeChars);
	getLog().info("Creating " + outputFile);
	writeProperties(outputFile, header, collected, tokensToEscape);
}
/**
 * Exposes the process environment as properties, each key being the variable name prefixed
 * with {@link #PROPERTY_PREFIX_ENV}.
 *
 * @return environment variables
 */
protected static Properties getEnvironmentVariables() {
	final Properties envProps = new Properties();
	for (Entry<String, String> variable : System.getenv().entrySet()) {
		String prefixedName = PROPERTY_PREFIX_ENV + variable.getKey();
		envProps.setProperty(prefixedName, variable.getValue());
	}
	return envProps;
}
/**
 * Removes, in place, the properties which should not be written.
 *
 * <p>Keys listed in {@code omitCSV} are always removed. The keys to retain are those listed in
 * {@code includeCSV} plus every key found in the configured key files; when that combined list is
 * empty, no further filtering takes place.
 *
 * @param properties properties to filter
 * @param omitCSV comma separated keys to remove
 * @param includeCSV comma separated keys to retain
 * @throws MojoExecutionException when a key file is missing (and the plugin is not quiet) or
 *         cannot be read
 */
protected void trim(Properties properties, String omitCSV, String includeCSV)
	throws MojoExecutionException {
	for (String excluded : getListFromCSV(omitCSV)) {
		properties.remove(excluded);
	}

	List<String> retained = getListFromCSV(includeCSV);
	// mh: including keys from predefined properties
	if (includePropertyKeysFromFiles != null) {
		for (String keyFileLocation : includePropertyKeysFromFiles) {
			if (!validate(keyFileLocation)) {
				continue;
			}
			retained.addAll(getProperties(keyFileLocation).stringPropertyNames());
		}
	}

	if (retained.isEmpty()) {
		// removing only when include keys provided
		return;
	}
	for (String candidate : properties.stringPropertyNames()) {
		if (!retained.contains(candidate)) {
			properties.remove(candidate);
		}
	}
}
/**
 * Checks whether the given location exists, first as a file and then as a resource resolved by a
 * {@link DefaultResourceLoader}.
 *
 * @param location file path or resource location
 * @return true when exists, false otherwise (including blank locations).
 */
protected boolean exists(String location) {
	if (StringUtils.isBlank(location)) {
		return false;
	}
	if (new File(location).exists()) {
		return true;
	}
	ResourceLoader resourceLoader = new DefaultResourceLoader();
	return resourceLoader.getResource(location).exists();
}
/**
 * Validates resource location.
 *
 * @param location file path or resource location
 * @return true when the location exists, false when it is missing and the plugin is quiet
 * @throws MojoExecutionException when the location is missing and the plugin is not quiet
 */
protected boolean validate(String location) throws MojoExecutionException {
	if (exists(location)) {
		return true;
	}
	if (!quiet) {
		throw new MojoExecutionException("Non-existent properties file '" + location + "'");
	}
	getLog().info("Ignoring non-existent properties file '" + location + "'");
	return false;
}
/**
 * Opens the given location for reading: as a plain file when such a file exists, otherwise as a
 * resource resolved by a {@link DefaultResourceLoader}.
 *
 * @param location file path or resource location
 * @return input stream
 * @throws IOException when the stream cannot be opened
 */
protected InputStream getInputStream(String location) throws IOException {
	boolean isPlainFile = new File(location).exists();
	if (isPlainFile) {
		return new FileInputStream(location);
	}
	ResourceLoader resourceLoader = new DefaultResourceLoader();
	return resourceLoader.getResource(location).getInputStream();
}
/**
 * Loads the properties stored at the given location. Locations ending with ".xml" (case
 * insensitive) are read in the XML properties format, all others in the plain format.
 *
 * <p>The stream is managed by try-with-resources, so an {@link IOException} raised while closing
 * it is reported as a {@link MojoExecutionException} as well.
 *
 * @param location file path or resource location
 * @return properties for given location
 * @throws MojoExecutionException when the location cannot be opened or read
 */
protected Properties getProperties(String location) throws MojoExecutionException {
	try (InputStream in = getInputStream(location)) {
		Properties properties = new Properties();
		if (location.toLowerCase().endsWith(".xml")) {
			properties.loadFromXML(in);
		} else {
			properties.load(in);
		}
		return properties;
	} catch (IOException e) {
		throw new MojoExecutionException("Error reading properties file " + location, e);
	}
}
/**
 * Resolves a comma separated list of escape tokens to the actual strings to escape.
 *
 * @param escapeChars comma separated tokens
 * @return escape characters, in the order they were listed
 */
protected List<String> getEscapeChars(String escapeChars) {
	List<String> resolved = new ArrayList<String>();
	for (String symbolicToken : getListFromCSV(escapeChars)) {
		resolved.add(getRealToken(symbolicToken));
	}
	return resolved;
}
/**
 * Maps the symbolic tokens "cr", "lf" and "tab" (case insensitive) to the characters they stand
 * for; any other token is returned unchanged.
 *
 * @param token symbolic or literal token
 * @return real token
 */
protected String getRealToken(String token) {
	if (token.equalsIgnoreCase("CR")) {
		return CR;
	}
	if (token.equalsIgnoreCase("LF")) {
		return LF;
	}
	if (token.equalsIgnoreCase("TAB")) {
		return TAB;
	}
	return token;
}
/**
 * Renders the properties as text: the comment first (when not blank), then one
 * {@code name=value} line per property, sorted by name, with values escaped.
 *
 * @param comment leading text, skipped when blank
 * @param properties properties to render
 * @param escapeTokens strings to escape inside the values
 * @return content
 */
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
	List<String> sortedNames = new ArrayList<String>(properties.stringPropertyNames());
	Collections.sort(sortedNames);

	StringBuilder content = new StringBuilder();
	if (!StringUtils.isBlank(comment)) {
		content.append(comment);
	}
	for (String propertyName : sortedNames) {
		String escapedValue = escape(properties.getProperty(propertyName), escapeTokens);
		content.append(propertyName).append("=").append(escapedValue).append("\n");
	}
	return content.toString();
}
/**
 * Renders the properties and writes the result to the given file.
 *
 * @param file target file
 * @param comment leading text of the file
 * @param properties properties to write
 * @param escapeTokens strings to escape inside the values
 * @throws MojoExecutionException when the file cannot be written
 */
protected void writeProperties(
	File file, String comment, Properties properties, List<String> escapeTokens)
	throws MojoExecutionException {
	try {
		FileUtils.writeStringToFile(file, getContent(comment, properties, escapeTokens), ENCODING_UTF8);
	} catch (IOException e) {
		throw new MojoExecutionException("Error creating properties file", e);
	}
}
/**
 * Replaces every occurrence of each escape string in the value with its replacement token. The
 * escape strings are applied one after another, in list order.
 *
 * @param s value to escape
 * @param escapeChars strings to escape
 * @return the escaped value
 */
protected String escape(String s, List<String> escapeChars) {
	String escaped = s;
	for (int i = 0; i < escapeChars.size(); i++) {
		String target = escapeChars.get(i);
		escaped = escaped.replace(target, getReplacementToken(target));
	}
	return escaped;
}
/**
 * Provides the text written in place of an escaped string: backslash-r, backslash-n and
 * backslash-t for carriage return, line feed and tab, and the string itself preceded by a
 * backslash for anything else.
 *
 * @param escapeChar string being escaped
 * @return replacement token
 */
protected String getReplacementToken(String escapeChar) {
	return escapeChar.equals(CR) ? "\\r"
		: escapeChar.equals(LF) ? "\\n"
		: escapeChar.equals(TAB) ? "\\t"
		: "\\" + escapeChar;
}
/**
 * Splits a comma separated string into its trimmed elements.
 *
 * @param csv comma separated values, may be blank
 * @return list of values generated from CSV; a new, empty list when the input is blank
 */
protected static final List<String> getListFromCSV(String csv) {
	List<String> values = new ArrayList<String>();
	if (!StringUtils.isBlank(csv)) {
		for (String rawValue : StringUtils.split(csv, ",")) {
			values.add(rawValue.trim());
		}
	}
	return values;
}
/** Sets whether system properties are added to the written properties. */
public void setIncludeSystemProperties(boolean includeSystemProperties) {
this.includeSystemProperties = includeSystemProperties;
}
/** Sets the comma separated list of tokens to escape in property values. */
public void setEscapeChars(String escapeChars) {
this.escapeChars = escapeChars;
}
/** Sets whether environment variables are added to the written properties. */
public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) {
this.includeEnvironmentVariables = includeEnvironmentVariables;
}
/** Sets the comma separated list of property keys to leave out. */
public void setExclude(String exclude) {
this.exclude = exclude;
}
/** Sets the comma separated list of property keys to retain. */
public void setInclude(String include) {
this.include = include;
}
/** Sets whether non-existent key files are ignored (true) or fail the build (false). */
public void setQuiet(boolean quiet) {
this.quiet = quiet;
}
/**
 * Sets property files for which keys properties should be included. The array is copied, so
 * later changes to the argument do not affect this mojo; a {@code null} argument leaves the
 * current value untouched.
 *
 * @param includePropertyKeysFromFiles locations of the key files
 */
public void setIncludePropertyKeysFromFiles(String[] includePropertyKeysFromFiles) {
	if (includePropertyKeysFromFiles == null) {
		return;
	}
	int count = includePropertyKeysFromFiles.length;
	this.includePropertyKeysFromFiles = Arrays.copyOf(includePropertyKeysFromFiles, count);
}
}

View File

@ -0,0 +1,110 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import org.junit.jupiter.api.*;
import java.nio.file.Paths;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {

	private static final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();

	/**
	 * Resets the two system properties the mojo works with. Registered as a lifecycle hook so that
	 * every test starts from a clean state without having to call it explicitly.
	 */
	@BeforeEach
	public void clearSystemProperties() {
		System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
		System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
	}

	/** Without a workflow source directory no sandbox name is generated. */
	@Test
	public void testExecuteEmpty() throws Exception {
		// execute
		mojo.execute();

		// assert
		assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}

	/** An explicitly provided sandbox name is never overridden. */
	@Test
	public void testExecuteSandboxNameAlreadySet() throws Exception {
		// given
		String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
		String sandboxName = "originalSandboxName";
		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
		System.setProperty(PROPERTY_NAME_SANDBOX_NAME, sandboxName);

		// execute
		mojo.execute();

		// assert
		assertEquals(sandboxName, System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}

	/** An empty workflow source directory yields no sandbox name. */
	// NOTE(review): this test was previously annotated with a "fails" remark - confirm whether it
	// still fails before relying on it.
	@Test
	public void testExecuteEmptyWorkflowSourceDir() throws Exception {
		// given
		String workflowSourceDir = "";
		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

		// execute
		mojo.execute();

		// assert
		assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}

	/** A path ending with a limiter directory yields no sandbox name. */
	@Test
	public void testExecuteNullSandboxNameGenerated() throws Exception {
		// given
		String workflowSourceDir = Paths.get("eu/dnetlib/dhp/").toString();
		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

		// execute
		mojo.execute();

		// assert
		assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}

	/** The sandbox name consists of the path elements following the last limiter. */
	@Test
	public void testExecute() throws Exception {
		// given
		String workflowSourceDir = Paths.get("eu/dnetlib/dhp/wf/transformers").toString();
		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

		// execute
		mojo.execute();

		// assert
		assertEquals(Paths.get("wf/transformers").toString(), System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}

	/** A path without any limiter is used as the sandbox name in full. */
	@Test
	public void testExecuteWithoutRoot() throws Exception {
		// given
		String workflowSourceDir = Paths.get("wf/transformers").toString();
		System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

		// execute
		mojo.execute();

		// assert
		assertEquals(Paths.get("wf/transformers").toString(), System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
	}
}

View File

@ -0,0 +1,391 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
import java.io.*;
import java.util.Properties;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Tests for {@link WritePredefinedProjectProperties}.
 *
 * <p>Each test builds a mojo backed by a mocked {@link MavenProject}, runs {@code execute()} and
 * inspects the {@code test.properties} file written into a JUnit-managed temporary directory.
 * Every stream opened by the tests is closed explicitly so the temporary directory can be removed
 * afterwards, and system properties set by a test are cleared again so tests stay independent of
 * execution order.
 *
 * <p>The class is currently annotated with {@code @Disabled}.
 *
 * @author mhorst, claudio.atzori
 */
@Disabled
@ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest {

    @Mock
    private MavenProject mavenProject;

    private WritePredefinedProjectProperties mojo;

    @TempDir
    File testFolder;

    /**
     * Creates a fresh mojo that writes to {@code test.properties} inside {@code testFolder} and is
     * wired to the mocked project, which by default exposes an empty property set.
     *
     * @param testFolder directory that receives the generated properties file
     */
    public void init(File testFolder) {
        // NOTE(review): MockitoExtension presumably initialises the @Mock field already, which would
        // make this call redundant - confirm before removing it.
        MockitoAnnotations.initMocks(this);
        mojo = new WritePredefinedProjectProperties();
        mojo.outputFile = getPropertiesFileLocation(testFolder);
        mojo.project = mavenProject;
        // Lenient default; individual tests override it with their own project properties.
        lenient().doReturn(new Properties()).when(mavenProject).getProperties();
    }

    // ----------------------------------- TESTS ---------------------------------------------

    /** With no properties available, an empty output file must still be written. */
    @Test
    public void testExecuteEmpty() throws Exception {
        init(testFolder);

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(mojo.outputFile.getParentFile());
        assertEquals(0, storedProperties.size());
    }

    /** A single project property must be written to the output file unchanged. */
    @Test
    public void testExecuteWithProjectProperties() throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        doReturn(projectProperties).when(mavenProject).getProperties();

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(mojo.outputFile.getParentFile());
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(key));
        assertEquals(value, storedProperties.getProperty(key));
    }

    /** Pointing the output file at a directory must make execution fail. */
    @Test
    public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        doReturn(projectProperties).when(mavenProject).getProperties();
        // a directory cannot be written as a properties file
        mojo.outputFile = testFolder;

        // execute
        assertThrows(MojoExecutionException.class, () -> mojo.execute());
    }

    /** An excluded key must be left out while the remaining property is written. */
    @Test
    public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String excludedKey = "excludedPropertyKey";
        String excludedValue = "excludedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(excludedKey, excludedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();
        mojo.setExclude(excludedKey);

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(key));
        assertEquals(value, storedProperties.getProperty(key));
    }

    /** When an include key is given, only that property must be written. */
    @Test
    public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();
        mojo.setInclude(includedKey);

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(includedKey));
        assertEquals(includedValue, storedProperties.getProperty(includedKey));
    }

    /**
     * Keys listed in a {@code .properties} file on disk select which project properties are
     * written; the value stored in that file is not the one that ends up in the output.
     */
    @Test
    public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();

        File includedPropertiesFile = new File(testFolder, "included.properties");
        Properties includedProperties = new Properties();
        includedProperties.setProperty(includedKey, "irrelevantValue");
        // Properties.store leaves the writer open, so close it here
        try (Writer writer = new FileWriter(includedPropertiesFile)) {
            includedProperties.store(writer, null);
        }

        mojo.setIncludePropertyKeysFromFiles(new String[] {
            includedPropertiesFile.getAbsolutePath()
        });

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(includedKey));
        assertEquals(includedValue, storedProperties.getProperty(includedKey));
    }

    /** Same as the file-based variant, but the key list is given as a classpath resource path. */
    @Test
    public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
        throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();

        mojo
            .setIncludePropertyKeysFromFiles(
                new String[] {
                    "/eu/dnetlib/maven/plugin/properties/included.properties"
                });

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(includedKey));
        assertEquals(includedValue, storedProperties.getProperty(includedKey));
    }

    /** A blank include-file location must make execution fail. */
    @Test
    public void testExecuteIncludingPropertyKeysFromBlankLocation() {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();

        mojo.setIncludePropertyKeysFromFiles(new String[] {
            ""
        });

        // execute
        assertThrows(MojoExecutionException.class, () -> mojo.execute());
    }

    /** Keys listed in a well-formed XML properties file select which properties are written. */
    @Test
    public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
        throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();

        File includedPropertiesFile = new File(testFolder, "included.xml");
        Properties includedProperties = new Properties();
        includedProperties.setProperty(includedKey, "irrelevantValue");
        // Properties.storeToXML leaves the stream open, so close it here
        try (OutputStream out = new FileOutputStream(includedPropertiesFile)) {
            includedProperties.storeToXML(out, null);
        }

        mojo.setIncludePropertyKeysFromFiles(new String[] {
            includedPropertiesFile.getAbsolutePath()
        });

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(1, storedProperties.size());
        assertTrue(storedProperties.containsKey(includedKey));
        assertEquals(includedValue, storedProperties.getProperty(includedKey));
    }

    /** An {@code .xml} include file whose content is not XML must make execution fail. */
    @Test
    public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
        throws Exception {
        init(testFolder);

        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
        String includedKey = "includedPropertyKey";
        String includedValue = "includedPropertyValue";
        Properties projectProperties = new Properties();
        projectProperties.setProperty(key, value);
        projectProperties.setProperty(includedKey, includedValue);
        doReturn(projectProperties).when(mavenProject).getProperties();

        File includedPropertiesFile = new File(testFolder, "included.xml");
        Properties includedProperties = new Properties();
        includedProperties.setProperty(includedKey, "irrelevantValue");
        // deliberately written in plain .properties format despite the .xml extension
        try (OutputStream out = new FileOutputStream(includedPropertiesFile)) {
            includedProperties.store(out, null);
        }

        mojo.setIncludePropertyKeysFromFiles(new String[] {
            includedPropertiesFile.getAbsolutePath()
        });

        // execute
        assertThrows(MojoExecutionException.class, () -> mojo.execute());
    }

    /** In quiet mode an unresolvable include location must not fail; an empty file is written. */
    @Test
    public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        mojo.setQuiet(true);
        mojo.setIncludePropertyKeysFromFiles(new String[] {
            "invalid location"
        });

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertEquals(0, storedProperties.size());
    }

    /** Without quiet mode an unresolvable include location must make execution fail. */
    @Test
    public void testExecuteIncludingPropertyKeysFromInvalidFile() {
        init(testFolder);

        // given
        mojo.setIncludePropertyKeysFromFiles(new String[] {
            "invalid location"
        });

        // execute
        assertThrows(MojoExecutionException.class, () -> mojo.execute());
    }

    /** Environment variables must be written with the {@code PROPERTY_PREFIX_ENV} prefix. */
    @Test
    public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        mojo.setIncludeEnvironmentVariables(true);

        // execute
        mojo.execute();

        // assert
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties(testFolder);
        assertTrue(storedProperties.size() > 0);
        for (Object currentKey : storedProperties.keySet()) {
            assertTrue(((String) currentKey).startsWith(PROPERTY_PREFIX_ENV));
        }
    }

    /** A JVM system property must be written when system properties are included. */
    @Test
    public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
        init(testFolder);

        // given
        String key = "systemPropertyKey";
        String value = "systemPropertyValue";
        System.setProperty(key, value);
        try {
            mojo.setIncludeSystemProperties(true);

            // execute
            mojo.execute();

            // assert
            assertTrue(mojo.outputFile.exists());
            Properties storedProperties = getStoredProperties(testFolder);
            assertTrue(storedProperties.size() > 0);
            assertTrue(storedProperties.containsKey(key));
            assertEquals(value, storedProperties.getProperty(key));
        } finally {
            // do not leak global JVM state into other tests
            System.clearProperty(key);
        }
    }

    /**
     * With escape characters configured, a system property key carrying trailing whitespace must
     * be stored under its trimmed name only.
     */
    @Test
    public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
        throws Exception {
        init(testFolder);

        // given
        String key = "systemPropertyKey ";
        String value = "systemPropertyValue";
        System.setProperty(key, value);
        try {
            mojo.setIncludeSystemProperties(true);
            String escapeChars = "cr,lf,tab,|";
            mojo.setEscapeChars(escapeChars);

            // execute
            mojo.execute();

            // assert
            assertTrue(mojo.outputFile.exists());
            Properties storedProperties = getStoredProperties(testFolder);
            assertTrue(storedProperties.size() > 0);
            assertFalse(storedProperties.containsKey(key));
            assertTrue(storedProperties.containsKey(key.trim()));
            assertEquals(value, storedProperties.getProperty(key.trim()));
        } finally {
            // do not leak global JVM state into other tests
            System.clearProperty(key);
        }
    }

    // ----------------------------------- PRIVATE -------------------------------------------

    /** Returns the location of {@code test.properties} inside the given folder. */
    private File getPropertiesFileLocation(File testFolder) {
        return new File(testFolder, "test.properties");
    }

    /**
     * Loads {@code test.properties} from the given folder.
     *
     * @param testFolder folder containing the file written by the mojo
     * @return the properties read from that file
     * @throws IOException if the file cannot be opened or read
     */
    private Properties getStoredProperties(File testFolder)
        throws IOException {
        Properties properties = new Properties();
        // Properties.load leaves the stream open, so close it here
        try (InputStream in = new FileInputStream(getPropertiesFileLocation(testFolder))) {
            properties.load(in);
        }
        return properties;
    }
}

110
dhp-build/dhp-build.ipr Normal file
View File

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
<component name="CodeStyleManager">
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
<option name="CODE_STYLE_SCHEME" value=""/>
</component>
<component name="libraryTable"/>
<component name="CompilerConfiguration">
<option name="DEFAULT_COMPILER" value="Javac"/>
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
<!--
<wildcardResourcePatterns>
<entry name="${wildcardResourcePattern}"/>
</wildcardResourcePatterns>
-->
<wildcardResourcePatterns>
<entry name="!?*.java"/>
</wildcardResourcePatterns>
</component>
<component name="JavacSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="DEPRECATION" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
<option name="USE_GENERICS_COMPILER" value="false"/>
</component>
<component name="JikesSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="DEPRECATION" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
<option name="IS_INCREMENTAL_MODE" value="false"/>
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
</component>
<component name="AntConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="FILTER_TARGETS" value="false"/>
</component>
<component name="JavadocGenerationManager">
<option name="OUTPUT_DIRECTORY"/>
<option name="OPTION_SCOPE" value="protected"/>
<option name="OPTION_HIERARCHY" value="false"/>
<option name="OPTION_NAVIGATOR" value="false"/>
<option name="OPTION_INDEX" value="false"/>
<option name="OPTION_SEPARATE_INDEX" value="false"/>
<option name="OPTION_USE_1_1" value="false"/>
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
<option name="OPTION_DEPRECATED_LIST" value="false"/>
<option name="OTHER_OPTIONS"/>
<option name="HEAP_SIZE"/>
<option name="OPEN_IN_BROWSER" value="false"/>
</component>
<component name="JUnitProjectSettings">
<option name="TEST_RUNNER" value="UI"/>
</component>
<component name="EntryPointsManager">
<entry_points/>
</component>
<component name="DataSourceManager"/>
<component name="ExportToHTMLSettings">
<option name="PRINT_LINE_NUMBERS" value="false"/>
<option name="OPEN_IN_BROWSER" value="false"/>
<option name="OUTPUT_DIRECTORY"/>
</component>
<component name="ImportConfiguration">
<option name="VENDOR"/>
<option name="RELEASE_TAG"/>
<option name="LOG_MESSAGE"/>
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
</component>
<component name="ProjectModuleManager">
<modules>
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
<module filepath="$PROJECT_DIR$/dhp-build.iml"/>
<module filepath="$PROJECT_DIR$/dhp-code-style/dhp-code-style.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build-assembly-resources/dhp-build-assembly-resources.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build-properties-maven-plugin/dhp-build-properties-maven-plugin.iml"/>
</modules>
</component>
<UsedPathMacros>
<!--<macro name="cargo"></macro>-->
</UsedPathMacros>
</project>

418
dhp-build/dhp-build.iws Normal file
View File

@ -0,0 +1,418 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="LvcsProjectConfiguration">
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
</component>
<component name="PropertiesComponent">
<property name="MemberChooser.copyJavadoc" value="false"/>
<property name="GoToClass.includeLibraries" value="false"/>
<property name="MemberChooser.showClasses" value="true"/>
<property name="MemberChooser.sorted" value="false"/>
<property name="GoToFile.includeJavaFiles" value="false"/>
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
</component>
<component name="ToolWindowManager">
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
<editor active="false"/>
<layout>
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
</layout>
</component>
<component name="ErrorTreeViewConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="HIDE_WARNINGS" value="false"/>
</component>
<component name="StructureViewFactory">
<option name="SORT_MODE" value="0"/>
<option name="GROUP_INHERITED" value="true"/>
<option name="AUTOSCROLL_MODE" value="true"/>
<option name="SHOW_FIELDS" value="true"/>
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
<option name="SHOW_INHERITED" value="false"/>
<option name="HIDE_NOT_PUBLIC" value="false"/>
</component>
<component name="ProjectViewSettings">
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
<view id="ProjectPane">
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
</view>
<view id="SourcepathPane"/>
<view id="ClasspathPane"/>
</component>
<component name="Commander">
<leftPanel view="Project"/>
<rightPanel view="Project"/>
<splitter proportion="0.5"/>
</component>
<component name="AspectsView"/>
<component name="SelectInManager"/>
<component name="HierarchyBrowserManager">
<option name="SHOW_PACKAGES" value="false"/>
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SORT_ALPHABETICALLY" value="false"/>
</component>
<component name="TodoView" selected-index="0">
<todo-panel id="selected-file">
<are-packages-shown value="false"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
<todo-panel id="all">
<are-packages-shown value="true"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
</component>
<component name="editorManager"/>
<component name="editorHistoryManager"/>
<component name="DaemonCodeAnalyzer">
<disable_hints/>
</component>
<component name="InspectionManager">
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SPLITTER_PROPORTION" value="0.5"/>
<profile name="Default"/>
</component>
<component name="BookmarkManager"/>
<component name="DebuggerManager">
<line_breakpoints/>
<exception_breakpoints>
<breakpoint_any>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="ENABLED" value="false"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</breakpoint_any>
</exception_breakpoints>
<field_breakpoints/>
<method_breakpoints/>
</component>
<component name="DebuggerSettings">
<option name="TRACING_FILTERS_ENABLED" value="true"/>
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
<option name="VALUE_LOOKUP_DELAY" value="700"/>
<option name="DEBUGGER_TRANSPORT" value="0"/>
<option name="FORCE_CLASSIC_VM" value="true"/>
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
<option name="SKIP_CONSTRUCTORS" value="false"/>
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
<default_breakpoint_settings>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="WATCH_MODIFICATION" value="true"/>
<option name="WATCH_ACCESS" value="true"/>
<option name="WATCH_ENTRY" value="true"/>
<option name="WATCH_EXIT" value="true"/>
<option name="ENABLED" value="true"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</default_breakpoint_settings>
<filter>
<option name="PATTERN" value="com.sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="java.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="javax.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="org.omg.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="junit.*"/>
<option name="ENABLED" value="true"/>
</filter>
</component>
<component name="CompilerWorkspaceConfiguration">
<option name="COMPILE_IN_BACKGROUND" value="false"/>
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
</component>
<component name="RunManager">
<activeType name="Application"/>
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
<module name=""/>
<option name="MAIN_CLASS_NAME"/>
<option name="HTML_FILE_NAME"/>
<option name="HTML_USED" value="false"/>
<option name="WIDTH" value="400"/>
<option name="HEIGHT" value="300"/>
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
<option name="VM_PARAMETERS"/>
</configuration>
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
<option name="USE_SOCKET_TRANSPORT" value="true"/>
<option name="SERVER_MODE" value="false"/>
<option name="SHMEM_ADDRESS" value="javadebug"/>
<option name="HOST" value="localhost"/>
<option name="PORT" value="5005"/>
</configuration>
<configuration selected="false" default="true" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME"/>
<option name="VM_PARAMETERS"/>
<option name="PROGRAM_PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<module name=""/>
</configuration>
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
<module name=""/>
<option name="PACKAGE_NAME"/>
<option name="MAIN_CLASS_NAME"/>
<option name="METHOD_NAME"/>
<option name="TEST_OBJECT" value="class"/>
<option name="VM_PARAMETERS"/>
<option name="PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<option name="ADDITIONAL_CLASS_PATH"/>
<option name="TEST_SEARCH_SCOPE">
<value defaultName="wholeProject"/>
</option>
</configuration>
</component>
<component name="VcsManagerConfiguration">
<option name="ACTIVE_VCS_NAME" value="git"/>
<option name="STATE" value="0"/>
</component>
<component name="VssConfiguration">
<CheckoutOptions>
<option name="COMMENT" value=""/>
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
<option name="REPLACE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckoutOptions>
<CheckinOptions>
<option name="COMMENT" value=""/>
<option name="KEEP_CHECKED_OUT" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckinOptions>
<AddOptions>
<option name="COMMENT" value=""/>
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
<option name="FILE_TYPE" value="0"/>
</AddOptions>
<UndocheckoutOptions>
<option name="MAKE_WRITABLE" value="false"/>
<option name="REPLACE_LOCAL_COPY" value="0"/>
<option name="RECURSIVE" value="false"/>
</UndocheckoutOptions>
<DiffOptions>
<option name="IGNORE_WHITE_SPACE" value="false"/>
<option name="IGNORE_CASE" value="false"/>
</DiffOptions>
<GetOptions>
<option name="REPLACE_WRITABLE" value="0"/>
<option name="MAKE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</GetOptions>
<option name="CLIENT_PATH" value=""/>
<option name="SRCSAFEINI_PATH" value=""/>
<option name="USER_NAME" value=""/>
<option name="PWD" value=""/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_DIFF_OPTIONS" value="true"/>
<option name="SHOW_GET_OPTIONS" value="true"/>
<option name="USE_EXTERNAL_DIFF" value="false"/>
<option name="EXTERNAL_DIFF_PATH" value=""/>
<option name="REUSE_LAST_COMMENT" value="false"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="CheckinPanelState"/>
<component name="WebViewSettings">
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
</component>
<component name="EjbViewSettings">
<EjbView showMembers="false" autoscrollToSource="false"/>
</component>
<component name="AppServerRunManager"/>
<component name="StarteamConfiguration">
<option name="SERVER" value=""/>
<option name="PORT" value="49201"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="PROJECT" value=""/>
<option name="VIEW" value=""/>
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="Cvs2Configuration">
<option name="ON_FILE_ADDING" value="0"/>
<option name="ON_FILE_REMOVING" value="0"/>
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
<option name="MERGING_MODE" value="0"/>
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
<option name="RESET_STICKY" value="false"/>
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
<option name="PROCESS_DELETED_FILES" value="false"/>
<option name="SHOW_EDIT_DIALOG" value="true"/>
<option name="RESERVED_EDIT" value="false"/>
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_CHANGES_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_OUTPUT" value="false"/>
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
<option name="ADD_WATCH_INDEX" value="0"/>
<option name="REMOVE_WATCH_INDEX" value="0"/>
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
<component name="CvsTabbedWindow"/>
<component name="SvnConfiguration">
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="AUTO_ADD_FILES" value="0"/>
<option name="AUTO_DEL_FILES" value="0"/>
</component>
<component name="PerforceConfiguration">
<option name="PORT" value="magic:1666"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="CLIENT" value=""/>
<option name="TRACE" value="false"/>
<option name="PERFORCE_STATUS" value="true"/>
<option name="CHANGELIST_OPTION" value="false"/>
<option name="SYSTEMROOT" value=""/>
<option name="P4_EXECUTABLE" value="p4"/>
<option name="SHOW_BRANCH_HISTORY" value="false"/>
<option name="GENERATE_COMMENT" value="false"/>
<option name="SYNC_OPTION" value="Sync"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
</project>

View File

@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the standalone module eu.dnetlib:dhp-code-style (jar packaging).
  Per its description, the module ships resources that support common code style
  conventions. It declares no parent and no dependencies.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-code-style</artifactId>
<version>4.1.13-SNAPSHOT</version>
<packaging>jar</packaging>
<description>This module contains resources supporting common code style conventions</description>
<!-- Deployment targets: SNAPSHOT builds are published to dnet45-snapshots, releases to dnet45-releases. -->
<!-- The <id> values must match the server ids configured in the deployer's Maven settings for authentication. -->
<distributionManagement>
<snapshotRepository>
<id>dnet45-snapshots</id>
<name>DNet45 Snapshots</name>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots</url>
<layout>default</layout>
</snapshotRepository>
<repository>
<id>dnet45-releases</id>
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
</repository>
</distributionManagement>
<build>
<!-- pluginManagement only pins plugin versions; it does not by itself bind these plugins to the build. -->
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
</plugins>
</pluginManagement>
</build>
<properties>
<!-- Source/resource files are processed as UTF-8 regardless of the platform default encoding. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
</project>

View File

@ -0,0 +1,252 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse JDT code-formatter profile "Android" (profile format version 10).
  Headline choices encoded below:
    * indentation with tab characters, tab width 4, indentation size 4
    * code lines split at 100 columns, comments at 120 columns
    * continuation indentation of 2 units
    * opening braces at end of line for every construct
    * already-wrapped lines are not re-joined
  Settings are listed alphabetically by id; the section comments below only mark
  where each family of ids starts.
-->
<profiles version="10">
<profile kind="CodeFormatterProfile" name="Android" version="10">
<!-- Alignment / wrapping policy per construct. NOTE(review): the numeric values are presumably Eclipse's encoded alignment codes; confirm against the Eclipse formatter documentation before editing by hand. -->
<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
<!-- Blank lines: counts of empty lines around package, imports, members and types. -->
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
<!-- Brace positions: every construct uses end_of_line. -->
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
<!-- Comment formatting: comments are formatted (file header excluded) and wrapped at 120 columns. -->
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="120"/>
<!-- Miscellaneous: else-if compaction and continuation indentation (2 units). -->
<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
<!-- Indentation: what gets indented relative to its enclosing construct, and the indentation size. -->
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<!-- New-line insertion: catch/else/finally/while stay on the line of the preceding closing brace ("do not insert"). -->
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
<!-- White space, part 1: spaces inserted AFTER a token. -->
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
<!-- White space, part 2: spaces inserted BEFORE a token. -->
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
<!-- White space, part 3: spaces BETWEEN empty bracket/brace/paren pairs (never inserted). -->
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
<!-- Statement layout: nothing is kept on one line. -->
<!-- NOTE(review): the id "keep_imple_if_on_one_line" looks misspelled but is believed to be the literal key Eclipse uses; verify against Eclipse's DefaultCodeFormatterConstants before "correcting" it. -->
<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
<!-- Line length, preserved blank lines, and tab policy. -->
<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="100"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="tab"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="false"/>
</profile>
</profiles>

View File

@ -0,0 +1,727 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<profiles version="18">
<profile kind="CodeFormatterProfile" name="Android_custom" version="18">
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_for_statment" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_logical_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_invocation" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_default" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.align_with_spaces" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_before_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_switch_case_expressions" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_if_while_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.count_line_length_from_starting_position" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_multiplicative_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameterized_type_references" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_logical_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_annotation_declaration_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_multiplicative_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_descriptions_grouped" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="120"/>
<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_method_body_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_loop_body_block_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_abstract_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.keep_enum_constant_declaration_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.align_variable_declarations_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_type_declaration_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_catch_clause" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_additive_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_relational_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiplicative_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_anonymous_type_declaration_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_switch_case_expressions" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_shift_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_lambda_body" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_bitwise_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_parameters" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="32"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_loops" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_for_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_relational_operator" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_annotation" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_additive_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_string_concatenation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.text_block_indentation" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_module_statements" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_after_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_names_descriptions" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_if_then_body_block_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.align_assignment_statements_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_default" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_between_different_tags" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression_chain" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_additive_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_conditional_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_shift_operator" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.align_fields_grouping_blank_lines" value="2147483647"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_bitwise_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_resources_in_try" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_try_clause" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="48"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_code_block_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_bitwise_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_assignment_operator" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_not_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_delcaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_lambda_body_block_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_arguments" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_logical_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_bitwise_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_relational_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_tag_description" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_string_concatenation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_last_class_body_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_while_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_logical_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_shift_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_statement_group_in_switch" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_lambda_declaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_shift_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_do_while_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_enum_declaration_on_one_line" value="one_line_never"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_multiplicative_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_for_loop_header" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_additive_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_getter_setter_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_string_concatenation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="tab"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_relational_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_string_concatenation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
</profile>
<profile kind="CodeFormatterProfile" name="Dnet" version="18">
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_for_statment" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_logical_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_invocation" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_switch_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_enum_constant_declaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_default" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.align_with_spaces" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_before_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_switch_case_expressions" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_if_while_statement" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.count_line_length_from_starting_position" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_multiplicative_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameterized_type_references" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_logical_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_annotation_declaration_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_multiplicative_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_descriptions_grouped" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="140"/>
<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_method_body_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_loop_body_block_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_abstract_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.keep_enum_constant_declaration_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.align_variable_declarations_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_type_declaration_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_catch_clause" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_additive_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_relational_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiplicative_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_anonymous_type_declaration_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_switch_case_expressions" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_shift_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_lambda_body" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_end_of_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_bitwise_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_parameters" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_loops" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_for_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_relational_operator" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_annotation" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_additive_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_string_concatenation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.text_block_indentation" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_module_statements" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_after_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.comment.align_tags_names_descriptions" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_if_then_body_block_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.align_assignment_statements_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_arrow_in_switch_default" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_between_different_tags" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression_chain" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_additive_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_conditional_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_shift_operator" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.align_fields_grouping_blank_lines" value="2147483647"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_bitwise_operator" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_resources_in_try" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_try_clause" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_code_block_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_bitwise_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_assignment_operator" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_not_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_method_delcaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.keep_lambda_body_block_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_type_arguments" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_arrow_in_switch_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_logical_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_bitwise_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_relational_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_tag_description" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_string_concatenation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_last_class_body_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_while_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_logical_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_shift_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_statement_group_in_switch" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.parentheses_positions_in_lambda_declaration" value="common_lines"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_shift_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_do_while_body_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_enum_declaration_on_one_line" value="one_line_if_empty"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_multiplicative_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_for_loop_header" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_additive_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.keep_simple_getter_setter_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_string_concatenation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_code_block" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="tab"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_relational_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_string_concatenation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="160"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
</profile>
</profiles>

View File

@ -0,0 +1,337 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<profiles version="13">
<profile kind="CodeFormatterProfile" name="GoogleStyle" version="13">
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.disabling_tag" value="@formatter:off"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_cascading_method_invocation_with_arguments.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.use_on_off_tags" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_prefer_two_fragments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_comment_inline_tags" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_local_variable_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_parameter" value="1040"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_type.count_dependent" value="1585|-1|1585"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression.count_dependent" value="16|4|80"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration.count_dependent" value="16|4|48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration.count_dependent" value="16|4|49"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_cascading_method_invocation_with_arguments" value="16"/>
<setting id="org.eclipse.jdt.core.compiler.source" value="1.7"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration.count_dependent" value="16|4|48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_non_simple_local_variable_annotation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants.count_dependent" value="16|5|48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="100"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation.count_dependent" value="16|4|48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="4"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.enabling_tag" value="@formatter:on"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_package" value="1585"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="16"/>
<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_non_simple_type_annotation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_field_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_method_declaration" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_generic_type_arguments" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment_new_line_at_start_of_html_paragraph" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comment_prefix" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_non_simple_parameter_annotation" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_method" value="1585"/>
<setting id="org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_parameter.count_dependent" value="1040|-1|1040"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_package.count_dependent" value="1585|-1|1585"/>
<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.force_if_else_statement_brace" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="3"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_non_simple_package_annotation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_type" value="1585"/>
<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.7"/>
<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_new_anonymous_class" value="20"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_local_variable.count_dependent" value="1585|-1|1585"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_field.count_dependent" value="1585|-1|1585"/>
<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type" value="insert"/>
<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_label" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="100"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="2"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_field" value="1585"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.7"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_resources_in_try" value="80"/>
<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration.count_dependent" value="16|4|48"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_method.count_dependent" value="1585|-1|1585"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.wrap_non_simple_member_annotation" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_annotations_on_local_variable" value="1585"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_generic_type_arguments.count_dependent" value="16|-1|16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration.count_dependent" value="16|5|80"/>
<setting id="org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.alignment_for_for_statement" value="16"/>
<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
</profile>
</profiles>

21
dhp-build/pom.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-dedup</artifactId>
<version>4.1.13-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>
<description>This module is a container for the build tools used in dnet-hadoop</description>
<modules>
<module>dhp-code-style</module>
<module>dhp-build-assembly-resources</module>
<module>dhp-build-properties-maven-plugin</module>
</modules>
</project>

View File

@ -0,0 +1,6 @@
useTree = true
entitiesPath = /user/michele.debonis/lda_experiments/authors_pubmed
workingPath = /user/michele.debonis/authors_dedup/gt2_dedup
numPartitions = 1000
dedupConfPath = /user/michele.debonis/lda_experiments/authors.fdup.gt2.conf.json
groundTruthFieldJPath = $.orcid

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-dedup</artifactId>
<version>4.0.3</version>
<version>4.1.13-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@ -15,32 +15,6 @@
<build>
<plugins>
<!-- <plugin>-->
<!-- <groupId>org.apache.maven.plugins</groupId>-->
<!-- <artifactId>maven-shade-plugin</artifactId>-->
<!-- <version>2.4.3</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <phase>package</phase>-->
<!-- <goals>-->
<!-- <goal>shade</goal>-->
<!-- </goals>-->
<!-- <configuration>-->
<!-- <filters>-->
<!-- <filter>-->
<!-- <artifact>*:*</artifact>-->
<!-- <excludes>-->
<!-- <exclude>META-INF/*.SF</exclude>-->
<!-- <exclude>META-INF/*.DSA</exclude>-->
<!-- <exclude>META-INF/*.RSA</exclude>-->
<!-- </excludes>-->
<!-- </filter>-->
<!-- </filters>-->
<!-- </configuration>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
@ -60,10 +34,6 @@
<includes>
<include>**/*.java</include>
</includes>
<!--<includes>-->
<!--<include>src/main/java/**/*.java</include>-->
<!--<include>src/main/java/**/*.scala</include>-->
<!--</includes>-->
</configuration>
</plugin>
@ -71,14 +41,6 @@
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>4.0.1</version>
<!--<executions>-->
<!--<execution>-->
<!--<goals>-->
<!--<goal>compile</goal>-->
<!--<goal>testCompile</goal>-->
<!--</goals>-->
<!--</execution>-->
<!--</executions>-->
<executions>
<execution>
<id>scala-compile-first</id>
@ -107,6 +69,11 @@
<dependencies>
<dependency>
<groupId>edu.cmu</groupId>
<artifactId>secondstring</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-pace-core</artifactId>
@ -129,21 +96,477 @@
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-xml</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-jsonSchema</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>
<dependency>
<groupId>org.reflections</groupId>
<artifactId>reflections</artifactId>
</dependency>
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>
<profile>
<id>oozie-package</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.4.1</version>
<executions>
<execution>
<id>enforce-connection-properties-file-existence</id>
<phase>initialize</phase>
<goals>
<goal>enforce</goal>
</goals>
<configuration>
<rules>
<requireFilesExist>
<files>
<file>${dhpConnectionProperties}</file>
</files>
<message>
The file with connection properties could not be found. Please, create the ${dhpConnectionProperties} file or set the location to another already created file by using
-DdhpConnectionProperties property.
</message>
</requireFilesExist>
</rules>
<fail>true</fail>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy dependencies</id>
<phase>prepare-package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<includeScope>${oozie.package.dependencies.include.scope}</includeScope>
<excludeScope>${oozie.package.dependencies.exclude.scope}</excludeScope>
<silent>true</silent>
</configuration>
</execution>
</executions>
</plugin>
<!-- Plugin originally defined in attach-test-resources. It was moved here to ensure that it executes before priming. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>attach-test-resources-package</id>
<phase>prepare-package</phase>
<goals>
<goal>test-jar</goal>
</goals>
<configuration>
<skip>${oozie.package.skip.test.jar}</skip>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>eu.dnetlib.primer</groupId>
<artifactId>primer-maven-plugin</artifactId>
<version>1.2.0</version>
<executions>
<execution>
<id>priming</id>
<phase>prepare-package</phase>
<goals>
<goal>prime</goal>
</goals>
<configuration>
<classProviderFiles>
<classProviderFile>${project.build.directory}/dependency/*.jar</classProviderFile>
<classProviderFile>${project.build.directory}/*-tests.jar</classProviderFile>
<classProviderFile>${project.build.directory}/classes</classProviderFile>
</classProviderFiles>
<coansysPackageDir>${project.build.directory}/dependency</coansysPackageDir>
<destination>${project.build.directory}/${primed.dir}</destination>
<classpath>${workflow.source.dir}</classpath>
</configuration>
</execution>
</executions>
</plugin>
<!-- reading job.properties to use them in .sh scripts -->
<plugin>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>${properties.maven.plugin.version}</version>
<dependencies>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-build-assembly-resources</artifactId>
<version>${project.version}</version>
<!-- contains project-default.properties -->
</dependency>
</dependencies>
<executions>
<execution>
<id>reading-dhp-properties</id>
<phase>initialize</phase>
<goals>
<goal>read-project-properties</goal>
</goals>
<configuration>
<locations>
<location>${dhpConnectionProperties}</location>
</locations>
<quiet>false</quiet>
</configuration>
</execution>
<execution>
<id>read-default-properties</id>
<phase>prepare-package</phase>
<goals>
<goal>read-project-properties</goal>
</goals>
<configuration>
<locations>
<location>classpath:project-default.properties</location>
</locations>
<quiet>true</quiet>
</configuration>
</execution>
<execution>
<id>read-job-properties</id>
<phase>prepare-package</phase>
<goals>
<goal>read-project-properties</goal>
</goals>
<configuration>
<locations>
<param>${project.build.directory}/${primed.dir}/job.properties</param>
<param>job-override.properties</param>
</locations>
<quiet>true</quiet>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<version>${project.version}</version>
<executions>
<execution>
<phase>validate</phase>
<goals>
<goal>generate-properties</goal>
<!-- generates sandboxName based on workflow.source.dir when not specified as commandline parameter -->
</goals>
<configuration>
</configuration>
</execution>
<execution>
<id>write-job-properties</id>
<phase>prepare-package</phase>
<goals>
<goal>write-project-properties</goal>
</goals>
<configuration>
<outputFile>target/${oozie.package.file.name}/job.properties</outputFile>
<!-- notice: dots are not allowed for job.properties! -->
<include>nameNode,jobTracker,queueName,importerQueueName,oozieLauncherQueueName,
workingDir,oozieTopWfApplicationPath,oozieServiceLoc,
sparkDriverMemory,sparkExecutorMemory,sparkExecutorCores,
oozie.wf.application.path,projectVersion,oozie.use.system.libpath,
oozieActionShareLibForSpark1,spark1YarnHistoryServerAddress,spark1EventLogDir,
oozieActionShareLibForSpark2,spark2YarnHistoryServerAddress,spark2EventLogDir,
sparkSqlWarehouseDir
</include>
<includeSystemProperties>true</includeSystemProperties>
<includePropertyKeysFromFiles>
<!-- <param>${workflow.source.dir}/job.properties</param> -->
<param>${project.build.directory}/${primed.dir}/job.properties</param>
<param>job-override.properties</param>
</includePropertyKeysFromFiles>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<version>2.1.11</version>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
</execution>
</executions>
<configuration>
<verbose>true</verbose>
<dateFormat>yyyy-MM-dd'T'HH:mm:ssZ</dateFormat>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<generateGitPropertiesFilename>target/${oozie.package.file.name}/${oozieAppDir}/version.properties</generateGitPropertiesFilename>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<dependencies>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dhp-build-assembly-resources</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>assembly-oozie-installer</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<finalName>${oozie.package.file.name}_shell_scripts</finalName>
<descriptorRefs>
<descriptorRef>oozie-installer</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- This plugin prepares the oozie installer package. -->
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<!-- extracting shared resources phase -->
<execution>
<id>installer-copy-custom</id>
<phase>process-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<tasks>
<property name="assembly-resources.loc" value="${maven.dependency.eu.dnetlib.dhp.dhp-build-assembly-resources.jar.path}" />
<unjar src="${assembly-resources.loc}" dest="${project.build.directory}/assembly-resources" />
</tasks>
</configuration>
</execution>
<!-- packaging phase -->
<execution>
<phase>package</phase>
<configuration>
<tasks>
<!-- copying workflow resources -->
<mkdir dir="target/${oozie.package.file.name}" />
<mkdir dir="target/${oozie.package.file.name}/${oozieAppDir}" />
<copy todir="target/${oozie.package.file.name}/${oozieAppDir}">
<!-- <fileset dir="${workflow.source.dir}/${oozieAppDir}" /> replacing with primed dir location -->
<fileset dir="target/${primed.dir}/${oozieAppDir}" />
</copy>
<!-- copying all jars to oozie lib directory -->
<mkdir dir="target/${oozie.package.file.name}/${oozieAppDir}/lib" />
<copy todir="target/${oozie.package.file.name}/${oozieAppDir}/lib">
<fileset dir="${project.build.directory}/dependency" />
</copy>
<!-- copying current module lib -->
<copy todir="target/${oozie.package.file.name}/${oozieAppDir}/lib">
<fileset dir="${project.build.directory}">
<include name="*.jar" />
</fileset>
</copy>
<fixcrlf srcdir="target/${oozie.package.file.name}/${oozieAppDir}/" encoding="UTF-8" outputencoding="UTF-8" includes="**/*.sh,**/*.json,**/*.py,**/*.sql" eol="lf" />
<!-- creating tar.gz package -->
<tar destfile="target/${oozie.package.file.name}.tar.gz" compression="gzip" longfile="gnu">
<tarfileset dir="target/${oozie.package.file.name}" />
<tarfileset dir="target/${oozie.package.file.name}_shell_scripts" filemode="0755">
<include name="**/*.sh" />
</tarfileset>
<tarfileset dir="target/${oozie.package.file.name}_shell_scripts" filemode="0644">
<exclude name="**/*.sh" />
</tarfileset>
</tar>
<!-- cleanup -->
<delete dir="target/${oozie.package.file.name}" />
<delete dir="target/${oozie.package.file.name}_shell_scripts" />
</tasks>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>deploy</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.5.0</version>
<executions>
<execution>
<id>create-target-dir</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>ssh</executable>
<arguments>
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>rm -rf ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; mkdir -p ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>upload-oozie-package</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>scp</executable>
<arguments>
<argument>-P ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>target/${oozie.package.file.name}.tar.gz</argument>
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}:${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/${oozie.package.file.name}.tar.gz</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>extract-and-upload-to-hdfs</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>ssh</executable>
<!-- <outputFile>target/redirected_upload.log</outputFile> -->
<arguments>
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>tar -zxf oozie-package.tar.gz; </argument>
<argument>rm ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/oozie-package.tar.gz; </argument>
<argument>./upload_workflow.sh</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>run</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.5.0</version>
<executions>
<execution>
<id>run-job</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>ssh</executable>
<!-- this file will be used by test verification profile reading job identifier -->
<outputFile>${oozie.execution.log.file.location}</outputFile>
<arguments>
<argument>${dhp.hadoop.frontend.user.name}@${dhp.hadoop.frontend.host.name}</argument>
<argument>-p ${dhp.hadoop.frontend.port.ssh}</argument>
<argument>-o StrictHostKeyChecking=no</argument>
<argument>cd ${dhp.hadoop.frontend.temp.dir}/oozie-packages/${sandboxName}/${output.dir.name}/; </argument>
<argument>./run_workflow.sh</argument>
</arguments>
</configuration>
</execution>
<execution>
<id>show-run-log-on-stdout</id>
<phase>package</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>cat</executable>
<arguments>
<argument>${oozie.execution.log.file.location}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -1,10 +1,10 @@
package eu.dnetlib;
import com.google.common.hash.Hashing;
import eu.dnetlib.graph.GraphProcessor;
import eu.dnetlib.graph.JavaGraphProcessor;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.BlockProcessor;
import eu.dnetlib.pace.util.BlockProcessorForTesting;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.reporter.SparkReporter;
@ -19,7 +19,6 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
@ -28,6 +27,7 @@ import org.apache.spark.util.LongAccumulator;
import scala.Serializable;
import scala.Tuple2;
import java.nio.charset.Charset;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@ -57,21 +57,20 @@ public class Deduper implements Serializable {
.reduceByKey((b1, b2) -> Block.from(b1, b2, of, maxQueueSize));
}
public static Iterator<Tuple2<String, String>> ccToMergeRel(ConnectedComponent cc, DedupConfig dedupConf) {
return cc
.getDocs()
public static Iterator<Tuple2<String, String>> ccToMergeRel(Tuple2<String, List<String>> cc, DedupConfig dedupConf) {
return cc._2()
.stream()
.flatMap(
id -> {
List<Tuple2<String, String>> tmp = new ArrayList<>();
tmp.add(new Tuple2<>(cc.getCcId(), id));
tmp.add(new Tuple2<>(cc._1(), id));
return tmp.stream();
})
.iterator();
}
public static long hash(final String id) {
return Hashing.murmur3_128().hashString(id).asLong();
return Hashing.murmur3_128().hashString(id, Charset.defaultCharset()).asLong();
}
public static ConnectedComponent entityMerger(String key, Iterator<String> values) {
@ -79,20 +78,20 @@ public class Deduper implements Serializable {
ConnectedComponent cc = new ConnectedComponent();
cc.setCcId(key);
cc.setDocs(StreamSupport.stream(Spliterators.spliteratorUnknownSize(values, Spliterator.ORDERED), false)
.collect(Collectors.toSet()));
.collect(Collectors.toCollection(HashSet::new)));
return cc;
}
public static JavaRDD<Relation> computeRelations(
JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config) {
JavaSparkContext context, JavaPairRDD<String, Block> blocks, DedupConfig config, boolean useTree, boolean noMatch) {
Map<String, LongAccumulator> accumulators = Utility.constructAccumulator(config, context.sc());
return blocks
.flatMapToPair(
it -> {
final SparkReporter reporter = new SparkReporter(accumulators);
new BlockProcessor(config)
.processSortedBlock(it._1(), it._2().getDocuments(), reporter);
new BlockProcessorForTesting(config)
.processSortedBlock(it._1(), it._2().getDocuments(), reporter, useTree, noMatch);
return reporter.getRelations().iterator();
})
.mapToPair(it -> new Tuple2<>(it._1() + it._2(), new Relation(it._1(), it._2(), "simRel")))
@ -100,7 +99,7 @@ public class Deduper implements Serializable {
.map(Tuple2::_2);
}
public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath){
public static void createSimRels(DedupConfig dedupConf, SparkSession spark, String entitiesPath, String simRelsPath, boolean useTree, boolean noMatch){
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
@ -116,7 +115,7 @@ public class Deduper implements Serializable {
JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConf);
// create relations by comparing only elements in the same group
JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf);
JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConf, useTree, noMatch);
// save the simrel in the workingdir
spark
@ -137,29 +136,30 @@ public class Deduper implements Serializable {
.map(s -> MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), s))
.mapToPair((PairFunction<String, Object, String>) s -> new Tuple2<>(hash(s), s));
final RDD<Edge<String>> edgeRdd = spark
final JavaRDD<Edge<String>> edgeRdd = spark
.read()
.load(simRelsPath)
.as(Encoders.bean(Relation.class))
.javaRDD()
.map(Relation::toEdgeRdd)
.rdd();
.map(Relation::toEdgeRdd);
JavaPairRDD<String, List<String>> ccs = JavaGraphProcessor
.findCCs(vertexes, edgeRdd, dedupConf.getWf().getMaxIterations());
JavaRDD<Relation> mergeRel = ccs
.filter(cc -> cc._2().size() > 1)
.flatMap(cc -> Deduper.ccToMergeRel(cc, dedupConf))
.map(it -> new Relation(it._1(), it._2(), "mergeRel"));
final Dataset<Relation> mergeRels = spark
.createDataset(
GraphProcessor
.findCCs(vertexes.rdd(), edgeRdd, maxIterations)
.toJavaRDD()
.filter(k -> k.getDocs().size() > 1)
.flatMap(cc -> ccToMergeRel(cc, dedupConf))
.map(it -> new Relation(it._1(), it._2(), "mergeRel"))
.rdd(),
mergeRel.rdd(),
Encoders.bean(Relation.class));
mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelsPath);
}
public static void createDedupEntity(DedupConfig dedupConf, String mergeRelsPath, String entitiesPath, SparkSession spark, String dedupEntityPath){
public static void createDedupEntity(DedupConfig dedupConf, String simRelsPath, String mergeRelsPath, String entitiesPath, SparkSession spark, String dedupEntityPath){
JavaPairRDD<String, String> entities = spark
.read()
@ -170,7 +170,15 @@ public class Deduper implements Serializable {
.toJavaRDD()
.mapToPair(t -> t);
// <source, target>: source is the dedup_id, target is the id of the mergedIn
// <source_raw_id, relation(source, target)>
JavaPairRDD<String, Relation> simRels = spark
.read()
.load(simRelsPath)
.as(Encoders.bean(Relation.class))
.toJavaRDD()
.mapToPair(r-> new Tuple2<>(r.getSource(), r));
// <raw_id, relation(dedup_id, raw_id)>
JavaPairRDD<String, Relation> mergeRels = spark
.read()
.load(mergeRelsPath)
@ -183,7 +191,22 @@ public class Deduper implements Serializable {
.groupByKey()
.map(t-> entityMerger(t._1(), t._2().iterator()));
dedupEntities.saveAsTextFile(dedupEntityPath);
JavaPairRDD<String, Iterable<Relation>> simRelsWithDedupId = simRels
.join(mergeRels)
.mapToPair(x -> new Tuple2<>(x._2()._2().getSource(), x._2()._1()))
.groupByKey();
JavaRDD<ConnectedComponent> groupEntity = mergeRels.join(entities)
.mapToPair(t -> new Tuple2<>(t._2()._1().getSource(), t._2()._2()))
.groupByKey()
.join(simRelsWithDedupId)
.map(x -> new ConnectedComponent(
x._1(),
x._2()._1(),
x._2()._2())
);
groupEntity.saveAsTextFile(dedupEntityPath);
}
}

View File

@ -0,0 +1,56 @@
package eu.dnetlib.graph;
import com.clearspring.analytics.util.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ConnectedComponent;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.graphx.*;
import org.apache.spark.rdd.RDD;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import java.util.List;
/**
 * Java-side wrapper around the GraphX connected components algorithm, used to group
 * the vertexes (record identifiers) that are linked by similarity edges.
 */
public class JavaGraphProcessor {

    /**
     * Finds the connected components of the graph defined by the given vertexes and edges.
     *
     * @param vertexes      pairs of (numeric vertex id, vertex value); the value is what ends up in the groups
     * @param edges         the edges connecting the numeric vertex ids
     * @param maxIterations maximum number of iterations of the connected components algorithm
     * @return pairs of (generated group id prefixed by {@code dedup______::}, list of vertex values in the group)
     */
    public static JavaPairRDD<String, List<String>> findCCs(JavaPairRDD<Object, String> vertexes, JavaRDD<Edge<String>> edges, int maxIterations) {

        ClassTag<String> stringTag = ClassTag$.MODULE$.apply(String.class);

        Graph<String, String> graph =
            Graph.apply(
                vertexes.rdd(),
                edges.rdd(),
                "",
                StorageLevel.MEMORY_ONLY(),
                StorageLevel.MEMORY_ONLY(),
                stringTag,
                stringTag
            );

        GraphOps<String, String> graphOps = new GraphOps<>(graph, stringTag, stringTag);

        // <vertex id, component id>
        JavaRDD<Tuple2<Object, Object>> cc = graphOps.connectedComponents(maxIterations).vertices().toJavaRDD();

        // <component id, vertex value>: the component id must be unwrapped from the Optional produced by the
        // outer join, otherwise the grouping key would be the wrapper object instead of the id itself.
        // A vertex without a component falls back to its own id and therefore forms a singleton group.
        JavaPairRDD<Object, String> joinResult = vertexes
            .leftOuterJoin(cc.mapToPair(x -> x))
            .mapToPair(x -> {
                final Object componentId = x._2()._2().isPresent() ? x._2()._2().get() : x._1();
                return new Tuple2<>(componentId, x._2()._1());
            });

        return joinResult
            .groupByKey()
            .map(x -> Lists.newArrayList(x._2()))
            .zipWithUniqueId()
            .mapToPair(x -> new Tuple2<>("dedup______::" + x._2().toString(), x._1()));
    }
}

View File

@ -0,0 +1,74 @@
package eu.dnetlib.jobs;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
/**
 * Base class of the Spark jobs: holds the parsed job arguments and the Spark session, and offers
 * helpers to create the session, save datasets and read configuration files.
 */
public abstract class AbstractSparkJob implements Serializable {

    /** Number of partitions used when the job does not receive an explicit value. */
    protected static final int NUM_PARTITIONS = 1000;

    protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    public ArgumentApplicationParser parser; // parameters for the spark action
    public SparkSession spark; // the spark session

    public AbstractSparkJob() {}

    public AbstractSparkJob(ArgumentApplicationParser parser, SparkSession spark) {
        this.parser = parser;
        this.spark = spark;
    }

    /** Executes the job. */
    abstract void run() throws IOException;

    /** Returns the Spark session for the given configuration, creating it when none exists yet. */
    protected static SparkSession getSparkSession(SparkConf conf) {
        return SparkSession.builder().config(conf).getOrCreate();
    }

    /** Writes the dataset as gzip-compressed JSON to {@code outPath} using the given save mode. */
    protected static <T> void save(Dataset<T> dataset, String outPath, SaveMode mode) {
        dataset.write().option("compression", "gzip").mode(mode).json(outPath);
    }

    /** Loads the dedup configuration from the file at {@code dedupConfPath}. */
    protected static DedupConfig loadDedupConfig(String dedupConfPath) throws IOException {
        return DedupConfig.load(
            readFileFromHDFS(dedupConfPath)
        );
    }

    /**
     * Reads a UTF-8 text file through the Hadoop {@link FileSystem} API.
     *
     * @param filePath path of the file to read
     * @return the lines of the file concatenated without any separator (line terminators are dropped)
     * @throws IOException if the file cannot be opened
     */
    protected static String readFileFromHDFS(String filePath) throws IOException {
        Path path = new Path(filePath);
        FileSystem fs = FileSystem.get(new Configuration());
        // try-with-resources closes the reader (and the wrapped HDFS stream) on every exit path
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path), StandardCharsets.UTF_8))) {
            return br.lines().collect(Collectors.joining());
        }
    }

    /**
     * Reads a classpath resource as a UTF-8 string.
     *
     * @param path  resource path, resolved through {@code clazz.getResourceAsStream(path)}
     * @param clazz class used to locate the resource
     * @return the content of the resource
     * @throws IOException if the resource does not exist or cannot be read
     */
    public static String readResource(String path, Class<? extends AbstractSparkJob> clazz) throws IOException {
        try (java.io.InputStream in = clazz.getResourceAsStream(path)) {
            if (in == null) {
                // fail with a meaningful message instead of a NullPointerException raised inside IOUtils
                throw new IOException("Resource not found on the classpath: " + path);
            }
            // decode explicitly as UTF-8 so the result does not depend on the platform default charset
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        }
    }
}

View File

@ -0,0 +1,207 @@
package eu.dnetlib.jobs;
import eu.dnetlib.Deduper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.FieldValueImpl;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import eu.dnetlib.support.Block;
import eu.dnetlib.support.ConnectedComponent;
import eu.dnetlib.support.Relation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
/**
 * Spark job computing summary statistics of a deduplication run (number of entities, blocks,
 * relations and groups, plus the Rand Index of blocks and groups against a ground truth field).
 */
public class SparkComputeStatistics extends AbstractSparkJob {

    private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.jobs.SparkComputeStatistics.class);

    // One mapper per JVM instead of one per parsed record; static fields are not serialized with the closures.
    private static final ObjectMapper GROUP_MAPPER = new ObjectMapper();

    public SparkComputeStatistics(ArgumentApplicationParser parser, SparkSession spark) {
        super(parser, spark);
    }

    public static void main(String[] args) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(
            readResource("/jobs/parameters/computeStatistics_parameters.json", SparkComputeStatistics.class)
        );

        parser.parseArgument(args);

        SparkConf conf = new SparkConf();

        new eu.dnetlib.jobs.SparkComputeStatistics(
            parser,
            getSparkSession(conf)
        ).run();
    }

    /**
     * Reads entities, blocks, simrels, mergerels and group entities, computes the statistics,
     * prints them on stdout and writes them to {@code <workingPath>/stats_file.txt}.
     */
    @Override
    public void run() throws IOException {

        //https://towardsdatascience.com/7-evaluation-metrics-for-clustering-algorithms-bdc537ff54d2#:~:text=There%20are%20two%20types%20of,to%20all%20unsupervised%20learning%20results)
        // read oozie parameters
        final String entitiesPath = parser.get("entitiesPath");
        final String workingPath = parser.get("workingPath");
        final String dedupConfPath = parser.get("dedupConfPath");
        final String groundTruthFieldJPath = parser.get("groundTruthFieldJPath");
        final int numPartitions = Optional
            .ofNullable(parser.get("numPartitions"))
            .map(Integer::valueOf)
            .orElse(NUM_PARTITIONS);

        log.info("entitiesPath: '{}'", entitiesPath);
        log.info("workingPath: '{}'", workingPath);
        log.info("numPartitions: '{}'", numPartitions);
        log.info("dedupConfPath: '{}'", dedupConfPath);
        log.info("groundTruthFieldJPath: '{}'", groundTruthFieldJPath);

        JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
        DedupConfig dedupConfig = loadDedupConfig(dedupConfPath);

        JavaPairRDD<String, MapDocument> mapDocuments = sc
            .textFile(entitiesPath)
            .repartition(numPartitions)
            .mapToPair(
                (PairFunction<String, String, MapDocument>) s -> {
                    MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, s);
                    //put in the map the groundTruthField used to compute statistics
                    d.getFieldMap().put("groundTruth", new FieldValueImpl(Type.String, "groundTruth", MapDocumentUtil.getJPathString(groundTruthFieldJPath, s)));
                    return new Tuple2<>(d.getIdentifier(), d);
                });

        // ground truth value of every entity
        JavaRDD<String> entities = mapDocuments.map(d -> d._2().getFieldMap().get("groundTruth").stringValue());

        // create blocks: each block is represented by the ground truth values of its documents
        JavaRDD<List<String>> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConfig)
            .map(b -> b._2().getDocuments().stream().map(d -> d.getFieldMap().get("groundTruth").stringValue()).collect(Collectors.toList()));

        // <source, target>: source is the dedup_id, target is the id of the mergedIn
        JavaRDD<Relation> mergerels = spark
            .read()
            .load(workingPath + "/mergerels")
            .as(Encoders.bean(Relation.class))
            .toJavaRDD();

        JavaRDD<Relation> simrels = spark
            .read()
            .load(workingPath + "/simrels")
            .as(Encoders.bean(Relation.class))
            .toJavaRDD();

        // each group is represented by the ground truth values of its documents
        JavaRDD<List<String>> groups = sc.textFile(workingPath + "/groupentities")
            .map(e -> GROUP_MAPPER.readValue(e, ConnectedComponent.class))
            .map(e -> e.getDocs().stream().map(d -> MapDocumentUtil.getJPathString(groundTruthFieldJPath, d)).collect(Collectors.toList()));

        long entities_number = entities.count();
        long blocks_number = blocks.count();
        double blocks_randIndex = randIndex(blocks);
        long simrels_number = simrels.count();
        long mergerels_number = mergerels.count();
        double groups_randIndex = randIndex(groups);
        long groups_number = groups.count();
        long groundtruth_number = entities.filter(e -> !e.isEmpty()).count();
        // a group is correct when all its documents share the same ground truth value
        long correct_groups = groups.filter(x -> x.stream().distinct().count()==1).count();
        long wrong_groups = groups_number - correct_groups;

        String print =
            "Entities : " + entities_number + "\n" +
            "Ground Truth : " + groundtruth_number + "\n" +
            "Blocks : " + blocks_number + "\n" +
            "Blocks RI : " + blocks_randIndex + "\n" +
            "SimRels : " + simrels_number + "\n" +
            "MergeRels : " + mergerels_number + "\n" +
            "Groups : " + groups_number + " (correct: " + correct_groups + ", wrong: " + wrong_groups + ")\n" +
            "Groups RI : " + groups_randIndex;

        System.out.println(print);

        writeStatsFileToHDFS(groundtruth_number, entities_number, blocks_randIndex, groups_randIndex, blocks_number, simrels_number, mergerels_number, groups_number, workingPath + "/stats_file.txt");
    }

    /**
     * Writes the statistics report to {@code filePath}, replacing any previous file.
     * A failure while creating or writing the file is logged and does not abort the job.
     *
     * @throws IOException if the previous file cannot be deleted
     */
    public static void writeStatsFileToHDFS(long groundtruth_number, long entities_number, double blocks_randIndex, double groups_randIndex, long blocks_number, long simrels_number, long mergerels_number, long groups_number, String filePath) throws IOException {
        Configuration conf = new Configuration();

        FileSystem fs = FileSystem.get(conf);
        Path outFile = new Path(filePath);
        fs.delete(outFile, true);

        try {
            // Verification
            if (fs.exists(outFile)) {
                System.out.println("Output file already exists");
                throw new IOException("Output file already exists");
            }

            String print =
                "Entities : " + entities_number + "\n" +
                "Ground Truth : " + groundtruth_number + "\n" +
                "Blocks : " + blocks_number + "\n" +
                "Blocks RI : " + blocks_randIndex + "\n" +
                "SimRels : " + simrels_number + "\n" +
                "MergeRels : " + mergerels_number + "\n" +
                "Groups : " + groups_number + "\n" +
                "Groups RI : " + groups_randIndex;

            // Create file to write; the stream is closed on every exit path
            try (FSDataOutputStream out = fs.create(outFile)) {
                out.writeBytes(print);
            }
        } catch (IOException e) {
            log.error("Unable to write the statistics file '{}'", filePath, e);
        }
    }

    //TODO find another measure that takes into account all the elements outside of the group too
    /**
     * RandIndex = number of pairwise correct predictions / total number of possible pairs (in the same cluster),
     * bounded between 0 and 1.
     *
     * <p>Empty labels never contribute to the correct pairs. Pair counts are accumulated as {@code long}
     * because their sum over all the clusters easily exceeds the {@code int} range. When no cluster has at
     * least two elements the denominator is zero and the result is {@code NaN}.
     *
     * @param clusters the clusters, each one given as the list of labels of its elements
     * @return the ratio between correct pairs and possible pairs
     */
    public double randIndex(JavaRDD<List<String>> clusters) {

        Tuple2<Long, Long> totals = clusters
            .map(c -> {
                // occurrences of every non-empty label, computed in a single pass over the cluster
                java.util.Map<String, Long> occurrences = c.stream()
                    .filter(s -> !s.isEmpty())
                    .collect(Collectors.groupingBy(s -> s, Collectors.counting()));

                long num = 0L;
                for (long n : occurrences.values()) {
                    num += binomialCoefficient(n);
                }
                long den = binomialCoefficient(c.size());
                return new Tuple2<Long, Long>(num, den);
            })
            .reduce((a, b) -> new Tuple2<Long, Long>(a._1() + b._1(), a._2() + b._2()));

        return (double) totals._1() / totals._2();
    }

    /** Number of unordered pairs that can be picked among {@code n} elements. */
    private static long binomialCoefficient(long n)
    {
        return n*(n-1)/2;
    }

    //V-measure = harmonic mean of homogeneity and completeness, homogeneity = each cluster contains only members of a single class, completeness = all members of a given class are assigned to the same cluster
}

View File

@ -0,0 +1,116 @@
package eu.dnetlib.jobs;
import eu.dnetlib.Deduper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import eu.dnetlib.support.ConnectedComponent;
import eu.dnetlib.support.Relation;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.Tuple3;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
/**
 * Spark job that materializes, for every dedup id, the group made of the merged entities
 * and of the similarity relations among them.
 */
public class SparkCreateGroupEntity extends AbstractSparkJob {

    private static final Logger log = LoggerFactory.getLogger(SparkCreateGroupEntity.class);

    public SparkCreateGroupEntity(ArgumentApplicationParser parser, SparkSession spark) {
        super(parser, spark);
    }

    public static void main(String[] args) throws Exception {
        final String parametersJson =
            Utility.readResource("/jobs/parameters/createGroupEntity_parameters.json", SparkCreateGroupEntity.class);
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(parametersJson);
        argumentParser.parseArgument(args);

        final SparkConf sparkConf = new SparkConf();
        final SparkCreateGroupEntity job = new SparkCreateGroupEntity(argumentParser, getSparkSession(sparkConf));
        job.run();
    }

    /**
     * Joins entities, simrels and mergerels and stores the resulting groups, gzip-compressed,
     * under {@code <workingPath>/groupentities}.
     */
    @Override
    public void run() throws IOException {

        // read oozie parameters
        final String entitiesPath = parser.get("entitiesPath");
        final String workingPath = parser.get("workingPath");
        final String dedupConfPath = parser.get("dedupConfPath");
        final String requestedPartitions = parser.get("numPartitions");
        final int numPartitions = requestedPartitions == null ? NUM_PARTITIONS : Integer.valueOf(requestedPartitions);

        log.info("entitiesPath: '{}'", entitiesPath);
        log.info("workingPath: '{}'", workingPath);
        log.info("dedupConfPath: '{}'", dedupConfPath);
        log.info("numPartitions: '{}'", numPartitions);

        final DedupConfig dedupConf = DedupConfig.load(readFileFromHDFS(dedupConfPath));

        // <raw_id, json>
        final MapFunction<String, Tuple2<String, String>> toIdAndJson =
            json -> new Tuple2<>(MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), json), json);
        final JavaPairRDD<String, String> entities = spark
            .read()
            .textFile(entitiesPath)
            .map(toIdAndJson, Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
            .toJavaRDD()
            .mapToPair(pair -> pair);

        // <source_raw_id, relation(source, target)>
        final JavaPairRDD<String, Relation> simRels = readRelations(workingPath + "/simrels")
            .mapToPair(rel -> new Tuple2<>(rel.getSource(), rel));

        // <raw_id, relation(dedup_id, raw_id)>
        final JavaPairRDD<String, Relation> mergeRels = readRelations(workingPath + "/mergerels")
            .mapToPair(rel -> new Tuple2<>(rel.getTarget(), rel));

        // <dedup_id, simrel>
        final JavaPairRDD<String, Iterable<Relation>> simRelsWithDedupId = simRels
            .join(mergeRels)
            .mapToPair(joined -> new Tuple2<>(joined._2()._2().getSource(), joined._2()._1()))
            .groupByKey();

        // <dedup_id, json of the merged entities>
        final JavaPairRDD<String, Iterable<String>> entitiesByDedupId = mergeRels
            .join(entities)
            .mapToPair(joined -> new Tuple2<>(joined._2()._1().getSource(), joined._2()._2()))
            .groupByKey();

        final JavaRDD<ConnectedComponent> groupEntity = entitiesByDedupId
            .join(simRelsWithDedupId)
            .map(group -> new ConnectedComponent(
                group._1(),
                group._2()._1(),
                group._2()._2())
            );

        groupEntity.saveAsTextFile(workingPath + "/groupentities", GzipCodec.class);
    }

    /** Loads the relations stored at the given path. */
    private JavaRDD<Relation> readRelations(String path) {
        return spark
            .read()
            .load(path)
            .as(Encoders.bean(Relation.class))
            .toJavaRDD();
    }
}

View File

@ -0,0 +1,105 @@
package eu.dnetlib.jobs;
import eu.dnetlib.Deduper;
import eu.dnetlib.graph.JavaGraphProcessor;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import eu.dnetlib.support.ConnectedComponent;
import eu.dnetlib.support.Relation;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import static eu.dnetlib.Deduper.hash;
/**
 * Spark job that turns the similarity relations into merge relations by computing the
 * connected components of the similarity graph.
 */
public class SparkCreateMergeRels extends AbstractSparkJob {

    private static final Logger log = LoggerFactory.getLogger(SparkCreateMergeRels.class);

    public SparkCreateMergeRels(ArgumentApplicationParser parser, SparkSession spark) {
        super(parser, spark);
    }

    public static void main(String[] args) throws Exception {
        final String parametersJson =
            Utility.readResource("/jobs/parameters/createMergeRels_parameters.json", SparkCreateMergeRels.class);
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(parametersJson);
        argumentParser.parseArgument(args);

        final SparkConf sparkConf = new SparkConf();
        final SparkCreateMergeRels job = new SparkCreateMergeRels(argumentParser, getSparkSession(sparkConf));
        job.run();
    }

    /**
     * Builds the graph from the entities (vertexes) and the simrels (edges), keeps the components
     * with more than one element and stores them as mergerels under {@code <workingPath>/mergerels}.
     */
    @Override
    public void run() throws IOException {

        // read oozie parameters
        final String entitiesPath = parser.get("entitiesPath");
        final String workingPath = parser.get("workingPath");
        final String dedupConfPath = parser.get("dedupConfPath");
        final String requestedPartitions = parser.get("numPartitions");
        final int numPartitions = requestedPartitions == null ? NUM_PARTITIONS : Integer.valueOf(requestedPartitions);

        log.info("entitiesPath: '{}'", entitiesPath);
        log.info("workingPath: '{}'", workingPath);
        log.info("dedupConfPath: '{}'", dedupConfPath);
        log.info("numPartitions: '{}'", numPartitions);

        final DedupConfig dedupConf = DedupConfig.load(readFileFromHDFS(dedupConfPath));

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        // <hash(raw_id), raw_id>
        final PairFunction<String, Object, String> toVertex = json -> {
            final String id = MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), json);
            return new Tuple2<>(hash(id), id);
        };
        final JavaPairRDD<Object, String> vertexes = sc.textFile(entitiesPath).mapToPair(toVertex);

        final JavaRDD<Relation> storedSimRels = spark
            .read()
            .load(workingPath + "/simrels")
            .as(Encoders.bean(Relation.class))
            .javaRDD();
        final JavaRDD<Edge<String>> edgeRdd = storedSimRels.map(Relation::toEdgeRdd);

        final int maxIterations = dedupConf.getWf().getMaxIterations();
        final JavaPairRDD<String, List<String>> ccs = JavaGraphProcessor.findCCs(vertexes, edgeRdd, maxIterations);

        // only the components with at least two elements produce merge relations
        final JavaRDD<Relation> mergeRel = ccs
            .filter(component -> component._2().size() > 1)
            .flatMap(component -> Deduper.ccToMergeRel(component, dedupConf))
            .map(pair -> new Relation(pair._1(), pair._2(), "mergeRel"));

        final Dataset<Relation> mergeRels = spark.createDataset(mergeRel.rdd(), Encoders.bean(Relation.class));

        mergeRels
            .write()
            .mode(SaveMode.Overwrite)
            .parquet(workingPath + "/mergerels");
    }
}

View File

@ -0,0 +1,96 @@
package eu.dnetlib.jobs;
import eu.dnetlib.Deduper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.pace.utils.Utility;
import eu.dnetlib.support.ArgumentApplicationParser;
import eu.dnetlib.support.Block;
import eu.dnetlib.support.Relation;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.io.IOException;
import java.util.Optional;
/**
 * Spark job that groups the entities in blocks and compares the entities of each block
 * to produce the similarity relations.
 */
public class SparkCreateSimRels extends AbstractSparkJob {

    private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class);

    public SparkCreateSimRels(ArgumentApplicationParser parser, SparkSession spark) {
        super(parser, spark);
    }

    public static void main(String[] args) throws Exception {
        final String parametersJson =
            readResource("/jobs/parameters/createSimRels_parameters.json", SparkCreateSimRels.class);
        final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(parametersJson);
        argumentParser.parseArgument(args);

        final SparkConf sparkConf = new SparkConf();
        final SparkCreateSimRels job = new SparkCreateSimRels(argumentParser, getSparkSession(sparkConf));
        job.run();
    }

    /**
     * Reads the entities, creates the sorted blocks, computes the relations inside each block
     * and stores them under {@code <workingPath>/simrels}.
     */
    @Override
    public void run() throws IOException {

        // read oozie parameters
        final String entitiesPath = parser.get("entitiesPath");
        final String workingPath = parser.get("workingPath");
        final String dedupConfPath = parser.get("dedupConfPath");
        final String requestedPartitions = parser.get("numPartitions");
        final int numPartitions = requestedPartitions == null ? NUM_PARTITIONS : Integer.valueOf(requestedPartitions);
        final boolean useTree = Boolean.parseBoolean(parser.get("useTree"));

        log.info("entitiesPath: '{}'", entitiesPath);
        log.info("workingPath: '{}'", workingPath);
        log.info("dedupConfPath: '{}'", dedupConfPath);
        log.info("numPartitions: '{}'", numPartitions);
        log.info("useTree: '{}'", useTree);

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        final DedupConfig dedupConfig = loadDedupConfig(dedupConfPath);

        // <identifier, document>
        final PairFunction<String, String, MapDocument> toDocument = json -> {
            final MapDocument document = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, json);
            return new Tuple2<>(document.getIdentifier(), document);
        };
        final JavaPairRDD<String, MapDocument> mapDocuments = sc
            .textFile(entitiesPath)
            .repartition(numPartitions)
            .mapToPair(toDocument);

        // create blocks for deduplication
        final JavaPairRDD<String, Block> blocks = Deduper.createSortedBlocks(mapDocuments, dedupConfig);

        // create relations by comparing only elements in the same group
        final JavaRDD<Relation> relations = Deduper.computeRelations(sc, blocks, dedupConfig, useTree, false);

        // save the simrel in the workingdir
        spark
            .createDataset(relations.rdd(), Encoders.bean(Relation.class))
            .write()
            .mode(SaveMode.Overwrite)
            .save(workingPath + "/simrels");
    }
}

View File

@ -2,9 +2,11 @@ package eu.dnetlib.pace.utils;
import com.google.common.collect.Sets;
import com.google.common.hash.Hashing;
import eu.dnetlib.jobs.*;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
@ -20,9 +22,7 @@ import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.*;
public class Utility {
@ -97,4 +97,9 @@ public class Utility {
}
}
/**
 * Reads a classpath resource as a UTF-8 string.
 *
 * @param path  resource path, resolved through {@code clazz.getResourceAsStream(path)}
 * @param clazz class used to locate the resource
 * @return the content of the resource
 * @throws IOException if the resource does not exist or cannot be read
 */
public static String readResource(String path, Class<? extends AbstractSparkJob> clazz) throws IOException {
    try (java.io.InputStream in = clazz.getResourceAsStream(path)) {
        if (in == null) {
            // fail with a meaningful message instead of a NullPointerException raised inside IOUtils
            throw new IOException("Resource not found on the classpath: " + path);
        }
        // decode explicitly as UTF-8 so the result does not depend on the platform default charset
        return IOUtils.toString(in, StandardCharsets.UTF_8);
    }
}
}

View File

@ -0,0 +1,95 @@
package eu.dnetlib.support;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.cli.*;
import org.apache.commons.io.IOUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Serializable;
import java.io.StringWriter;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Command line parser configured through a list of {@link OptionsParameter}: every parameter
 * becomes an option with a value, and the values of the parameters flagged as compressed are
 * transparently decoded (Base64 + GZIP) while parsing.
 */
public class ArgumentApplicationParser implements Serializable {

    // Charset used on both sides of the compression round trip, so that it does not depend on the platform default.
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    private final Options options = new Options();
    private final Map<String, String> objectMap = new HashMap<>();

    // long names of the options whose value has to be decompressed
    private final List<String> compressedValues = new ArrayList<>();

    /**
     * @param json_configuration JSON array describing the accepted parameters
     * @throws Exception if the JSON cannot be mapped to an array of {@link OptionsParameter}
     */
    public ArgumentApplicationParser(final String json_configuration) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
        createOptionMap(configuration);
    }

    public ArgumentApplicationParser(final OptionsParameter[] configuration) {
        createOptionMap(configuration);
    }

    /** Registers one option (always expecting a value) for each configured parameter. */
    private void createOptionMap(final OptionsParameter[] configuration) {
        for (final OptionsParameter conf : configuration) {
            final Option o = new Option(conf.getParamName(), true, conf.getParamDescription());
            o.setLongOpt(conf.getParamLongName());
            o.setRequired(conf.isParamRequired());
            if (conf.isCompressed()) {
                compressedValues.add(conf.getParamLongName());
            }
            options.addOption(o);
        }
    }

    /**
     * Decodes a value produced by {@link #compressArgument(String)}.
     *
     * @param abstractCompressed Base64 encoding of the GZIP-compressed UTF-8 text
     * @return the original text
     * @throws RuntimeException if the value is null, is not valid GZIP data or cannot be read
     */
    public static String decompressValue(final String abstractCompressed) {
        try {
            final byte[] byteArray = org.apache.commons.codec.binary.Base64.decodeBase64(abstractCompressed.getBytes(UTF_8));
            // try-with-resources releases the inflater even when the payload is corrupted
            try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(byteArray))) {
                final StringWriter stringWriter = new StringWriter();
                IOUtils.copy(gis, stringWriter, UTF_8);
                return stringWriter.toString();
            }
        } catch (java.io.IOException | RuntimeException e) {
            throw new RuntimeException("Wrong value to decompress:" + abstractCompressed, e);
        }
    }

    /**
     * Compresses a value so that it can be passed as a compressed parameter.
     *
     * @param value the text to compress
     * @return Base64 encoding of the GZIP-compressed UTF-8 bytes of {@code value}
     * @throws Exception if the compression fails
     */
    public static String compressArgument(final String value) throws Exception {
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        // closing the GZIP stream writes the trailer, so it must happen before reading the bytes
        try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
            gzip.write(value.getBytes(UTF_8));
        }
        return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
    }

    /**
     * Parses the command line and stores each option value under its long name,
     * decompressing the values of the options declared as compressed.
     *
     * @param args the command line arguments
     * @throws Exception if the arguments do not match the configured options
     */
    public void parseArgument(final String[] args) throws Exception {
        final CommandLineParser parser = new BasicParser();
        final CommandLine cmd = parser.parse(options, args);
        for (final Option it : cmd.getOptions()) {
            final String longName = it.getLongOpt();
            final String value = compressedValues.contains(longName)
                ? decompressValue(it.getValue())
                : it.getValue();
            objectMap.put(longName, value);
        }
    }

    /** Returns the value parsed for the given long option name, or {@code null} when it was not provided. */
    public String get(final String key) {
        return objectMap.get(key);
    }

    /** Returns the map of the parsed values, keyed by long option name. */
    public Map<String, String> getObjectMap() {
        return objectMap;
    }
}

View File

@ -1,10 +1,7 @@
package eu.dnetlib.support;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@ -12,6 +9,7 @@ import java.util.stream.StreamSupport;
import com.google.common.collect.Lists;
import eu.dnetlib.pace.model.MapDocument;
import org.codehaus.jackson.annotate.JsonIgnore;
public class Block implements Serializable {
@ -23,6 +21,11 @@ public class Block implements Serializable {
super();
}
/**
 * Creates a block for the given key backed by the given list of documents.
 * The list is stored as-is, without copying.
 *
 * @param key the key of the block
 * @param documents the documents belonging to the block
 */
public Block(String key, List<MapDocument> documents) {
    this.documents = documents;
    this.key = key;
}
public Block(String key, Iterable<MapDocument> documents) {
this.key = key;
this.documents = Lists.newArrayList(documents);
@ -89,6 +92,13 @@ public class Block implements Serializable {
return documents.size();
}
/**
 * Renders the block key, the number of documents and, under the {@code names} label, the string
 * value of the "country" field of every document.
 *
 * <p>Documents without a "country" field are rendered as {@code null} instead of failing.
 */
@Override
public String toString() {
    final List<String> countries = documents
        .stream()
        .map(d -> d.getFieldMap().get("country"))
        // a missing field must not break toString(), which is mostly used for logging
        .map(f -> f == null ? null : f.stringValue())
        .collect(Collectors.toList());
    // only the key is a quoted string; size and names carry no trailing quote
    return "Block{" +
        "key='" + key + '\'' +
        ", size=" + documents.size() +
        ", names=" + countries +
        '}';
}
}

View File

@ -2,56 +2,38 @@ package eu.dnetlib.support;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;
import eu.dnetlib.pace.utils.Utility;
import org.apache.commons.lang.StringUtils;
import org.codehaus.jackson.annotate.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.util.PaceException;
import org.codehaus.jackson.map.ObjectMapper;
public class ConnectedComponent implements Serializable {
private Set<String> docs;
private HashSet<String> docs;
private String ccId;
private HashSet<Relation> simrels;
public ConnectedComponent() {
}
/**
 * Creates a connected component with the given identifier, holding its own copies of the given
 * document ids and similarity relations.
 *
 * @param ccId the identifier of the connected component
 * @param docs the ids of the documents in the component
 * @param simrels the similarity relations of the component
 */
public ConnectedComponent(String ccId, Set<String> docs, Set<Relation> simrels) {
    this.ccId = ccId;
    this.docs = new HashSet<>(docs);
    this.simrels = new HashSet<>(simrels);
}
public ConnectedComponent(Set<String> docs) {
this.docs = docs;
createID();
this.docs = new HashSet<>(docs);
//initialization of id and relations missing
}
/**
 * Computes the identifier of this component.
 *
 * <p>With more than one document, {@code ccId} is set to {@code "dedup::"} followed by the md5
 * of the value returned by {@link #getMin()} and then returned. Otherwise the first element of
 * the document set is returned and {@code ccId} is left untouched.
 *
 * @return the computed identifier
 */
public String createID() {
    if (docs.size() <= 1) {
        return docs.iterator().next();
    }
    ccId = "dedup::" + Utility.md5(getMin());
    return ccId;
}
@JsonIgnore
public String getMin() {
final StringBuilder min = new StringBuilder();
docs
.forEach(
i -> {
if (StringUtils.isBlank(min.toString())) {
min.append(i);
} else {
if (min.toString().compareTo(i) > 0) {
min.setLength(0);
min.append(i);
}
}
});
return min.toString();
/**
 * Creates a connected component with the given identifier, collecting the given document ids
 * and similarity relations into new hash sets.
 *
 * @param ccId the identifier of the connected component
 * @param docs the ids of the documents in the component
 * @param simrels the similarity relations of the component
 */
public ConnectedComponent(String ccId, Iterable<String> docs, Iterable<Relation> simrels) {
    this.docs = Sets.newHashSet(docs);
    this.simrels = Sets.newHashSet(simrels);
    this.ccId = ccId;
}
@Override
@ -68,7 +50,7 @@ public class ConnectedComponent implements Serializable {
return docs;
}
public void setDocs(Set<String> docs) {
public void setDocs(HashSet<String> docs) {
this.docs = docs;
}
@ -79,4 +61,12 @@ public class ConnectedComponent implements Serializable {
/**
 * Sets the identifier of this connected component.
 *
 * @param ccId the new identifier
 */
public void setCcId(String ccId) {
this.ccId = ccId;
}
/**
 * Replaces the similarity relations of this connected component.
 * The given set is stored as-is, without copying.
 *
 * @param simrels the new set of similarity relations
 */
public void setSimrels(HashSet<Relation> simrels) {
this.simrels = simrels;
}
/**
 * Returns the similarity relations of this connected component.
 *
 * @return the internal set itself (not a copy)
 */
public HashSet<Relation> getSimrels() {
return simrels;
}
}

View File

@ -0,0 +1,37 @@
package eu.dnetlib.support;
/**
 * Description of a single command line option: short name, long name, description, and the
 * {@code required} and {@code compressed} flags.
 *
 * <p>Only the {@code compressed} flag has a setter; the other properties expose getters only.
 */
public class OptionsParameter {

    /** Short name of the option. */
    private String paramName;

    /** Long name of the option. */
    private String paramLongName;

    /** Human readable description of the option. */
    private String paramDescription;

    /** Whether the option is required. */
    private boolean paramRequired;

    /** Whether the value of the option is compressed. */
    private boolean compressed;

    /** Creates an instance with every property left at its default value. */
    public OptionsParameter() {
    }

    /** @return the short name of the option */
    public String getParamName() {
        return this.paramName;
    }

    /** @return the long name of the option */
    public String getParamLongName() {
        return this.paramLongName;
    }

    /** @return the description of the option */
    public String getParamDescription() {
        return this.paramDescription;
    }

    /** @return {@code true} when the option is required */
    public boolean isParamRequired() {
        return this.paramRequired;
    }

    /** @return {@code true} when the value of the option is compressed */
    public boolean isCompressed() {
        return this.compressed;
    }

    /**
     * Marks the value of the option as compressed or not.
     *
     * @param compressed the new value of the flag
     */
    public void setCompressed(boolean compressed) {
        this.compressed = compressed;
    }
}

View File

@ -47,4 +47,13 @@ public class Relation implements Serializable {
/**
 * Converts this relation into an {@code Edge} whose two endpoints are the {@code Deduper.hash}
 * of the source and of the target, and whose attribute is the relation type.
 *
 * @return the edge built from this relation
 */
public Edge<String> toEdgeRdd(){
return new Edge<>(Deduper.hash(source), Deduper.hash(target), type);
}
/**
 * Renders the relation as {@code Relation{source='...', target='...', type='...'}}.
 */
@Override
public String toString() {
    return String.format("Relation{source='%s', target='%s', type='%s'}", source, target, type);
}
}

View File

@ -0,0 +1,18 @@
<configuration>
<property>
<name>jobTracker</name>
<value>yarnRM</value>
</property>
<property>
<name>nameNode</name>
<value>hdfs://nameservice1</value>
</property>
<property>
<name>oozie.use.system.libpath</name>
<value>true</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
</configuration>

View File

@ -0,0 +1,204 @@
<workflow-app name="Deduplication WF" xmlns="uri:oozie:workflow:0.5">
<parameters>
<property>
<name>entitiesPath</name>
<description>the input entity path</description>
</property>
<property>
<name>workingPath</name>
<description>path for the working directory</description>
</property>
<property>
<name>numPartitions</name>
<description>number of partitions for the spark files</description>
</property>
<property>
<name>dedupConfPath</name>
<description>path for the dedup configuration file</description>
</property>
<property>
<name>groundTruthFieldJPath</name>
<description>jpath of the field to be used as ground truth</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
</property>
<property>
<name>sparkExecutorMemory</name>
<description>memory for individual executor</description>
</property>
<property>
<name>sparkExecutorCores</name>
<description>number of cores used by single executor</description>
</property>
<property>
<name>oozieActionShareLibForSpark2</name>
<description>oozie action sharelib for spark 2.*</description>
</property>
<property>
<name>spark2ExtraListeners</name>
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
<description>spark 2.* extra listeners classname</description>
</property>
<property>
<name>spark2SqlQueryExecutionListeners</name>
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
<description>spark 2.* sql query execution listeners classname</description>
</property>
<property>
<name>spark2YarnHistoryServerAddress</name>
<description>spark 2.* yarn history server address</description>
</property>
<property>
<name>spark2EventLogDir</name>
<description>spark 2.* event log dir location</description>
</property>
</parameters>
<global>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
<configuration>
<property>
<name>mapreduce.job.queuename</name>
<value>${queueName}</value>
</property>
<property>
<name>oozie.launcher.mapred.job.queue.name</name>
<value>${oozieLauncherQueueName}</value>
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>${oozieActionShareLibForSpark2}</value>
</property>
</configuration>
</global>
<start to="resetWorkingPath"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<action name="resetWorkingPath">
<fs>
<delete path="${workingPath}"/>
</fs>
<ok to="CreateSimRels"/>
<error to="Kill"/>
</action>
<action name="CreateSimRels">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Create Similarity Relations</name>
<class>eu.dnetlib.jobs.SparkCreateSimRels</class>
<jar>dnet-dedup-test-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=32
--executor-memory=12G
--executor-cores=4
--driver-memory=4G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--conf spark.dynamicAllocation.enabled=false
</spark-opts>
<arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--numPartitions</arg><arg>${numPartitions}</arg>
<arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
<arg>--useTree</arg><arg>${useTree}</arg>
</spark>
<ok to="CreateMergeRels"/>
<error to="Kill"/>
</action>
<action name="CreateMergeRels">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Create Merge Relations</name>
<class>eu.dnetlib.jobs.SparkCreateMergeRels</class>
<jar>dnet-dedup-test-${projectVersion}.jar</jar>
<spark-opts>
--num-executors=32
--executor-memory=12G
--executor-cores=4
--driver-memory=4G
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
--conf spark.dynamicAllocation.enabled=true
</spark-opts>
<arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--numPartitions</arg><arg>${numPartitions}</arg>
<arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
</spark>
<ok to="CreateGroupEntities"/>
<error to="Kill"/>
</action>
<action name="CreateGroupEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Create Group Entities</name>
<class>eu.dnetlib.jobs.SparkCreateGroupEntity</class>
<jar>dnet-dedup-test-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--numPartitions</arg><arg>${numPartitions}</arg>
<arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
</spark>
<ok to="ComputeStatistics"/>
<error to="Kill"/>
</action>
<action name="ComputeStatistics">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>Compute Statistics</name>
<class>eu.dnetlib.jobs.SparkComputeStatistics</class>
<jar>dnet-dedup-test-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory=${sparkExecutorMemory}
--executor-cores=${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=3840
</spark-opts>
<arg>--entitiesPath</arg><arg>${entitiesPath}</arg>
<arg>--workingPath</arg><arg>${workingPath}</arg>
<arg>--numPartitions</arg><arg>${numPartitions}</arg>
<arg>--dedupConfPath</arg><arg>${dedupConfPath}</arg>
<arg>--groundTruthFieldJPath</arg><arg>${groundTruthFieldJPath}</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>

View File

@ -0,0 +1,32 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "dedup configuration to be used",
"paramRequired": true
},
{
"paramName": "gt",
"paramLongName": "groundTruthFieldJPath",
"paramDescription": "field to be used as groundtruth",
"paramRequired": true
}
]

View File

@ -0,0 +1,26 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": false
}
]

View File

@ -0,0 +1,26 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": false
}
]

View File

@ -0,0 +1,32 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": true
},
{
"paramName": "ut",
"paramLongName": "useTree",
"paramDescription": "choose whether to use the tree configuration or not",
"paramRequired": true
}
]

File diff suppressed because one or more lines are too long

View File

@ -1,37 +1,57 @@
package eu.dnetlib.pace;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.FieldListImpl;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.support.ConnectedComponent;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.JavaRDD;
import org.codehaus.jackson.map.ObjectMapper;
import scala.Tuple2;
import java.io.*;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class DedupTestUtils {
/**
 * Renders the fields of a document as an HTML table with one row per field.
 *
 * <p>Fields of type {@code Type.String} are rendered with their string value, fields of type
 * {@code Type.List} as a semicolon separated list between square brackets; fields of any other
 * type are skipped. Values are inserted verbatim, without HTML escaping.
 *
 * @param doc the document to render
 * @return the HTML markup of the table
 */
public static String prepareTable(MapDocument doc) {
    final StringBuilder html = new StringBuilder("<table>");
    doc.getFieldMap().forEach((fieldName, field) -> {
        if (field.getType().equals(Type.String)) {
            html.append("<tr><th>").append(fieldName).append("</th><td>")
                .append(field.stringValue())
                .append("</td></tr>");
        } else if (field.getType().equals(Type.List)) {
            html.append("<tr><th>").append(fieldName).append("</th><td>[")
                .append(String.join(";", ((FieldListImpl) field).stringList()))
                .append("]</td></tr>");
        }
    });
    return html.append("</table>").toString();
}
/**
 * Fills an HTML template with the JSON description of a graph and writes the result to a file.
 *
 * <p>Every vertex becomes a node whose id is its position in {@code vertexes}, whose label is a
 * slice of the vertex id (characters 3 to 20, underscores removed) and whose title is the table
 * produced by {@link #prepareTable(MapDocument)}. Every tuple becomes an edge between the
 * positions of its two vertexes ({@code -1} for a vertex missing from {@code vertexes}). The
 * placeholders {@code %nodes%} and {@code %edges%} of the template are replaced by the JSON
 * serialization of nodes and edges. Template and output are read and written as UTF-8.
 *
 * <p>I/O failures are reported on standard error and otherwise ignored (best effort helper).
 *
 * @param vertexes the ids of the vertexes of the graph
 * @param edgesTuple the edges, as pairs of vertex ids
 * @param filePath the path of the file to write
 * @param templateFilePath the path of the template file
 * @param mapDocuments the documents, indexed by vertex id
 */
public static void prepareGraphParams(List<String> vertexes, List<Tuple2<String, String>> edgesTuple, String filePath, String templateFilePath, Map<String, MapDocument> mapDocuments) {

    // position of the first occurrence of each vertex: same result as List.indexOf, without the
    // linear scan per element
    final Map<String, Integer> positions = new java.util.HashMap<>();
    for (int i = 0; i < vertexes.size(); i++) {
        positions.putIfAbsent(vertexes.get(i), i);
    }

    List<Node> nodes = vertexes
        .stream()
        .map(v -> {
            // clamp the slice so that ids shorter than 20 characters do not throw
            final String label = v.substring(Math.min(3, v.length()), Math.min(20, v.length())).replaceAll("_", "");
            return new Node(label, positions.get(v), prepareTable(mapDocuments.get(v)));
        })
        .collect(Collectors.toList());

    List<Edge> edges = edgesTuple
        .stream()
        .map(e -> new Edge(positions.getOrDefault(e._1(), -1), positions.getOrDefault(e._2(), -1)))
        .collect(Collectors.toList());

    try {
        final ObjectMapper mapper = new ObjectMapper();

        // read the template before opening the output, so a missing template does not leave an
        // empty output file behind
        final String fullText;
        try (Reader reader = new InputStreamReader(new FileInputStream(templateFilePath), java.nio.charset.StandardCharsets.UTF_8)) {
            fullText = IOUtils.toString(reader);
        }

        // literal replacement: String.replaceAll would interpret '$' and '\' inside the JSON
        final String s = fullText
            .replace("%nodes%", mapper.writeValueAsString(nodes))
            .replace("%edges%", mapper.writeValueAsString(edges));

        try (Writer fw = new OutputStreamWriter(new FileOutputStream(filePath), java.nio.charset.StandardCharsets.UTF_8)) {
            IOUtils.write(s, fw);
        }
    } catch (IOException e) {
        // best effort: the generated file is only a visual aid
        e.printStackTrace();
    }
}
public static String getOrganizationLegalname(MapDocument mapDocument){
return mapDocument.getFieldMap().get("legalname").stringValue();
@ -47,3 +67,65 @@ public abstract class DedupTestUtils {
}
}
/**
 * Simple bean describing a node: a label, a numeric id and a title.
 */
class Node {

    /** Label of the node. */
    String label;

    /** Numeric id of the node. */
    int id;

    /** Title of the node. */
    String title;

    /**
     * Creates a node.
     *
     * @param label the label of the node
     * @param id the numeric id of the node
     * @param title the title of the node
     */
    public Node(String label, int id, String title) {
        this.id = id;
        this.label = label;
        this.title = title;
    }

    /** @return the label of the node */
    public String getLabel() {
        return this.label;
    }

    /** @param label the new label of the node */
    public void setLabel(String label) {
        this.label = label;
    }

    /** @return the numeric id of the node */
    public int getId() {
        return this.id;
    }

    /** @param id the new numeric id of the node */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the title of the node */
    public String getTitle() {
        return this.title;
    }

    /** @param title the new title of the node */
    public void setTitle(String title) {
        this.title = title;
    }
}
/**
 * Simple bean describing an edge between two nodes, referenced by their numeric ids.
 */
class Edge {

    /** Id of the node the edge starts from. */
    int from;

    /** Id of the node the edge points to. */
    int to;

    /**
     * Creates an edge.
     *
     * @param from the id of the starting node
     * @param to the id of the ending node
     */
    public Edge(int from, int to) {
        this.to = to;
        this.from = from;
    }

    /** @return the id of the starting node */
    public int getFrom() {
        return this.from;
    }

    /** @param from the new id of the starting node */
    public void setFrom(int from) {
        this.from = from;
    }

    /** @return the id of the ending node */
    public int getTo() {
        return this.to;
    }

    /** @param to the new id of the ending node */
    public void setTo(int to) {
        this.to = to;
    }
}

View File

@ -1,39 +0,0 @@
package eu.dnetlib.pace.clustering;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.MapDocument;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Before;
import org.junit.Test;
/**
 * Placeholder for the clustering combiner tests: the class currently declares no members.
 * The former tests (testCombine, testCombineBlacklistAware) are kept below as comments until
 * they are re-implemented against the new configuration, as stated by the TODO.
 */
public class ClusteringCombinerTest {
// TODO RE IMPLEMENT Tests with the new configuration
// private static final Log log = LogFactory.getLog(ClusteringCombinerTest.class);
//
// private Config config;
//
// @Before
// public void setUp() {
// config = getOrganizationTestConf();
// }
//
// @Test
// public void testCombine() {
//
// final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
// log.info("University of Turin");
// log.info(ClusteringCombiner.combine(organization, config));
// }
//
// @Test
// public void testCombineBlacklistAware() {
//
// final MapDocument organization = organization(config, "A", "University of Turin", "UNITO");
// log.info("University of Turin");
// log.info(BlacklistAwareClusteringCombiner.filterAndCombine(organization, config));
// }
}

View File

@ -0,0 +1,134 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "author",
"subEntityType": "author",
"subEntityValue": "author",
"orderField": "fullname",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering" : [
{ "name" : "lnfi", "fields" : [ "fullname" ], "params" : {} }
],
"decisionTree": {
"start": {
"fields": [
{
"field": "orcid",
"comparator": "exactMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "orcids",
"ignoreUndefined": "true"
},
"orcids": {
"fields": [
{
"field": "orcids",
"comparator": "stringListMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {"type": "count"}
}
],
"threshold": 3.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "coauthors",
"undefined": "coauthors",
"ignoreUndefined": "true"
},
"coauthors": {
"fields": [
{
"field": "coauthors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {"type": "count"}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "topicsMatch",
"negative": "NO_MATCH",
"undefined": "topicsMatch",
"ignoreUndefined": "true"
},
"topicsMatch": {
"fields": [
{
"field": "topics",
"comparator": "cosineSimilarity",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "false"
}
},
"model": [
{
"name": "topics",
"type": "DoubleArray",
"path": "$.topics"
},
{
"name": "fullname",
"type": "String",
"path": "$.fullname"
},
{
"name": "orcid",
"type": "String",
"path": "$.orcid"
},
{
"name": "coauthors",
"type": "List",
"path": "$.coAuthors[*].fullname"
},
{
"name": "orcids",
"type": "List",
"path": "$.coAuthors[*].orcid"
}
],
"blacklists": {},
"synonyms": {}
}
}

View File

@ -0,0 +1,134 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "author",
"subEntityType": "author",
"subEntityValue": "author",
"orderField": "fullname",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering" : [
{ "name" : "lnfi", "fields" : [ "fullname" ], "params" : {} }
],
"decisionTree": {
"start": {
"fields": [
{
"field": "orcid",
"comparator": "exactMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "orcids",
"ignoreUndefined": "true"
},
"orcids": {
"fields": [
{
"field": "orcids",
"comparator": "stringListMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {"type": "count"}
}
],
"threshold": 3.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "coauthors",
"undefined": "coauthors",
"ignoreUndefined": "true"
},
"coauthors": {
"fields": [
{
"field": "coauthors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {"type": "count"}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "topicsMatch",
"negative": "NO_MATCH",
"undefined": "topicsMatch",
"ignoreUndefined": "true"
},
"topicsMatch": {
"fields": [
{
"field": "topics",
"comparator": "cosineSimilarity",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "false"
}
},
"model": [
{
"name": "topics",
"type": "DoubleArray",
"path": "$.topics"
},
{
"name": "fullname",
"type": "String",
"path": "$.fullname"
},
{
"name": "orcid",
"type": "String",
"path": "$.orcid"
},
{
"name": "coauthors",
"type": "List",
"path": "$.coAuthors[*].fullname"
},
{
"name": "orcids",
"type": "List",
"path": "$.coAuthors[*].orcid"
}
],
"blacklists": {},
"synonyms": {}
}
}

View File

@ -0,0 +1,87 @@
{
"wf" : {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "datasource",
"orderField" : "englishname",
"queueMaxSize" : "2000",
"groupMaxSize" : "50",
"slidingWindowSize" : "200",
"idPath":"$.id",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true",
"maxIterations": "20"
},
"pace" : {
"clustering" : [
{ "name" : "sortedngrampairs", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 2, "ngramLen" : "3", "collapseOn:name": "0"} },
{ "name" : "suffixprefix", "fields" : [ "officialname", "englishname" ], "params" : { "max" : 1, "len" : "3", "collapseOn:name": "0" } },
{"name" : "ngrams", "fields" : ["officialname", "englishname"], "params" : {"ngramLen": 4, "max" : 2, "maxPerToken": 2, "minNgramLen": 1, "collapseOn:name": "0"}},
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
],
"decisionTree" : {
"start": {
"fields": [
{
"field": "websiteurl",
"comparator": "domainExactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1,
"aggregation": "AVG",
"positive": "layer2",
"negative": "NO_MATCH",
"undefined": "layer2",
"ignoreUndefined": "true"
},
"layer2": {
"fields": [
{
"field": "officialname",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {
"threshold": 0.9
}
},
{
"field": "englishname",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {
"threshold": 0.9
}
},
{
"field": "officialname",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {
"crossCompare": "englishname",
"threshold": 0.9
}
}
],
"threshold": 0.9,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
}
},
"model" : [
{ "name" : "englishname", "type" : "String", "path" : "$.englishname" },
{ "name" : "officialname", "type" : "String", "path" : "$.officialname" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl" }
],
"blacklists" : {},
"synonyms": {}
}
}

View File

@ -3,8 +3,9 @@
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "organization",
"subEntityValue": "organization",
"orderField" : "legalname",
"queueMaxSize" : "2000",
"queueMaxSize" : "100000",
"groupMaxSize" : "50",
"slidingWindowSize" : "200",
"idPath":"$.id",
@ -143,10 +144,10 @@
}
},
"model" : [
{ "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
{ "name" : "country", "type" : "String", "path" : "$.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"},
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
],
@ -154,7 +155,7 @@
"legalname" : []
},
"synonyms": {
"key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"],
"key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti", "Πανεπιστήμιο", "panepistemio"],
"key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"],
"key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"],
"key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"],
@ -163,7 +164,7 @@
"key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"],
"key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"],
"key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"],
"key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"],
"key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό", "eθνικό"],
"key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"],
"key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"],
"key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"],

View File

@ -71,6 +71,13 @@
"weight": 1,
"countIfUndefined": "false",
"params": {}
},
{
"field": "rorid",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1,
@ -115,8 +122,8 @@
"aggregation": "AND",
"positive": "layer3",
"negative": "NO_MATCH",
"undefined": "layer3",
"ignoreUndefined": "true"
"undefined": "NO_MATCH",
"ignoreUndefined": "false"
},
"layer3": {
"fields": [
@ -184,11 +191,12 @@
}
},
"model" : [
{ "name" : "country", "type" : "String", "path" : "$.organization.metadata.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "$.organization.metadata.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.organization.metadata.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.organization.metadata.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid.ac')].value"},
{ "name" : "country", "type" : "String", "path" : "$.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='GRID')].value"},
{ "name" : "rorid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='ROR')].value"},
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
],
"blacklists" : {

View File

@ -0,0 +1,268 @@
{
"wf" : {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "organization",
"orderField" : "legalname",
"queueMaxSize" : "2000",
"groupMaxSize" : "50",
"slidingWindowSize" : "200",
"idPath":"$.id",
"rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
"includeChildren" : "true",
"maxIterations": "20"
},
"pace" : {
"clustering" : [
{ "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
{ "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
{ "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } },
{ "name" : "keywordsclustering", "fields" : [ "legalname" ], "params" : { "max": 2, "windowSize": 4} }
],
"decisionTree" : {
"start": {
"fields": [
{
"field": "gridid",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "layer2",
"ignoreUndefined": "false"
},
"layer2": {
"fields": [
{
"field": "websiteurl",
"comparator": "domainExactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
},
{
"field": "country",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}
},
{
"field": "legalname",
"comparator": "numbersMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}
},
{
"field": "legalname",
"comparator": "romansMatch",
"weight": 1,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 1,
"aggregation": "AND",
"positive": "layer3",
"negative": "NO_MATCH",
"undefined": "layer3",
"ignoreUndefined": "true"
},
"layer3": {
"fields": [
{
"field": "legalname",
"comparator": "cityMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {
"windowSize": "4"
}
}
],
"threshold": 0.1,
"aggregation": "AVG",
"positive": "layer4",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"layer4": {
"fields": [
{
"field": "legalname",
"comparator": "keywordMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {
"windowSize": "4"
}
}
],
"threshold": 0.7,
"aggregation": "AVG",
"positive": "layer5",
"negative": "NO_MATCH",
"undefined": "layer5",
"ignoreUndefined": "true"
},
"layer5": {
"fields": [
{
"field": "legalname",
"comparator": "jaroWinklerNormalizedName",
"weight": 0.9,
"countIfUndefined": "true",
"params": {
"windowSize": "4"
}
},
{
"field": "legalshortname",
"comparator": "jaroWinklerNormalizedName",
"weight": 0.1,
"countIfUndefined": "false",
"params": {
"windowSize": 4
}
}
],
"threshold": 0.9,
"aggregation": "W_MEAN",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
}
},
"model" : [
{ "name" : "country", "type" : "String", "path" : "$.country.classid"},
{ "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"},
{ "name" : "legalname", "type" : "String", "path" : "$.legalname.value" },
{ "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" },
{ "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"},
{ "name" : "originalId", "type" : "String", "path" : "$.id" }
],
"blacklists" : {
"legalname" : []
},
"synonyms": {
"key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"],
"key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"],
"key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"],
"key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"],
"key::5": ["hospital","ospedale","hôpital","hospital","hospital","Krankenhaus","szpital","больница","ziekenhuis","νοσοκομείο"],
"key::6": ["research","ricerca","recherche","investigacion","pesquisa","Forschung","badania","исследования","onderzoek","έρευνα","erevna","erevnas"],
"key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"],
"key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"],
"key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"],
"key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"],
"key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"],
"key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"],
"key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"],
"key::14": ["community","comunita","communauté","comunidad","comunidade","Gemeinschaft","społeczność","сообщество","gemeenschap","κοινότητα"],
"key::15": ["school","scuola","école","escuela","escola","schule","Szkoła","школа","school","σχολείο"],
"key::16": ["education","educazione","éducation","educacion","Educação","Bildung","Edukacja","образование","opleiding","εκπαίδευση"],
"key::17": ["academy","accademia","académie","academia","academia","Akademie","akademie","академия","academie","ακαδημία"],
"key::18": ["public","pubblico","public","publique","publics","publiques","publico","publico","Öffentlichkeit","publiczny","публичный","publiek","publieke","δημόσιος","δημόσια","δημόσιο"],
"key::19": ["museum","museo","musée","mueso","museu","museum","muzeum","музей","museum","μουσείο"],
"key::20": ["group","gruppo","groupe","grupo","grupo","gruppe","grupa","группа","groep","ομάδα","όμιλος"],
"key::21": ["department","dipartimento","département","departamento","departamento","abteilung","departament","отдел","afdeling","τμήμα"],
"key::22": ["council","consiglio","conseil","Consejo","conselho","gesellschaft","rada","совет","raad","συμβούλιο"],
"key::23": ["library","biblioteca","bibliothèque","biblioteca","biblioteca","Bibliothek","biblioteka","библиотека","bibliotheek","βιβλιοθήκη"],
"key::24": ["ministry","ministero","ministère","ministerio","ministério","Ministerium","ministerstwo","министерство","ministerie","υπουργείο"],
"key::25": ["services","servizi","services","servicios","Serviços","Dienstleistungen","usługi","услуги","diensten","υπηρεσίες"],
"key::26": ["central","centrale","central","centrale","centrales","central","central","zentral","centralny","центральный","centraal","κεντρικός","κεντρική","κεντρικό","κεντρικά"],
"key::27": ["general","generale","général","générale","généraux","générales","general","geral","general","Allgemeines","general","общий","algemeen","algemene","γενικός","γενική","γενικό","γενικά"],
"key::28": ["applied","applicati","appliqué","appliquée","appliqués","appliquées","aplicado","aplicada","angewendet","stosowany","прикладной","toegepast","toegepaste","εφαρμοσμένος","εφαρμοσμένη","εφαρμοσμένο","εφαρμοσμένα"],
"key::29": ["european","europee","europea","européen","européenne","européens","européennes","europeo","europeu","europäisch","europejski","европейский","Europees","Europese","ευρωπαϊκός","ευρωπαϊκή","ευρωπαϊκό","ευρωπαϊκά"],
"key::30": ["agency","agenzia","agence","agencia","agencia","agentur","agencja","агенция","agentschap","πρακτορείο"],
"key::31": ["laboratory","laboratorio","laboratoire","laboratorio","laboratorio","labor","laboratorium","лаборатория","laboratorium","εργαστήριο"],
"key::32": ["industry","industria","industrie","индустрия","industrie","βιομηχανία"],
"key::33": ["industrial","industriale","industriel","industrielle","industriels","industrielles","индустриальный","industrieel","βιομηχανικός","βιομηχανική","βιομηχανικό","βιομηχανικά","βιομηχανικές"],
"key::34": ["consortium","consorzio","consortium","консорциум","consortium","κοινοπραξία"],
"key::35": ["organization","organizzazione","organisation","organización","organização","organizacja","организация","organisatie","οργανισμός"],
"key::36": ["authority","autorità","autorité","авторитет","autoriteit"],
"key::37": ["federation","federazione","fédération","федерация","federatie","ομοσπονδία"],
"key::38": ["observatory","osservatorio","observatoire","обсерватория","observatorium","αστεροσκοπείο"],
"key::39": ["bureau","ufficio","bureau","офис","bureau","γραφείο"],
"key::40": ["company","impresa","compagnie","société","компания","bedrijf","εταιρία"],
"key::41": ["polytechnic","politecnico","polytechnique","политехника","polytechnisch","πολυτεχνείο","universita politecnica","polytechnic university","universidad politecnica","universitat politecnica","politechnika","politechniki","university technology","university science technology"],
"key::42": ["coalition","coalizione","coalition","коалиция","coalitie","συνασπισμός"],
"key::43": ["initiative","iniziativa","initiative","инициатива","initiatief","πρωτοβουλία"],
"key::44": ["academic","accademico","académique","universitaire","академический","academisch","ακαδημαϊκός","ακαδημαϊκή","ακαδημαϊκό","ακαδημαϊκές","ακαδημαϊκοί"],
"key::45": ["institution","istituzione","institution","институциональный","instelling","ινστιτούτο"],
"key::46": ["division","divisione","division","отделение","divisie","τμήμα"],
"key::47": ["committee","comitato","comité","комитет","commissie","επιτροπή"],
"key::48": ["promotion","promozione","продвижение","proothisis","forderung"],
"key::49": ["medical","medicine","clinical","medicina","clinici","médico","medicina","clínica","médico","medicina","clínica","medizinisch","Medizin","klinisch","medisch","geneeskunde","klinisch","ιατρικός","ιατρική","ιατρικό","ιατρικά","κλινικός","κλινική","κλινικό","κλινικά","tıbbi","tıp","klinik","orvosi","orvostudomány","klinikai","zdravniški","medicinski","klinični","meditsiini","kliinik","kliiniline"],
"key::50": ["technology","technological","tecnologia","tecnologie","tecnología","tecnológico","tecnologia","tecnológico","Technologie","technologisch","technologie","technologisch","τεχνολογία","τεχνολογικός","τεχνολογική","τεχνολογικό","teknoloji","teknolojik","technológia","technológiai","tehnologija","tehnološki","tehnoloogia","tehnoloogiline","technologii","technical","texniki","teknik"],
"key::51": ["science","scientific","scienza","scientifiche","scienze","ciencia","científico","ciência","científico","Wissenschaft","wissenschaftlich","wetenschap","wetenschappelijk","επιστήμη","επιστημονικός","επιστημονική","επιστημονικό","επιστημονικά","bilim","bilimsel","tudomány","tudományos","znanost","znanstveni","teadus","teaduslik",""],
"key::52": ["engineering","ingegneria","ingeniería","engenharia","Ingenieurwissenschaft","ingenieurswetenschappen","bouwkunde","μηχανικός","μηχανική","μηχανικό","mühendislik","mérnöki","Inženirstvo","inseneeria","inseneri",""],
"key::53": ["management","gestione","gestionale","gestionali","gestión","administración","gestão","administração","Verwaltung","management","διαχείριση","yönetim","menedzsment","vodstvo","upravljanje","management","juhtkond","juhtimine","haldus",""],
"key::54": ["energy","energia","energía","energia","Energie","energie","ενέργεια","enerji","energia","energija","energia",""],
"key::55": ["agricultural","agriculture","agricoltura","agricole","agrícola","agricultura","agrícola","agricultura","landwirtschaftlich","Landwirtschaft","landbouwkundig","landbouw","αγροτικός","αγροτική","αγροτικό","γεωργικός","γεωργική","γεωργικό","γεωργία","tarımsal","tarım","mezőgazdasági","mezőgazdaság","poljedelski","poljedelstvo","põllumajandus","põllumajanduslik",""],
"key::56": ["information","informazione","información","informação","Information","informatie","πληροφορία","bilgi","információ","informacija","informatsioon","informatycznych",""],
"key::57": ["social","sociali","social","social","Sozial","sociaal","maatschappelijk","κοινωνικός","κοινωνική","κοινωνικό","κοινωνικά","sosyal","szociális","družbeni","sotsiaal","sotsiaalne",""],
"key::58": ["environmental","ambiente","medioambiental","ambiente","medioambiente","meioambiente","Umwelt","milieu","milieuwetenschap","milieukunde","περιβαλλοντικός","περιβαλλοντική","περιβαλλοντικό","περιβαλλοντικά","çevre","környezeti","okoliški","keskonna",""],
"key::59": ["business","economia","economiche","economica","negocio","empresa","negócio","Unternehmen","bedrijf","bedrijfskunde","επιχείρηση","iş","üzleti","posel","ettevõte/äri",""],
"key::60": ["pharmaceuticals","pharmacy","farmacia","farmaceutica","farmacéutica","farmacia","farmacêutica","farmácia","Pharmazeutika","Arzneimittelkunde","farmaceutica","geneesmiddelen","apotheek","φαρμακευτικός","φαρμακευτική","φαρμακευτικό","φαρμακευτικά","φαρμακείο","ilaç","eczane","gyógyszerészeti","gyógyszertár","farmacevtika","lekarništvo","farmaatsia","farmatseutiline",""],
"key::61": ["healthcare","health services","salute","atenciónmédica","cuidadodelasalud","cuidadoscomasaúde","Gesundheitswesen","gezondheidszorg","ιατροφαρμακευτικήπερίθαλψη","sağlıkhizmeti","egészségügy","zdravstvo","tervishoid","tervishoiu",""],
"key::62": ["history","storia","historia","história","Geschichte","geschiedenis","geschiedkunde","ιστορία","tarih","történelem","zgodovina","ajalugu",""],
"key::63": ["materials","materiali","materia","materiales","materiais","materialen","υλικά","τεκμήρια","malzemeler","anyagok","materiali","materjalid","vahendid",""],
"key::64": ["economics","economia","economiche","economica","economía","economia","Wirtschaft","economie","οικονομικά","οικονομικέςεπιστήμες","ekonomi","közgazdaságtan","gospodarstvo","ekonomija","majanduslik","majandus",""],
"key::65": ["therapeutics","terapeutica","terapéutica","terapêutica","therapie","θεραπευτική","tedavibilimi","gyógykezelés","terapevtika","terapeutiline","ravi",""],
"key::66": ["oncology","oncologia","oncologico","oncología","oncologia","Onkologie","oncologie","ογκολογία","onkoloji","onkológia","onkologija","onkoloogia",""],
"key::67": ["natural","naturali","naturale","natural","natural","natürlich","natuurlijk","φυσικός","φυσική","φυσικό","φυσικά","doğal","természetes","naraven","loodus",""],
"key::68": ["educational","educazione","pedagogia","educacional","educativo","educacional","pädagogisch","educatief","εκπαιδευτικός","εκπαιδευτική","εκπαιδευτικό","εκπαιδευτικά","eğitimsel","oktatási","izobraževalen","haridus","hariduslik",""],
"key::69": ["biomedical","biomedica","biomédico","biomédico","biomedizinisch","biomedisch","βιοιατρικός","βιοιατρική","βιοιατρικό","βιοιατρικά","biyomedikal","orvosbiológiai","biomedicinski","biomeditsiiniline",""],
"key::70": ["veterinary","veterinaria","veterinarie","veterinaria","veterinária","tierärtzlich","veterinair","veeartsenijlkunde","κτηνιατρικός","κτηνιατρική","κτηνιατρικό","κτηνιατρικά","veteriner","állatorvosi","veterinar","veterinarski","veterinaaria",""],
"key::71": ["chemistry","chimica","química","química","Chemie","chemie","scheikunde","χημεία","kimya","kémia","kemija","keemia",""],
"key::72": ["security","sicurezza","seguridad","segurança","Sicherheit","veiligheid","ασφάλεια","güvenlik","biztonsági","varnost","turvalisus","julgeolek",""],
"key::73": ["biotechnology","biotecnologia","biotecnologie","biotecnología","biotecnologia","Biotechnologie","biotechnologie","βιοτεχνολογία","biyoteknoloji","biotechnológia","biotehnologija","biotehnoloogia",""],
"key::74": ["military","militare","militari","militar","militar","Militär","militair","leger","στρατιωτικός","στρατιωτική","στρατιωτικό","στρατιωτικά","askeri","katonai","vojaški","vojni","militaar","wojskowa",""],
"key::75": ["theological","teologia","teologico","teológico","teológica","theologisch","theologisch","θεολογικός","θεολογική","θεολογικό","θεολογικά","teolojik","teológiai","teološki","teoloogia","usuteadus","teoloogiline",""],
"key::76": ["electronics","elettronica","electrónica","eletrônicos","Elektronik","elektronica","ηλεκτρονική","elektronik","elektronika","elektronika","elektroonika",""],
"key::77": ["forestry","forestale","forestali","silvicultura","forestal","floresta","Forstwirtschaft","bosbouw","δασοκομία","δασολογία","ormancılık","erdészet","gozdarstvo","metsandus",""],
"key::78": ["maritime","marittima","marittime","marittimo","marítimo","marítimo","maritiem","ναυτικός","ναυτική","ναυτικό","ναυτικά","ναυτιλιακός","ναυτιλιακή","ναυτιλιακό","ναυτιλιακά","θαλάσσιος","θαλάσσια","θαλάσσιο","denizcilik","tengeri","morski","mere","merendus",""],
"key::79": ["sports","sport","deportes","esportes","Sport","sport","sportwetenschappen","άθληση","γυμναστικήδραστηριότητα","spor","sport","šport","sport","spordi",""],
"key::80": ["surgery","chirurgia","chirurgiche","cirugía","cirurgia","Chirurgie","chirurgie","heelkunde","εγχείρηση","επέμβαση","χειρουργικήεπέμβαση","cerrahi","sebészet","kirurgija","kirurgia",""],
"key::81": ["cultural","culturale","culturali","cultura","cultural","cultural","kulturell","cultureel","πολιτιστικός","πολιτιστική","πολιτιστικό","πολιτισμικός","πολιτισμική","πολιτισμικό","kültürel","kultúrális","kulturni","kultuuri","kultuuriline",""],
"key::82": ["computerscience","informatica","ordenador","computadora","informática","computación","cienciasdelacomputación","ciênciadacomputação","Computer","computer","υπολογιστής","ηλεκτρονικόςυπολογιστής","bilgisayar","számítógép","računalnik","arvuti",""],
"key::83": ["finance","financial","finanza","finanziarie","finanza","financiero","finanças","financeiro","Finanzen","finanziell","financiën","financieel","χρηματοοικονομικά","χρηματοδότηση","finanse","finansal","pénzügy","pénzügyi","finance","finančni","finants","finantsiline",""],
"key::84": ["communication","comunicazione","comunicación","comunicação","Kommunikation","communication","επικοινωνία","iletişim","kommunikáció","komuniciranje","kommunikatsioon",""],
"key::85": ["justice","giustizia","justicia","justiça","Recht","Justiz","justitie","gerechtigheid","δικαιοσύνη","υπουργείοδικαιοσύνης","δίκαιο","adalet","igazságügy","pravo","õigus",""],
"key::86": ["aerospace","aerospaziale","aerospaziali","aeroespacio","aeroespaço","Luftfahrt","luchtvaart","ruimtevaart","αεροπορικός","αεροπορική","αεροπορικό","αεροναυπηγικός","αεροναυπηγική","αεροναυπηγικό","αεροναυπηγικά","havacılıkveuzay","légtér","zrakoplovstvo","atmosfäär","kosmos",""],
"key::87": ["dermatology","dermatologia","dermatología","dermatologia","Dermatologie","dermatologie","δερματολογία","dermatoloji","bőrgyógyászat","dermatológia","dermatologija","dermatoloogia",""],
"key::88": ["architecture","architettura","arquitectura","arquitetura","Architektur","architectuur","αρχιτεκτονική","mimarlık","építészet","arhitektura","arhitektuur",""],
"key::89": ["mathematics","matematica","matematiche","matemáticas","matemáticas","Mathematik","wiskunde","mathematica","μαθηματικά","matematik","matematika","matematika","matemaatika",""],
"key::90": ["language","lingue","linguistica","linguistiche","lenguaje","idioma","língua","idioma","Sprache","taal","taalkunde","γλώσσα","dil","nyelv","jezik","keel",""],
"key::91": ["neuroscience","neuroscienza","neurociencia","neurociência","Neurowissenschaft","neurowetenschappen","νευροεπιστήμη","nörobilim","idegtudomány","nevroznanost","neuroteadused",""],
"key::92": ["automation","automazione","automatización","automação","Automatisierung","automatisering","αυτοματοποίηση","otomasyon","automatizálás","avtomatizacija","automatiseeritud",""],
"key::93": ["pediatric","pediatria","pediatriche","pediatrico","pediátrico","pediatría","pediátrico","pediatria","pädiatrisch","pediatrische","παιδιατρική","pediatrik","gyermekgyógyászat","pediatrija","pediaatria",""],
"key::94": ["photonics","fotonica","fotoniche","fotónica","fotônica","Photonik","fotonica","φωτονική","fotonik","fotonika","fotonika","fotoonika",""],
"key::95": ["mechanics", "mechanical", "meccanica","meccaniche","mecánica","mecânica","Mechanik","Maschinenbau","mechanica","werktuigkunde","μηχανικής","mekanik","gépészet","mehanika","mehaanika",""],
"key::96": ["psychiatrics","psichiatria","psichiatrica","psichiatriche","psiquiatría","psiquiatria","Psychiatrie","psychiatrie","ψυχιατρική","psikiyatrik","pszihiátria","psihiatrija","psühhaatria",""],
"key::97": ["psychology","psicologia","psicología","psicologia","Psychologie","psychologie","ψυχολογία","psikoloji","pszihológia","psihologija","psühholoogia",""],
"key::98": ["automotive","industriaautomobilistica","industriadelautomóvil","automotriz","industriaautomotriz","automotivo","Automobilindustrie","autoindustrie","αυτοκίνητος","αυτοκίνητη","αυτοκίνητο","αυτοκινούμενος","αυτοκινούμενη","αυτοκινούμενο","αυτοκινητιστικός","αυτοκινητιστική","αυτοκινητιστικό","otomotiv","autóipari","samogiben","avtomobilskaindustrija","auto-",""],
"key::99": ["neurology","neurologia","neurologiche","neurología","neurologia","Neurologie","neurologie","zenuwleer","νευρολογία","nöroloji","neurológia","ideggyógyászat","nevrologija","neuroloogia",""],
"key::100": ["geology","geologia","geologiche","geología","geologia","Geologie","geologie","aardkunde","γεωλογία","jeoloji","geológia","földtudomány","geologija","geoloogia",""],
"key::101": ["microbiology","microbiologia","micro-biologia","microbiologiche","microbiología","microbiologia","Mikrobiologie","microbiologie","μικροβιολογία","mikrobiyoloji","mikrobiológia","mikrobiologija","mikrobioloogia",""],
"key::102": ["informatics","informatica","informática","informática","informatica",""],
"key::103": ["forschungsgemeinschaft","comunita ricerca","research community","research foundation","research association"],
"key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"],
"key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"],
"key::106" : ["seminary", "seminario", "seminaire", "seminar"],
"key::107" : ["agricultural forestry", "af", "a f"],
"key::108" : ["agricultural mechanical", "am", "a m"],
"key::109" : ["catholic", "catholique", "katholische", "catolica", "cattolica", "catolico"]
}
}
}

View File

@ -0,0 +1,442 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "instance",
"comparator": "instanceTypeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "layer1",
"negative": "NO_MATCH",
"undefined": "layer1",
"ignoreUndefined": "true"
},
"layer1": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid"
}
},
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid"
}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "layer2",
"negative": "layer3",
"undefined": "layer3",
"ignoreUndefined": "true"
},
"layer2": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"layer3": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
},
{
"field": "authors",
"comparator": "sizeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "layer4",
"negative": "NO_MATCH",
"undefined": "layer4",
"ignoreUndefined": "false"
},
"layer4": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor('|s)(s|') letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries"
]
},
"synonyms": {}
}
}

View File

@ -0,0 +1,476 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "instanceTypeCheck",
"ignoreUndefined": "false"
},
"instanceTypeCheck": {
"fields": [
{
"field": "instance",
"comparator": "instanceTypeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "pidVSaltid",
"negative": "NO_MATCH",
"undefined": "pidVSaltid",
"ignoreUndefined": "true"
},
"pidVSaltid": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "softCheck",
"negative": "earlyExits",
"undefined": "earlyExits",
"ignoreUndefined": "true"
},
"softCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"earlyExits": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
},
{
"field": "authors",
"comparator": "sizeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "strongCheck",
"negative": "NO_MATCH",
"undefined": "strongCheck",
"ignoreUndefined": "false"
},
"strongCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "surnames",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"surnames": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.75,
"fullname_th": 0.75,
"size_th": 20,
"mode": "surname"
}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "StringConcat",
"path": "$.title[?(@.qualifier.classid == 'main title')].value|||$.title[?(@.qualifier.classid == 'subtitle')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor(|s)(s|) letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries"
]
},
"synonyms": {}
}
}

View File

@ -0,0 +1,348 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor(|s)(s|) letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries"
]
},
"synonyms": {}
}
}

View File

@ -6,10 +6,10 @@
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "2000",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "200",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
@ -28,10 +28,26 @@
"idPath": "$.id"
},
"pace": {
"clustering" : [
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
{ "name" : "lowercase", "fields" : [ "doi" ], "params" : { } }
"clustering": [
{
"name": "wordsStatsSuffixPrefixChain",
"fields": [
"title"
],
"params": {
"mod": "10"
}
},
{
"name": "lowercase",
"fields": [
"doi",
"altdoi"
],
"params": {
"collapseOn:pid": "0"
}
}
],
"decisionTree": {
"start": {
@ -43,18 +59,75 @@
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid"
"jpath_classid": "$.qualifier.classid",
"mode": "count"
}
}
],
"threshold": 0.5,
"aggregation": "AVG",
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "layer2",
"undefined": "layer2",
"negative": "instanceTypeCheck",
"undefined": "instanceTypeCheck",
"ignoreUndefined": "false"
},
"instanceTypeCheck": {
"fields": [
{
"field": "instance",
"comparator": "instanceTypeMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "pidVSaltid",
"negative": "NO_MATCH",
"undefined": "pidVSaltid",
"ignoreUndefined": "true"
},
"layer2": {
"pidVSaltid": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "softCheck",
"negative": "earlyExits",
"undefined": "earlyExits",
"ignoreUndefined": "true"
},
"softCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"earlyExits": {
"fields": [
{
"field": "title",
@ -73,12 +146,12 @@
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "layer3",
"positive": "strongCheck",
"negative": "NO_MATCH",
"undefined": "layer3",
"undefined": "strongCheck",
"ignoreUndefined": "false"
},
"layer3": {
"strongCheck": {
"fields": [
{
"field": "title",
@ -90,28 +163,60 @@
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "MATCH",
"positive": "surnames",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"surnames": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.75,
"fullname_th": 0.75,
"mode": "full"
}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.pid[?(@.qualifier.classid == 'doi')].value"
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.pid",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"type": "StringConcat",
"path": "$.title[?(@.qualifier.classid == 'main title')].value|||$.title[?(@.qualifier.classid == 'subtitle')].value",
"length": 250,
"size": 5
},
@ -125,10 +230,16 @@
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
},
{
"name": "instance",
"type": "List",
"path": "$.instance[*].instancetype.classname"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
@ -354,7 +465,16 @@
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$"
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries",
"(?i)^risky business$",
"(?i)^great expectations\\.?$",
"(?i)^what's in a name\\?$",
"(?i)^decisions, decisions\\.?$",
"(?i)^update to our reader, reviewer, and author communities.*",
"(?i)^lest we forget$",
"(?i)^measure for measure$"
]
},
"synonyms": {}

View File

@ -0,0 +1,381 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "100",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering" : [
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } },
{ "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
],
"decisionTree": {
"start": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"mode": "count"
}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "versionCheck",
"undefined": "versionCheck",
"ignoreUndefined": "true"
},
"versionCheck": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "MAX",
"positive": "titleCheck",
"negative": "NO_MATCH",
"undefined": "titleCheck",
"ignoreUndefined": "false"
},
"titleCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "MAX",
"positive": "authorsCheck",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"authorsCheck": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.6,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "false"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor(|s)(s|) letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries",
"Food and Nutrition"
]
},
"synonyms": {}
}
}

View File

@ -0,0 +1,150 @@
{
"wf" : {
"threshold" : "0.99",
"dedupRun" : "001",
"entityType" : "result",
"subEntityType" : "resulttype",
"subEntityValue" : "software",
"orderField" : "title",
"queueMaxSize" : "200",
"groupMaxSize" : "100",
"maxChildren" : "100",
"slidingWindowSize" : "50",
"rootBuilder" : [ "result", "resultProject_outcome_isProducedBy", "resultResult_publicationDataset_isRelatedTo", "resultResult_similarity_isAmongTopNSimilarDocuments", "resultResult_similarity_hasAmongTopNSimilarDocuments", "resultOrganization_affiliation_hasAuthorInstitution", "resultResult_part_hasPart", "resultResult_part_isPartOf", "resultResult_supplement_isSupplementTo", "resultResult_supplement_isSupplementedBy", "resultResult_version_isVersionOf" ],
"includeChildren" : "true"
},
"pace" : {
"clustering" : [
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
{ "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid":"0"} },
{ "name" : "ngrams", "fields" : [ "title" ], "params" : {"ngramLen": 3, "max": 4, "maxPerToken":1, "minNgramLen":3}},
{ "name" : "urlclustering", "fields": [ "url" ], "params" : {}}
],
"decisionTree": {
"start": {
"fields": [
{
"field": "doi",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1,
"aggregation": "MAX",
"positive": "MATCH",
"negative": "titleCheck",
"undefined": "titleCheck",
"ignoreUndefined": "false"
},
"titleCheck": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitleIgnoreVersion",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 0.95,
"aggregation": "AVG",
"positive": "pidCheck",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "false"
},
"pidCheck": {
"fields": [
{
"field": "altdoi",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
},
{
"field": "doi",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {"crossCompare": "altdoi"}
},
{
"field": "url",
"comparator": "exactMatch",
"weight": 1,
"countIfUndefined": "false",
"params": {}
}
],
"threshold": 1,
"aggregation": "OR",
"positive": "MATCH",
"negative": "authorsCheck",
"undefined": "authorsCheck",
"ignoreUndefined": "false"
},
"authorsCheck": {
"fields": [
{
"field": "authors",
"comparator": "authorsMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"surname_th": 0.70,
"fullname_th": 0.70,
"size_th": 20,
"mode": "surname"
}
}
],
"threshold": 1,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "MATCH",
"ignoreUndefined": "false"
}
},
"model" : [
{
"name" : "doi",
"type" : "String",
"path" : "$.instance.pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name" : "altdoi",
"type" : "String",
"path" : "$.instance.alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name" : "title",
"type" : "String",
"path" : "$.title[?(@.qualifier.classid == 'main title')].value",
"length" : 250,
"size" : 5
},
{
"name" : "url",
"type" : "String",
"path" : "$.instance.url"
},
{
"name" : "resulttype",
"type" : "String",
"path" : "$.resulttype.classid"
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
}
],
"blacklists" : {},
"synonyms": {}
}
}

View File

@ -0,0 +1,403 @@
{
"wf": {
"threshold": "0.99",
"dedupRun": "001",
"entityType": "result",
"subEntityType": "resulttype",
"subEntityValue": "publication",
"orderField": "title",
"queueMaxSize": "200",
"groupMaxSize": "100",
"maxChildren": "100",
"slidingWindowSize": "50",
"rootBuilder": [
"result",
"resultProject_outcome_isProducedBy",
"resultResult_publicationDataset_isRelatedTo",
"resultResult_similarity_isAmongTopNSimilarDocuments",
"resultResult_similarity_hasAmongTopNSimilarDocuments",
"resultOrganization_affiliation_isAffiliatedWith",
"resultResult_part_hasPart",
"resultResult_part_isPartOf",
"resultResult_supplement_isSupplementTo",
"resultResult_supplement_isSupplementedBy",
"resultResult_version_isVersionOf"
],
"includeChildren": "true",
"maxIterations": 20,
"idPath": "$.id"
},
"pace": {
"clustering" : [
{ "name" : "wordsStatsSuffixPrefixChain", "fields" : [ "title" ], "params" : { "mod" : "10" } },
{ "name" : "lowercase", "fields" : [ "doi", "altdoi" ], "params" : { "collapseOn:pid": "0"} }
],
"decisionTree": {
"start": {
"fields": [
{
"field": "title",
"comparator": "titleVersionMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {}
},
{
"field": "authors",
"comparator": "sizeMatch",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 1.0,
"aggregation": "AND",
"positive": "pid_check",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "false"
},
"pid_check": {
"fields": [
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid"
}
},
{
"field": "pid",
"comparator": "jsonListMatch",
"weight": 1.0,
"countIfUndefined": "false",
"params": {
"jpath_value": "$.value",
"jpath_classid": "$.qualifier.classid",
"crossCompare": "alternateid"
}
}
],
"threshold": 0.5,
"aggregation": "MAX",
"positive": "title_check_low_th",
"negative": "title_check_high_th",
"undefined": "title_check_high_th",
"ignoreUndefined": "true"
},
"title_check_low_th": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.9,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
},
"title_check_high_th": {
"fields": [
{
"field": "title",
"comparator": "levensteinTitle",
"weight": 1.0,
"countIfUndefined": "true",
"params": {}
}
],
"threshold": 0.99,
"aggregation": "AVG",
"positive": "MATCH",
"negative": "NO_MATCH",
"undefined": "NO_MATCH",
"ignoreUndefined": "true"
}
},
"model": [
{
"name": "doi",
"type": "String",
"path": "$.instance[*].pid[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "altdoi",
"type": "String",
"path": "$.instance[*].alternateIdentifier[?(@.qualifier.classid == 'doi')].value"
},
{
"name": "pid",
"type": "JSON",
"path": "$.instance[*].pid[*]",
"overrideMatch": "true"
},
{
"name": "alternateid",
"type": "JSON",
"path": "$.instance[*].alternateIdentifier[*]",
"overrideMatch": "true"
},
{
"name": "title",
"type": "String",
"path": "$.title[?(@.qualifier.classid == 'main title')].value",
"length": 250,
"size": 5
},
{
"name": "authors",
"type": "List",
"path": "$.author[*].fullname",
"size": 200
},
{
"name": "resulttype",
"type": "String",
"path": "$.resulttype.classid"
}
],
"blacklists": {
"title": [
"(?i)^Data Management Plan",
"^Inside Front Cover$",
"(?i)^Poster presentations$",
"^THE ASSOCIATION AND THE GENERAL MEDICAL COUNCIL$",
"^Problems with perinatal pathology\\.?$",
"(?i)^Cases? of Puerperal Convulsions$",
"(?i)^Operative Gyna?ecology$",
"(?i)^Mind the gap\\!?\\:?$",
"^Chronic fatigue syndrome\\.?$",
"^Cartas? ao editor Letters? to the Editor$",
"^Note from the Editor$",
"^Anesthesia Abstract$",
"^Annual report$",
"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
"(?i)^Graph and Table of Infectious Diseases?$",
"^Presentation$",
"(?i)^Reviews and Information on Publications$",
"(?i)^PUBLIC HEALTH SERVICES?$",
"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
"(?i)^Adrese autora$",
"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
"(?i)^Acknowledgement to Referees$",
"(?i)^Behçet's disease\\.?$",
"(?i)^Isolation and identification of restriction endonuclease.*$",
"(?i)^CEREBROVASCULAR DISEASES?.?$",
"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
"^Event management$",
"(?i)^Breakfast and Crohn's disease.*\\.?$",
"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\\.?$",
"^Gushi hakubutsugaku$",
"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",
"^Intestinal spirocha?etosis$",
"^Treatment of Rodent Ulcer$",
"(?i)^\\W*Cloud Computing\\W*$",
"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",
"^Free Communications, Poster Presentations: Session [A-F]$",
"^“The Historical Aspects? of Quackery\\.?”$",
"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
"(?i)^Case Report$",
"^Boletín Informativo$",
"(?i)^Glioblastoma Multiforme$",
"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
"^Zaměstnanecké výhody$",
"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
"(?i)^Carotid body tumours?\\.?$",
"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
"^Avant-propos$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
"(?i)^PUBLIC HEALTH VERSUS THE STATE$",
"^Viñetas de Cortázar$",
"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",
"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
"^Aus der AGMB$",
"^Znanstveno-stručni prilozi$",
"(?i)^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
"(?i)^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
"(?i)^Hodnocení finanční situace podniku a návrhy na její zlepšení$",
"^Finanční analýza podniku$",
"^Financial analysis( of business)?$",
"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
"^Jikken nihon shūshinsho$",
"(?i)^CORONER('|s)(s|') INQUESTS$",
"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",
"(?i)^Consultants' contract(s)?$",
"(?i)^Upute autorima$",
"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
"^Joshi shin kokubun$",
"^Kōtō shōgaku dokuhon nōson'yō$",
"^Jinjō shōgaku shōka$",
"^Shōgaku shūjichō$",
"^Nihon joshi dokuhon$",
"^Joshi shin dokuhon$",
"^Chūtō kanbun dokuhon$",
"^Wabun dokuhon$",
"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
"(?i)^cardiac rehabilitation$",
"(?i)^Analytical summary$",
"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$",
"^Prikazi i osvrti$",
"^Rodinný dům s provozovnou$",
"^Family house with an establishment$",
"^Shinsei chūtō shin kokugun$",
"^Pulmonary alveolar proteinosis(\\.?)$",
"^Shinshū kanbun$",
"^Viñeta(s?) de Rodríguez$",
"(?i)^RUBRIKA UREDNIKA$",
"^A Matching Model of the Academic Publication Market$",
"^Yōgaku kōyō$",
"^Internetový marketing$",
"^Internet marketing$",
"^Chūtō kokugo dokuhon$",
"^Kokugo dokuhon$",
"^Antibiotic Cover for Dental Extraction(s?)$",
"^Strategie podniku$",
"^Strategy of an Enterprise$",
"(?i)^respiratory disease(s?)(\\.?)$",
"^Award(s?) for Gallantry in Civil Defence$",
"^Podniková kultura$",
"^Corporate Culture$",
"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
"^Pracovní motivace$",
"^Work Motivation$",
"^Kaitei kōtō jogaku dokuhon$",
"^Konsolidovaná účetní závěrka$",
"^Consolidated Financial Statements$",
"(?i)^intracranial tumour(s?)$",
"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
"^Úroveň motivačního procesu jako způsobu vedení lidí$",
"^The level of motivation process as a leadership$",
"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
"(?i)^news and events$",
"(?i)^NOVOSTI I DOGAĐAJI$",
"^Sansū no gakushū$",
"^Posouzení informačního systému firmy a návrh změn$",
"^Information System Assessment and Proposal for ICT Modification$",
"^Stresové zatížení pracovníků ve vybrané profesi$",
"^Stress load in a specific job$",
"^Sunday: Poster Sessions, Pt.*$",
"^Monday: Poster Sessions, Pt.*$",
"^Wednesday: Poster Sessions, Pt.*",
"^Tuesday: Poster Sessions, Pt.*$",
"^Analýza reklamy$",
"^Analysis of advertising$",
"^Shōgaku shūshinsho$",
"^Shōgaku sansū$",
"^Shintei joshi kokubun$",
"^Taishō joshi kokubun dokuhon$",
"^Joshi kokubun$",
"^Účetní uzávěrka a účetní závěrka v ČR$",
"(?i)^The \"?Causes\"? of Cancer$",
"^Normas para la publicación de artículos$",
"^Editor('|s)(s|') [Rr]eply$",
"^Editor(|s)(s|) letter$",
"^Redaktoriaus žodis$",
"^DISCUSSION ON THE PRECEDING PAPER$",
"^Kōtō shōgaku shūshinsho jidōyō$",
"^Shōgaku nihon rekishi$",
"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
"^Préface$",
"^Occupational [Hh]ealth [Ss]ervices.$",
"^In Memoriam Professor Toshiyuki TAKESHIMA$",
"^Účetní závěrka ve vybraném podniku.*$",
"^Financial statements in selected company$",
"^Abdominal [Aa]ortic [Aa]neurysms.*$",
"^Pseudomyxoma peritonei$",
"^Kazalo autora$",
"(?i)^uvodna riječ$",
"^Motivace jako způsob vedení lidí$",
"^Motivation as a leadership$",
"^Polyfunkční dům$",
"^Multi\\-funkcional building$",
"^Podnikatelský plán$",
"(?i)^Podnikatelský záměr$",
"(?i)^Business Plan$",
"^Oceňování nemovitostí$",
"^Marketingová komunikace$",
"^Marketing communication$",
"^Sumario Analítico$",
"^Riječ uredništva$",
"^Savjetovanja i priredbe$",
"^Índice$",
"^(Starobosanski nadpisi).*$",
"^Vzdělávání pracovníků v organizaci$",
"^Staff training in organization$",
"^(Life Histories of North American Geometridae).*$",
"^Strategická analýza podniku$",
"^Strategic Analysis of an Enterprise$",
"^Sadržaj$",
"^Upute suradnicima$",
"^Rodinný dům$",
"(?i)^Fami(l)?ly house$",
"^Upute autorima$",
"^Strategic Analysis$",
"^Finanční analýza vybraného podniku$",
"^Finanční analýza$",
"^Riječ urednika$",
"(?i)^Content(s?)$",
"(?i)^Inhalt$",
"^Jinjō shōgaku shūshinsho jidōyō$",
"(?i)^Index$",
"^Chūgaku kokubun kyōkasho$",
"^Retrato de una mujer$",
"^Retrato de un hombre$",
"^Kōtō shōgaku dokuhon$",
"^Shotōka kokugo$",
"^Shōgaku dokuhon$",
"^Jinjō shōgaku kokugo dokuhon$",
"^Shinsei kokugo dokuhon$",
"^Teikoku dokuhon$",
"^Instructions to Authors$",
"^KİTAP TAHLİLİ$",
"^PRZEGLĄD PIŚMIENNICTWA$",
"(?i)^Presentación$",
"^İçindekiler$",
"(?i)^Tabl?e of contents$",
"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
"^Editorial( Board)?$",
"(?i)^Editorial \\(English\\)$",
"^Editörden$",
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
"^(Kiri Karl Morgensternile).*$",
"^(\\[Eksliibris Aleksandr).*\\]$",
"^(\\[Eksliibris Aleksandr).*$",
"^(Eksliibris Aleksandr).*$",
"^(Kiri A\\. de Vignolles).*$",
"^(2 kirja Karl Morgensternile).*$",
"^(Pirita kloostri idaosa arheoloogilised).*$",
"^(Kiri tundmatule).*$",
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
"^(Eksliibris Nikolai Birukovile).*$",
"^(Eksliibris Nikolai Issakovile).*$",
"^(WHP Cruise Summary Information of section).*$",
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
"^(Measurement of the spin\\-dependent structure function).*",
"(?i)^.*authors[']? reply\\.?$",
"(?i)^.*authors[']? response\\.?$",
"^Data [mM]anagement [sS]ervices\\.$",
"Research and Advanced Technology for Digital Libraries"
]
},
"synonyms": {}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,4 @@
{"websiteurl": "https://fairsharing.org", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "eosc________::oxford_e-research_centre::oxford_e-research_centre.fairsharing"}
{"websiteurl": "https://FAIRsharing.org", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "fairsharing_::2521"}
{"websiteurl": "https://fairsharing.org/", "englishname": "formerly: biosharing", "officialname": "FAIRsharing", "id": "re3data_____::r3d100010142"}
{"websiteurl": "https://fairsharing.org/", "englishname": "FAIRsharing", "officialname": "FAIRsharing", "id": "openaire____::fairsharing"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,32 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "dedup configuration to be used",
"paramRequired": false
},
{
"paramName": "gt",
"paramLongName": "groundTruthFieldJPath",
"paramDescription": "field to be used as groundtruth",
"paramRequired": false
}
]

View File

@ -0,0 +1,26 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": false
}
]

View File

@ -0,0 +1,26 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": false
}
]

View File

@ -0,0 +1,32 @@
[
{
"paramName": "e",
"paramLongName": "entitiesPath",
"paramDescription": "the input entities",
"paramRequired": true
},
{
"paramName": "w",
"paramLongName": "workingPath",
"paramDescription": "path of the working directory",
"paramRequired": true
},
{
"paramName": "np",
"paramLongName": "numPartitions",
"paramDescription": "number of partitions for the similarity relations intermediate phases",
"paramRequired": false
},
{
"paramName": "dc",
"paramLongName": "dedupConfPath",
"paramDescription": "path of the dedup configuration",
"paramRequired": true
},
{
"paramName": "ut",
"paramLongName": "useTree",
"paramDescription": "choose whether to use the tree configuration",
"paramRequired": true
}
]

View File

@ -0,0 +1,70 @@
<html>
<head>
    <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
    <style type="text/css">
        #mynetwork {
            width: 1000px;
            height: 700px;
            border: 1px solid lightgray;
        }
        th, td {
            font-size: 10px;
        }
    </style>
</head>
<body>
<div id="mynetwork"></div>
<script type="text/javascript">
    // Wraps an HTML string in a detached <div> so it can be used as a node title.
    // NOTE(review): innerHTML interprets whatever markup the title contains, so the
    // titles written into the nodes placeholder must be trusted or already escaped.
    function htmlTitle(html) {
        const container = document.createElement("div");
        container.innerHTML = html;
        return container;
    }

    // %nodes% and %edges% are template placeholders; this file is not valid JavaScript
    // until they are substituted with array literals (presumably by the code that
    // renders this template - confirm against the caller).
    var nodesArray = %nodes%;
    var edgesArray = %edges%;

    // replace every textual title with its DOM element counterpart
    for (var i = 0; i < nodesArray.length; i++) {
        nodesArray[i].title = htmlTitle(nodesArray[i].title);
    }

    // create an array with nodes
    var nodes = new vis.DataSet(nodesArray);

    // create an array with edges
    var edges = new vis.DataSet(edgesArray);

    // create a network
    var container = document.getElementById('mynetwork');

    // provide the data in the vis format
    var data = {
        nodes: nodes,
        edges: edges
    };
    var options = {
        physics:{enabled: false},
        edges:{physics:false},
        nodes:{font:{size:10}},
        layout: {improvedLayout:true}
    };

    // initialize your network!
    var network = new vis.Network(container, data, options);
</script>
</body>
</html>

113
dnet-dedup.ipr Normal file
View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="ProjectRootManager" version="2" assert-keyword="true" project-jdk-name="1.8" jdk-15="true"/>
<component name="CodeStyleManager">
<option name="USE_DEFAULT_CODE_STYLE_SCHEME" value="true"/>
<option name="CODE_STYLE_SCHEME" value=""/>
</component>
<component name="libraryTable"/>
<component name="CompilerConfiguration">
<option name="DEFAULT_COMPILER" value="Javac"/>
<option name="CLEAR_OUTPUT_DIRECTORY" value="false"/>
<!--
<wildcardResourcePatterns>
<entry name="${wildcardResourcePattern}"/>
</wildcardResourcePatterns>
-->
<wildcardResourcePatterns>
<entry name="!?*.java"/>
</wildcardResourcePatterns>
</component>
<component name="JavacSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="DEPRECATION" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
<option name="USE_GENERICS_COMPILER" value="false"/>
</component>
<component name="JikesSettings">
<option name="DEBUGGING_INFO" value="true"/>
<option name="DEPRECATION" value="true"/>
<option name="GENERATE_NO_WARNINGS" value="false"/>
<option name="GENERATE_MAKE_FILE_DEPENDENCIES" value="false"/>
<option name="DO_FULL_DEPENDENCE_CHECK" value="false"/>
<option name="IS_INCREMENTAL_MODE" value="false"/>
<option name="IS_EMACS_ERRORS_MODE" value="true"/>
<option name="ADDITIONAL_OPTIONS_STRING" value=""/>
<option name="MAXIMUM_HEAP_SIZE" value="128"/>
</component>
<component name="AntConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="FILTER_TARGETS" value="false"/>
</component>
<component name="JavadocGenerationManager">
<option name="OUTPUT_DIRECTORY"/>
<option name="OPTION_SCOPE" value="protected"/>
<option name="OPTION_HIERARCHY" value="false"/>
<option name="OPTION_NAVIGATOR" value="false"/>
<option name="OPTION_INDEX" value="false"/>
<option name="OPTION_SEPARATE_INDEX" value="false"/>
<option name="OPTION_USE_1_1" value="false"/>
<option name="OPTION_DOCUMENT_TAG_USE" value="false"/>
<option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false"/>
<option name="OPTION_DOCUMENT_TAG_VERSION" value="false"/>
<option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="false"/>
<option name="OPTION_DEPRECATED_LIST" value="false"/>
<option name="OTHER_OPTIONS"/>
<option name="HEAP_SIZE"/>
<option name="OPEN_IN_BROWSER" value="false"/>
</component>
<component name="JUnitProjectSettings">
<option name="TEST_RUNNER" value="UI"/>
</component>
<component name="EntryPointsManager">
<entry_points/>
</component>
<component name="DataSourceManager"/>
<component name="ExportToHTMLSettings">
<option name="PRINT_LINE_NUMBERS" value="false"/>
<option name="OPEN_IN_BROWSER" value="false"/>
<option name="OUTPUT_DIRECTORY"/>
</component>
<component name="ImportConfiguration">
<option name="VENDOR"/>
<option name="RELEASE_TAG"/>
<option name="LOG_MESSAGE"/>
<option name="CHECKOUT_AFTER_IMPORT" value="true"/>
</component>
<component name="ProjectModuleManager">
<modules>
<!-- module filepath="$$PROJECT_DIR$$/${pom.artifactId}.iml"/ -->
<module filepath="$PROJECT_DIR$/dnet-dedup.iml"/>
<module filepath="$PROJECT_DIR$/dnet-pace-core/dnet-pace-core.iml"/>
<module filepath="$PROJECT_DIR$/dnet-dedup-test/dnet-dedup-test.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build/dhp-code-style/dhp-code-style.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build-assembly-resources/dhp-build-assembly-resources.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build-properties-maven-plugin/dhp-build-properties-maven-plugin.iml"/>
<module filepath="$PROJECT_DIR$/dhp-build/dhp-build.iml"/>
</modules>
</component>
<UsedPathMacros>
<!--<macro name="cargo"></macro>-->
</UsedPathMacros>
</project>

418
dnet-dedup.iws Normal file
View File

@ -0,0 +1,418 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project version="4" relativePaths="false">
<component name="LvcsProjectConfiguration">
<option name="ADD_LABEL_ON_PROJECT_OPEN" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_FILE_PACKAGE_COMPILATION" value="true"/>
<option name="ADD_LABEL_ON_PROJECT_MAKE" value="true"/>
<option name="ADD_LABEL_ON_RUNNING" value="true"/>
<option name="ADD_LABEL_ON_DEBUGGING" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_PASSED" value="true"/>
<option name="ADD_LABEL_ON_UNIT_TEST_FAILED" value="true"/>
</component>
<component name="PropertiesComponent">
<property name="MemberChooser.copyJavadoc" value="false"/>
<property name="GoToClass.includeLibraries" value="false"/>
<property name="MemberChooser.showClasses" value="true"/>
<property name="MemberChooser.sorted" value="false"/>
<property name="GoToFile.includeJavaFiles" value="false"/>
<property name="GoToClass.toSaveIncludeLibraries" value="false"/>
</component>
<component name="ToolWindowManager">
<frame x="-4" y="-4" width="1032" height="746" extended-state="6"/>
<editor active="false"/>
<layout>
<window_info id="CVS" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="7"/>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="0"/>
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="1"/>
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="6"/>
<window_info id="Aspects" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="-1"/>
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="1"/>
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="2"/>
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.4" order="4"/>
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="sliding" type="sliding" visible="false" weight="0.4" order="0"/>
<window_info id="Web" active="false" anchor="left" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="2"/>
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.33" order="0"/>
<window_info id="EJB" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="3"/>
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="docked" type="docked" visible="false" weight="0.25" order="5"/>
</layout>
</component>
<component name="ErrorTreeViewConfiguration">
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="HIDE_WARNINGS" value="false"/>
</component>
<component name="StructureViewFactory">
<option name="SORT_MODE" value="0"/>
<option name="GROUP_INHERITED" value="true"/>
<option name="AUTOSCROLL_MODE" value="true"/>
<option name="SHOW_FIELDS" value="true"/>
<option name="AUTOSCROLL_FROM_SOURCE" value="false"/>
<option name="GROUP_GETTERS_AND_SETTERS" value="true"/>
<option name="SHOW_INHERITED" value="false"/>
<option name="HIDE_NOT_PUBLIC" value="false"/>
</component>
<component name="ProjectViewSettings">
<navigator currentView="ProjectPane" flattenPackages="false" showMembers="false" showStructure="false" autoscrollToSource="false" splitterProportion="0.5"/>
<view id="ProjectPane">
<expanded_node type="directory" url="file://$PROJECT_DIR$"/>
</view>
<view id="SourcepathPane"/>
<view id="ClasspathPane"/>
</component>
<component name="Commander">
<leftPanel view="Project"/>
<rightPanel view="Project"/>
<splitter proportion="0.5"/>
</component>
<component name="AspectsView"/>
<component name="SelectInManager"/>
<component name="HierarchyBrowserManager">
<option name="SHOW_PACKAGES" value="false"/>
<option name="IS_AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SORT_ALPHABETICALLY" value="false"/>
</component>
<component name="TodoView" selected-index="0">
<todo-panel id="selected-file">
<are-packages-shown value="false"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
<todo-panel id="all">
<are-packages-shown value="true"/>
<flatten-packages value="false"/>
<is-autoscroll-to-source value="true"/>
</todo-panel>
</component>
<component name="editorManager"/>
<component name="editorHistoryManager"/>
<component name="DaemonCodeAnalyzer">
<disable_hints/>
</component>
<component name="InspectionManager">
<option name="AUTOSCROLL_TO_SOURCE" value="false"/>
<option name="SPLITTER_PROPORTION" value="0.5"/>
<profile name="Default"/>
</component>
<component name="BookmarkManager"/>
<component name="DebuggerManager">
<line_breakpoints/>
<exception_breakpoints>
<breakpoint_any>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="ENABLED" value="false"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</breakpoint_any>
</exception_breakpoints>
<field_breakpoints/>
<method_breakpoints/>
</component>
<component name="DebuggerSettings">
<option name="TRACING_FILTERS_ENABLED" value="true"/>
<option name="TOSTRING_CLASSES_ENABLED" value="false"/>
<option name="VALUE_LOOKUP_DELAY" value="700"/>
<option name="DEBUGGER_TRANSPORT" value="0"/>
<option name="FORCE_CLASSIC_VM" value="true"/>
<option name="HIDE_DEBUGGER_ON_PROCESS_TERMINATION" value="false"/>
<option name="SKIP_SYNTHETIC_METHODS" value="true"/>
<option name="SKIP_CONSTRUCTORS" value="false"/>
<option name="STEP_THREAD_SUSPEND_POLICY" value="SuspendThread"/>
<default_breakpoint_settings>
<option name="NOTIFY_CAUGHT" value="true"/>
<option name="NOTIFY_UNCAUGHT" value="true"/>
<option name="WATCH_MODIFICATION" value="true"/>
<option name="WATCH_ACCESS" value="true"/>
<option name="WATCH_ENTRY" value="true"/>
<option name="WATCH_EXIT" value="true"/>
<option name="ENABLED" value="true"/>
<option name="SUSPEND_VM" value="true"/>
<option name="COUNT_FILTER_ENABLED" value="false"/>
<option name="COUNT_FILTER" value="0"/>
<option name="CONDITION_ENABLED" value="false"/>
<option name="CONDITION"/>
<option name="LOG_ENABLED" value="false"/>
<option name="LOG_EXPRESSION_ENABLED" value="false"/>
<option name="LOG_MESSAGE"/>
<option name="CLASS_FILTERS_ENABLED" value="false"/>
<option name="INVERSE_CLASS_FILLTERS" value="false"/>
<option name="SUSPEND_POLICY" value="SuspendAll"/>
</default_breakpoint_settings>
<filter>
<option name="PATTERN" value="com.sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="java.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="javax.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="org.omg.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="sun.*"/>
<option name="ENABLED" value="true"/>
</filter>
<filter>
<option name="PATTERN" value="junit.*"/>
<option name="ENABLED" value="true"/>
</filter>
</component>
<component name="CompilerWorkspaceConfiguration">
<option name="COMPILE_IN_BACKGROUND" value="false"/>
<option name="AUTO_SHOW_ERRORS_IN_EDITOR" value="true"/>
</component>
<component name="RunManager">
<activeType name="Application"/>
<configuration selected="false" default="true" type="Applet" factoryName="Applet">
<module name=""/>
<option name="MAIN_CLASS_NAME"/>
<option name="HTML_FILE_NAME"/>
<option name="HTML_USED" value="false"/>
<option name="WIDTH" value="400"/>
<option name="HEIGHT" value="300"/>
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy"/>
<option name="VM_PARAMETERS"/>
</configuration>
<configuration selected="false" default="true" type="Remote" factoryName="Remote">
<option name="USE_SOCKET_TRANSPORT" value="true"/>
<option name="SERVER_MODE" value="false"/>
<option name="SHMEM_ADDRESS" value="javadebug"/>
<option name="HOST" value="localhost"/>
<option name="PORT" value="5005"/>
</configuration>
<configuration selected="false" default="true" type="Application" factoryName="Application">
<option name="MAIN_CLASS_NAME"/>
<option name="VM_PARAMETERS"/>
<option name="PROGRAM_PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<module name=""/>
</configuration>
<configuration selected="false" default="true" type="JUnit" factoryName="JUnit">
<module name=""/>
<option name="PACKAGE_NAME"/>
<option name="MAIN_CLASS_NAME"/>
<option name="METHOD_NAME"/>
<option name="TEST_OBJECT" value="class"/>
<option name="VM_PARAMETERS"/>
<option name="PARAMETERS"/>
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$"/>
<option name="ADDITIONAL_CLASS_PATH"/>
<option name="TEST_SEARCH_SCOPE">
<value defaultName="wholeProject"/>
</option>
</configuration>
</component>
<component name="VcsManagerConfiguration">
<option name="ACTIVE_VCS_NAME" value="git"/>
<option name="STATE" value="0"/>
</component>
<component name="VssConfiguration">
<CheckoutOptions>
<option name="COMMENT" value=""/>
<option name="DO_NOT_GET_LATEST_VERSION" value="false"/>
<option name="REPLACE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckoutOptions>
<CheckinOptions>
<option name="COMMENT" value=""/>
<option name="KEEP_CHECKED_OUT" value="false"/>
<option name="RECURSIVE" value="false"/>
</CheckinOptions>
<AddOptions>
<option name="COMMENT" value=""/>
<option name="STORE_ONLY_LATEST_VERSION" value="false"/>
<option name="CHECK_OUT_IMMEDIATELY" value="false"/>
<option name="FILE_TYPE" value="0"/>
</AddOptions>
<UndocheckoutOptions>
<option name="MAKE_WRITABLE" value="false"/>
<option name="REPLACE_LOCAL_COPY" value="0"/>
<option name="RECURSIVE" value="false"/>
</UndocheckoutOptions>
<DiffOptions>
<option name="IGNORE_WHITE_SPACE" value="false"/>
<option name="IGNORE_CASE" value="false"/>
</DiffOptions>
<GetOptions>
<option name="REPLACE_WRITABLE" value="0"/>
<option name="MAKE_WRITABLE" value="false"/>
<option name="RECURSIVE" value="false"/>
</GetOptions>
<option name="CLIENT_PATH" value=""/>
<option name="SRCSAFEINI_PATH" value=""/>
<option name="USER_NAME" value=""/>
<option name="PWD" value=""/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_UNDOCHECKOUT_OPTIONS" value="true"/>
<option name="SHOW_DIFF_OPTIONS" value="true"/>
<option name="SHOW_GET_OPTIONS" value="true"/>
<option name="USE_EXTERNAL_DIFF" value="false"/>
<option name="EXTERNAL_DIFF_PATH" value=""/>
<option name="REUSE_LAST_COMMENT" value="false"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="CheckinPanelState"/>
<component name="WebViewSettings">
<webview flattenPackages="false" showMembers="false" autoscrollToSource="false"/>
</component>
<component name="EjbViewSettings">
<EjbView showMembers="false" autoscrollToSource="false"/>
</component>
<component name="AppServerRunManager"/>
<component name="StarteamConfiguration">
<option name="SERVER" value=""/>
<option name="PORT" value="49201"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="PROJECT" value=""/>
<option name="VIEW" value=""/>
<option name="ALTERNATIVE_WORKING_PATH" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
</component>
<component name="Cvs2Configuration">
<option name="ON_FILE_ADDING" value="0"/>
<option name="ON_FILE_REMOVING" value="0"/>
<option name="PRUNE_EMPTY_DIRECTORIES" value="true"/>
<option name="SHOW_UPDATE_OPTIONS" value="true"/>
<option name="SHOW_ADD_OPTIONS" value="true"/>
<option name="SHOW_REMOVE_OPTIONS" value="true"/>
<option name="MERGING_MODE" value="0"/>
<option name="MERGE_WITH_BRANCH1_NAME" value="HEAD"/>
<option name="MERGE_WITH_BRANCH2_NAME" value="HEAD"/>
<option name="RESET_STICKY" value="false"/>
<option name="CREATE_NEW_DIRECTORIES" value="true"/>
<option name="DEFAULT_TEXT_FILE_SUBSTITUTION" value="kv"/>
<option name="PROCESS_UNKNOWN_FILES" value="false"/>
<option name="PROCESS_DELETED_FILES" value="false"/>
<option name="SHOW_EDIT_DIALOG" value="true"/>
<option name="RESERVED_EDIT" value="false"/>
<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6"/>
<option name="SHOW_CHECKOUT_OPTIONS" value="true"/>
<option name="CHECKOUT_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="UPDATE_DATE_OR_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_CHANGES_REVISION_SETTINGS">
<value>
<option name="BRANCH" value=""/>
<option name="DATE" value=""/>
<option name="USE_BRANCH" value="false"/>
<option name="USE_DATE" value="false"/>
</value>
</option>
<option name="SHOW_OUTPUT" value="false"/>
<option name="SHOW_FILE_HISTORY_AS_TREE" value="false"/>
<option name="UPDATE_GROUP_BY_PACKAGES" value="false"/>
<option name="ADD_WATCH_INDEX" value="0"/>
<option name="REMOVE_WATCH_INDEX" value="0"/>
<option name="UPDATE_KEYWORD_SUBSTITUTION"/>
<option name="MAKE_NEW_FILES_READONLY" value="false"/>
<option name="SHOW_CORRUPTED_PROJECT_FILES" value="0"/>
<option name="TAG_AFTER_FILE_COMMIT" value="false"/>
<option name="TAG_AFTER_FILE_COMMIT_NAME" value=""/>
<option name="TAG_AFTER_PROJECT_COMMIT" value="false"/>
<option name="TAG_AFTER_PROJECT_COMMIT_NAME" value=""/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="false"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
<component name="CvsTabbedWindow"/>
<component name="SvnConfiguration">
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="AUTO_ADD_FILES" value="0"/>
<option name="AUTO_DEL_FILES" value="0"/>
</component>
<component name="PerforceConfiguration">
<option name="PORT" value="magic:1666"/>
<option name="USER" value=""/>
<option name="PASSWORD" value=""/>
<option name="CLIENT" value=""/>
<option name="TRACE" value="false"/>
<option name="PERFORCE_STATUS" value="true"/>
<option name="CHANGELIST_OPTION" value="false"/>
<option name="SYSTEMROOT" value=""/>
<option name="P4_EXECUTABLE" value="p4"/>
<option name="SHOW_BRANCH_HISTORY" value="false"/>
<option name="GENERATE_COMMENT" value="false"/>
<option name="SYNC_OPTION" value="Sync"/>
<option name="PUT_FOCUS_INTO_COMMENT" value="false"/>
<option name="SHOW_CHECKIN_OPTIONS" value="true"/>
<option name="FORCE_NON_EMPTY_COMMENT" value="true"/>
<option name="LAST_COMMIT_MESSAGE" value=""/>
<option name="SAVE_LAST_COMMIT_MESSAGE" value="true"/>
<option name="CHECKIN_DIALOG_SPLITTER_PROPORTION" value="0.8"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="OPTIMIZE_IMPORTS_BEFORE_FILE_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false"/>
<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false"/>
<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8"/>
<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5"/>
</component>
</project>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-dedup</artifactId>
<version>4.0.3</version>
<version>4.1.13-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@ -43,9 +43,10 @@
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
@ -66,7 +67,10 @@
<artifactId>json-path</artifactId>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
</dependency>
</dependencies>

View File

@ -1,59 +1,59 @@
package eu.dnetlib.pace.clustering;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Document;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldListImpl;
import eu.dnetlib.pace.model.MapDocument;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
public class BlacklistAwareClusteringCombiner extends ClusteringCombiner {
private static final Log log = LogFactory.getLog(BlacklistAwareClusteringCombiner.class);
public static Collection<String> filterAndCombine(final MapDocument a, final Config conf) {
Document filtered = filter(a, conf.blacklists());
return combine(filtered, conf);
}
public static Collection<String> filterAndCombine(final MapDocument a, final Config conf) {
private static MapDocument filter(final MapDocument a, final Map<String, List<Pattern>> blacklists) {
if (blacklists == null || blacklists.isEmpty()) {
return a;
}
final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, conf.blacklists());
return combine(filtered, conf);
}
final Map<String, Field> filtered = Maps.newHashMap(a.getFieldMap());
private MapDocument filter(final MapDocument a, final Map<String, List<String>> blacklists) {
final Map<String, Field> filtered = Maps.newHashMap(a.getFieldMap());
if (blacklists != null) {
for (final Entry<String, Field> e : filtered.entrySet()) {
for (final Entry<String, List<Pattern>> e : blacklists.entrySet()) {
Field fields = a.getFieldMap().get(e.getKey());
if (fields != null) {
final FieldListImpl fl = new FieldListImpl();
final FieldListImpl fl = new FieldListImpl();
fl.addAll(Lists.newArrayList(Iterables.filter(e.getValue(), new FieldFilter(e.getKey(), blacklists))));
filtered.put(e.getKey(), fl);
}
}
return new MapDocument(a.getIdentifier(), filtered);
}
for (Field f : fields) {
if (!isBlackListed(f.stringValue(), e.getValue())) {
fl.add(f);
}
}
filtered.put(e.getKey(), fl);
}
}
return new MapDocument(a.getIdentifier(), filtered);
}
/**
 * Tells whether a value is matched by at least one of the given precompiled patterns.
 *
 * @param value the string to test; a pattern must match it in its entirety
 * @param blacklist the precompiled patterns to try, in order
 * @return true as soon as one pattern fully matches the value, false if none does
 */
private static boolean isBlackListed(String value, List<Pattern> blacklist) {
    return blacklist
        .stream()
        .anyMatch(candidate -> candidate.matcher(value).matches());
}
/**
 * Checks a value against the regex blacklist registered for a field.
 *
 * @param fieldName name of the field whose blacklist is consulted
 * @param value the string to test; a regex must match it in its entirety
 * @param blacklists regular expressions keyed by field name
 * @return true if the value fully matches at least one regex registered for the field,
 *         false otherwise (including when the field has no blacklist)
 */
protected boolean regexMatches(final String fieldName, final String value, final Map<String, Set<String>> blacklists) {
    if (!blacklists.containsKey(fieldName)) {
        return false;
    }
    return blacklists.get(fieldName)
        .stream()
        .anyMatch(candidate -> value.matches(candidate));
}
}

View File

@ -1,7 +1,9 @@
package eu.dnetlib.pace.clustering;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import com.google.common.collect.Sets;
@ -9,21 +11,50 @@ import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.ClusteringDef;
import eu.dnetlib.pace.model.Document;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldValueImpl;
import org.apache.commons.lang3.StringUtils;
public class ClusteringCombiner {
public static Collection<String> combine(final Document a, final Config conf) {
return new ClusteringCombiner().doCombine(a, conf);
}
private static String SEPARATOR = ":";
private static String COLLAPSE_ON= "collapseOn";
private Collection<String> doCombine(final Document a, final Config conf) {
public static Collection<String> combine(final Document a, final Config conf) {
final Collection<String> res = Sets.newLinkedHashSet();
for (final ClusteringDef cd : conf.clusterings()) {
for (final String fieldName : cd.getFields()) {
final Field values = a.values(fieldName);
res.addAll(cd.clusteringFunction().apply(conf, (List<Field>) values));
String prefix = getPrefix(cd, fieldName);
Field values = a.values(fieldName);
List<Field> fields = new ArrayList<>();
if (values instanceof FieldValueImpl) {
fields.add(values);
}
else {
fields.addAll((List<Field>) values);
}
res.addAll(
cd.clusteringFunction()
.apply(conf, fields)
.stream()
.map(k -> prefix + SEPARATOR +k)
.collect(Collectors.toList())
);
}
}
return res;
}
/**
 * Builds the prefix put in front of every clustering key produced for a field.
 *
 * The prefix is the clustering name, the separator, and then either the part following the
 * separator in the first parameter key containing the collapse marker, or the field name
 * itself when no such parameter key exists.
 *
 * @param cd the clustering definition supplying the name and the parameters
 * @param fieldName the field being clustered, used as the fallback suffix
 * @return the prefix for the keys of this clustering/field pair
 */
private static String getPrefix(ClusteringDef cd, String fieldName) {
    final String clusteringName = cd.getName();
    final String suffix = cd.getParams().keySet()
        .stream()
        .filter(key -> key.contains(COLLAPSE_ON))
        .findFirst()
        .map(key -> StringUtils.substringAfter(key, SEPARATOR))
        .orElse(fieldName);
    return clusteringName + SEPARATOR + suffix;
}
}

View File

@ -1,48 +0,0 @@
package eu.dnetlib.pace.clustering;
import java.util.List;
import java.util.Map;
import com.google.common.base.Predicate;
import eu.dnetlib.pace.model.Field;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Predicate accepting a {@link Field} only when its string value matches none of the
 * blacklist regular expressions registered for the field name given at construction time.
 */
public class FieldFilter implements Predicate<Field> {

    private static final Log log = LogFactory.getLog(FieldFilter.class);

    /** Blacklist regular expressions, keyed by field name. */
    private final Map<String, List<String>> blacklists;

    /** Name of the field whose blacklist is applied by {@link #apply(Field)}. */
    private final String fieldName;

    /**
     * @param fieldName name of the field whose blacklist has to be applied
     * @param blacklists blacklist regular expressions, keyed by field name
     */
    public FieldFilter(final String fieldName, final Map<String, List<String>> blacklists) {
        this.fieldName = fieldName;
        this.blacklists = blacklists;
    }

    /**
     * @param f the field to evaluate
     * @return true if the field value is NOT blacklisted, i.e. the field has to be kept
     */
    @Override
    public boolean apply(final Field f) {
        return !regexMatches(fieldName, f.stringValue(), blacklists);
    }

    /**
     * Tries to match the value against the regex blacklist of the given field.
     *
     * @param fieldName name of the field whose blacklist is consulted
     * @param value the string to test; a regex must match it in its entirety
     * @param blacklists blacklist regular expressions, keyed by field name
     * @return true if the value matches at least one non-blank regex, false otherwise
     */
    protected boolean regexMatches(final String fieldName, final String value, final Map<String, List<String>> blacklists) {
        if (blacklists.containsKey(fieldName)) {
            for (final String regex : blacklists.get(fieldName)) {
                // a blank entry carries no pattern: skip it, but keep evaluating the
                // remaining expressions instead of giving up on the whole blacklist
                if (StringUtils.isBlank(regex)) {
                    continue;
                }
                if (value.matches(regex)) {
                    return true;
                }
            }
        }
        return false;
    }
}

View File

@ -41,7 +41,7 @@ public class KeywordsClustering extends AbstractClusteringFunction {
public Collection<String> apply(final Config conf, List<Field> fields) {
return fields.stream().filter(f -> !f.isEmpty())
.map(Field::stringValue)
.map(this::cleanup) //TODO can I add this to the AbstractClusteringFunction without overriding the method here?
.map(this::cleanup)
.map(this::normalize)
.map(s -> filterAllStopWords(s))
.map(s -> doApply(conf, s))

View File

@ -0,0 +1,77 @@
package eu.dnetlib.pace.clustering;
import com.google.common.collect.Lists;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.Person;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Clustering function producing keys made of the first-name initial followed by the surname
 * (e.g. "jsmith"). When the person name cannot be split into name and surname, both
 * "initial of first token + last token" and "initial of last token + first token" are emitted.
 */
@ClusteringClass("lnfi")
public class LastNameFirstInitial extends AbstractClusteringFunction{

    /** Value used for the "aggressive" flag passed to {@link Person} when the parameter is not configured. */
    private static final boolean DEFAULT_AGGRESSIVE = true;

    public LastNameFirstInitial(final Map<String, Integer> params) {
        super(params);
    }

    /**
     * Normalizes every non-empty field, derives its clustering keys, drops the blacklisted and
     * blank ones and returns the distinct keys.
     */
    @Override
    public Collection<String> apply(Config conf, List<Field> fields) {
        return fields.stream().filter(f -> !f.isEmpty())
                .map(Field::stringValue)
                .map(this::normalize)
                .map(s -> doApply(conf, s))
                .map(c -> filterBlacklisted(c, ngramBlacklist))
                .flatMap(c -> c.stream())
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Same normalization chain as the inherited one but without lower-casing: the case is
     * lowered in {@link #doApply(Config, String)} after the name has been parsed.
     */
    @Override
    protected String normalize(final String s) {
        return fixAliases(transliterate(nfd(unicodeNormalization(s))))
                // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
                .replaceAll("[^ \\w]+", "")
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
                .replaceAll("(\\p{Punct})+", " ")
                .replaceAll("(\\d)+", " ")
                .replaceAll("(\\n)+", " ")
                .trim();
    }

    /**
     * Builds the clustering keys for a single (already normalized) person name.
     *
     * @param conf the dedup configuration (not used here)
     * @param s    the normalized person name
     * @return the lower-cased keys; empty when the name contains no usable token
     */
    @Override
    protected Collection<String> doApply(final Config conf, final String s) {
        final List<String> res = Lists.newArrayList();

        final Person p = new Person(s, isAggressive());

        if (p.isAccurate()) {
            final String lastName = p.getNormalisedSurname().toLowerCase();
            final String firstName = p.getNormalisedFirstName().toLowerCase();
            // guard against an empty first name: substring(0,1) would throw
            res.add(firstName.isEmpty() ? lastName : firstName.substring(0, 1).concat(lastName));
            return res;
        }

        // is not accurate, meaning it has no defined name and surname.
        // split(" ") yields empty tokens for leading/repeated spaces: drop them, otherwise
        // substring(0,1) below throws StringIndexOutOfBoundsException
        final List<String> tokens = Arrays.stream(p.getNormalisedFullname().split(" "))
                .filter(t -> !t.isEmpty())
                .collect(Collectors.toList());

        if (tokens.isEmpty()) {
            return res;
        }
        if (tokens.size() == 1) {
            res.add(tokens.get(0).toLowerCase());
            return res;
        }

        // two or more tokens: only the first and the last one are considered, in both orders
        final String first = tokens.get(0);
        final String last = tokens.get(tokens.size() - 1);
        res.add(first.substring(0, 1).concat(last).toLowerCase());
        res.add(last.substring(0, 1).concat(first).toLowerCase());
        return res;
    }

    /**
     * Reads the "aggressive" parameter. The previous implementation cast the configured value
     * straight to Boolean, which fails with ClassCastException for the numeric values held by
     * the parameter map.
     * NOTE(review): a numeric value is interpreted as "non-zero means true" - confirm this is
     * the convention expected by the configurations.
     */
    private boolean isAggressive() {
        final Object raw = getParams() == null ? null : getParams().get("aggressive");
        if (raw instanceof Boolean) {
            return (Boolean) raw;
        }
        if (raw instanceof Number) {
            return ((Number) raw).intValue() != 0;
        }
        return DEFAULT_AGGRESSIVE;
    }
}

View File

@ -9,7 +9,7 @@ import com.google.common.collect.Lists;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Person;
@ClusteringClass("personhash")
@ClusteringClass("personHash")
public class PersonHash extends AbstractClusteringFunction {
private boolean DEFAULT_AGGRESSIVE = false;

View File

@ -0,0 +1,90 @@
package eu.dnetlib.pace.clustering;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.config.Config;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Clustering function whose keys combine simple statistics of the input (number of words
 * longer than three characters and string length divided by the "mod" parameter) with an
 * alternating chain of 3-character suffixes and prefixes of the first words.
 */
@ClusteringClass("wordsStatsSuffixPrefixChain")
public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction {

    public WordsStatsSuffixPrefixChain(Map<String, Integer> params) {
        super(params);
    }

    @Override
    protected Collection<String> doApply(Config conf, String s) {
        return suffixPrefixChain(s, param("mod"));
    }

    /**
     * Keeps the words longer than three characters and builds the statistics prefix
     * "&lt;number of kept words&gt;-&lt;string length / mod&gt;-" shared by every key.
     */
    private Collection<String> suffixPrefixChain(String s, int mod) {
        // short words are discarded, so every remaining word has at least 4 characters
        final List<String> longWords = new ArrayList<>();
        for (final String token : s.split(" ")) {
            if (token.length() > 3) {
                longWords.add(token);
            }
        }

        // statistics part of the key: number of words + number of letters/mod
        final String statsPrefix = longWords.size() + "-" + (s.length() / mod) + "-";

        return doSuffixPrefixChain(longWords, statsPrefix);
    }

    /**
     * Emits up to two keys: one starting with the suffix of the first word and one starting
     * with its prefix, alternating suffix/prefix over the first two (or three) words.
     * Fewer than two words produce no key.
     */
    private Collection<String> doSuffixPrefixChain(List<String> wordsList, String prefix) {
        final Set<String> keys = new LinkedHashSet<>();

        final int count = wordsList.size();
        if (count < 2) {
            return keys;
        }

        final String w0 = wordsList.get(0);
        final String w1 = wordsList.get(1);

        if (count == 2) {
            keys.add(prefix + tail(w0, 3) + head(w1, 3));
            keys.add(prefix + head(w0, 3) + tail(w1, 3));
        } else {
            final String w2 = wordsList.get(2);
            keys.add(prefix + tail(w0, 3) + head(w1, 3) + tail(w2, 3));
            keys.add(prefix + head(w0, 3) + tail(w1, 3) + head(w2, 3));
        }

        return keys;
    }

    /** Last {@code len} characters of {@code s}. */
    private String tail(String s, int len) {
        return s.substring(s.length() - len);
    }

    /** First {@code len} characters of {@code s}. */
    private String head(String s, int len) {
        return s.substring(0, len);
    }
}

View File

@ -3,8 +3,8 @@ package eu.dnetlib.pace.common;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.ibm.icu.text.Transliterator;
import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList;
@ -14,6 +14,7 @@ import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.*;
import java.util.regex.Matcher;
@ -31,6 +32,7 @@ public abstract class AbstractPaceFunctions {
private static Map<String, String> cityMap = AbstractPaceFunctions.loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
//list of stopwords in different languages
protected static Set<String> stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt");
protected static Set<String> stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
protected static Set<String> stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
protected static Set<String> stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
@ -38,9 +40,15 @@ public abstract class AbstractPaceFunctions {
protected static Set<String> stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
protected static Set<String> stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
//transliterator
protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
//blacklist of ngrams: to avoid generic keys
protected static Set<String> ngramBlacklist = loadFromClasspath("/eu/dnetlib/pace/config/ngram_blacklist.txt");
//html regex for normalization
public final String HTML_REGEX = "<[^>]*>";
private static final String alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ";
private static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
private static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
@ -59,14 +67,14 @@ public abstract class AbstractPaceFunctions {
}
protected String cleanup(final String s) {
final String s0 = unicodeNormalization(s.toLowerCase());
final String s1 = fixAliases(s0);
final String s2 = nfd(s1);
final String s3 = s2.replaceAll("&ndash;", " ");
final String s4 = s3.replaceAll("&amp;", " ");
final String s5 = s4.replaceAll("&quot;", " ");
final String s6 = s5.replaceAll("&minus;", " ");
final String s7 = s6.replaceAll("([0-9]+)", " $1 ");
final String s1 = s.replaceAll(HTML_REGEX, "");
final String s2 = unicodeNormalization(s1.toLowerCase());
final String s3 = nfd(s2);
final String s4 = fixXML(s3);
final String s5 = s4.replaceAll("([0-9]+)", " $1 ");
final String s6 = transliterate(s5);
final String s7 = fixAliases(s6);
final String s8 = s7.replaceAll("[^\\p{ASCII}]", "");
final String s9 = s8.replaceAll("[\\p{Punct}]", " ");
final String s10 = s9.replaceAll("\\n", " ");
@ -75,6 +83,14 @@ public abstract class AbstractPaceFunctions {
return s12;
}
/**
 * Replaces each occurrence of the entities {@code &ndash;}, {@code &amp;}, {@code &quot;}
 * and {@code &minus;} with a single space.
 *
 * @param a the string to clean
 * @return the string with the listed entities replaced by spaces
 */
protected String fixXML(final String a){
    String cleaned = a;
    // the entities contain no regex metacharacters, so a literal replace is equivalent
    for (final String entity : new String[] {"&ndash;", "&amp;", "&quot;", "&minus;"}) {
        cleaned = cleaned.replace(entity, " ");
    }
    return cleaned;
}
protected boolean checkNumbers(final String a, final String b) {
final String numbersA = getNumbers(a);
final String numbersB = getNumbers(b);
@ -112,19 +128,31 @@ public abstract class AbstractPaceFunctions {
protected static String fixAliases(final String s) {
final StringBuilder sb = new StringBuilder();
for (final char ch : Lists.charactersOf(s)) {
s.chars().forEach(ch -> {
final int i = StringUtils.indexOf(aliases_from, ch);
sb.append(i >= 0 ? aliases_to.charAt(i) : ch);
}
sb.append(i >= 0 ? aliases_to.charAt(i) : (char)ch);
});
return sb.toString();
}
/**
 * Applies the shared transliterator to the given string.
 *
 * @param s the string to transliterate
 * @return the transliterated string, or {@code s} itself when the transliterator throws
 */
protected static String transliterate(final String s) {
    String result = s;
    try {
        result = transliterator.transliterate(s);
    } catch (final Exception ignored) {
        // best effort: on any failure the input is returned unchanged
    }
    return result;
}
protected String removeSymbols(final String s) {
final StringBuilder sb = new StringBuilder();
for (final char ch : Lists.charactersOf(s)) {
sb.append(StringUtils.contains(alpha, ch) ? ch : " ");
}
s.chars().forEach(ch -> {
sb.append(StringUtils.contains(alpha, ch) ? (char)ch : ' ');
});
return sb.toString().replaceAll("\\s+", " ");
}
@ -137,7 +165,7 @@ public abstract class AbstractPaceFunctions {
}
protected String normalize(final String s) {
return nfd(unicodeNormalization(s))
return fixAliases(transliterate(nfd(unicodeNormalization(s))))
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
.replaceAll("[^ \\w]+", "")
@ -152,6 +180,11 @@ public abstract class AbstractPaceFunctions {
return Normalizer.normalize(s, Normalizer.Form.NFD);
}
/**
 * Encodes the string to UTF-8 bytes and decodes those bytes back into a new string.
 *
 * @param s the string to round-trip
 * @return the string rebuilt from its UTF-8 encoding
 */
public String utf8(final String s) {
    return new String(s.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8);
}
public String unicodeNormalization(final String s) {
Matcher m = hexUnicodePattern.matcher(s);
@ -185,6 +218,7 @@ public abstract class AbstractPaceFunctions {
s = filterStopWords(s, stopwords_fr);
s = filterStopWords(s, stopwords_pt);
s = filterStopWords(s, stopwords_es);
s = filterStopWords(s, stopwords_gr);
return s;
}
@ -200,10 +234,13 @@ public abstract class AbstractPaceFunctions {
}
public static Set<String> loadFromClasspath(final String classpath) {
Transliterator transliterator = Transliterator.getInstance("Any-Eng");
final Set<String> h = Sets.newHashSet();
try {
for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath))) {
h.add(s);
for (final String s : IOUtils.readLines(NGramUtils.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
h.add(fixAliases(transliterator.transliterate(s))); //transliteration of the stopwords
}
} catch (final Throwable e) {
return Sets.newHashSet();
@ -212,14 +249,17 @@ public abstract class AbstractPaceFunctions {
}
public static Map<String, String> loadMapFromClasspath(final String classpath) {
Transliterator transliterator = Transliterator.getInstance("Any-Eng");
final Map<String, String> m = new HashMap<>();
try {
for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath))) {
for (final String s : IOUtils.readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) {
//string is like this: code;word1;word2;word3
String[] line = s.split(";");
String value = line[0];
for (int i = 1; i < line.length; i++) {
m.put(line[i].toLowerCase(), value);
m.put(fixAliases(transliterator.transliterate(line[i].toLowerCase())), value);
}
}
} catch (final Throwable e) {
@ -307,10 +347,11 @@ public abstract class AbstractPaceFunctions {
public static <T> String readFromClasspath(final String filename, final Class<T> clazz) {
final StringWriter sw = new StringWriter();
try {
IOUtils.copy(clazz.getResourceAsStream(filename), sw);
IOUtils.copy(clazz.getResourceAsStream(filename), sw, StandardCharsets.UTF_8);
return sw.toString();
} catch (final IOException e) {
throw new RuntimeException("cannot load resource from classpath: " + filename);
}
}
}

View File

@ -2,6 +2,7 @@ package eu.dnetlib.pace.config;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import eu.dnetlib.pace.model.ClusteringDef;
import eu.dnetlib.pace.model.FieldDef;
@ -47,7 +48,7 @@ public interface Config {
*
* @return the map
*/
public Map<String, List<String>> blacklists();
public Map<String, List<Pattern>> blacklists();
/**

View File

@ -1,5 +1,6 @@
package eu.dnetlib.pace.config;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.pace.model.ClusteringDef;
@ -7,15 +8,19 @@ import eu.dnetlib.pace.model.FieldDef;
import eu.dnetlib.pace.util.PaceException;
import org.antlr.stringtemplate.StringTemplate;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import eu.dnetlib.pace.tree.support.TreeNodeDef;
@ -31,6 +36,9 @@ public class DedupConfig implements Config, Serializable {
private WfConfig wf;
@JsonIgnore
private Map<String, List<Pattern>> blacklists;
private static Map<String, String> defaults = Maps.newHashMap();
static {
@ -57,6 +65,12 @@ public class DedupConfig implements Config, Serializable {
config = new ObjectMapper().readValue(json, DedupConfig.class);
config.getPace().initModel();
config.getPace().initTranslationMap();
config.blacklists = config.getPace().getBlacklists().entrySet()
.stream()
.collect(Collectors.toMap(e -> e.getKey(),
e ->e.getValue().stream().filter(s -> !StringUtils.isBlank(s)).map(Pattern::compile).collect(Collectors.toList()) ));
return config;
} catch (IOException e) {
throw new PaceException("Error in parsing configuration json", e);
@ -88,7 +102,7 @@ public class DedupConfig implements Config, Serializable {
}
private String readFromClasspath(final String resource) throws IOException {
return IOUtils.toString(getClass().getResource(resource));
return IOUtils.toString(getClass().getResource(resource), StandardCharsets.UTF_8);
}
public PaceConfig getPace() {
@ -137,8 +151,8 @@ public class DedupConfig implements Config, Serializable {
}
@Override
public Map<String, List<String>> blacklists() {
return getPace().getBlacklists();
public Map<String, List<Pattern>> blacklists() {
return blacklists;
}
@Override

View File

@ -3,6 +3,7 @@ package eu.dnetlib.pace.config;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.collect.Maps;
import com.ibm.icu.text.Transliterator;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.model.ClusteringDef;
import eu.dnetlib.pace.model.FieldDef;
@ -43,10 +44,12 @@ public class PaceConfig extends AbstractPaceFunctions implements Serializable {
public void initTranslationMap(){
translationMap = Maps.newHashMap();
Transliterator transliterator = Transliterator.getInstance("Any-Eng");
for (String key : synonyms.keySet()) {
for (String term : synonyms.get(key)){
translationMap.put(
normalize(term.toLowerCase()),
fixAliases(transliterator.transliterate(term.toLowerCase())),
key);
}
}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.pace.config;
public enum Type {
String, Int, List, JSON, URL
String, Int, List, JSON, URL, StringConcat, DoubleArray
}

View File

@ -20,4 +20,6 @@ public interface FieldValue extends Field {
*/
public void setValue(final Object value);
public double[] doubleArrayValue();
}

View File

@ -58,8 +58,10 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
throw new RuntimeException(value.toString());
}
case URL:
String str = value.toString();
return StringUtils.isBlank(str) || !isValidURL(str);
String str = value.toString();
return StringUtils.isBlank(str) || !isValidURL(str);
case DoubleArray:
return doubleArrayValue().length==0;
default:
return true;
}
@ -116,6 +118,10 @@ public class FieldValueImpl extends AbstractField implements FieldValue {
// }
}
/**
 * Returns the wrapped value cast to an array of doubles.
 *
 * @return the value as {@code double[]}; the cast fails if the value has another type
 */
public double[] doubleArrayValue() {
    final Object raw = getValue();
    return (double[]) raw;
}
/*
* (non-Javadoc)
*

View File

@ -43,7 +43,7 @@ public class Person {
// s = s.replaceAll("[\\W&&[^,-]]", "");
}
if (s.contains(",")) {
if (s.contains(",")) { //if the name contains a comma it is easy derivable the name and the surname
final String[] arr = s.split(",");
if (arr.length == 1) {
fullname = splitTerms(arr[0]);

View File

@ -0,0 +1,154 @@
package eu.dnetlib.pace.tree;
import com.google.common.collect.Iterables;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldList;
import eu.dnetlib.pace.model.Person;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import com.wcohen.ss.AbstractStringDistance;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Comparator for lists of author names. Every author of the first list is compared with the
 * authors of the second list; the result is either the number of authors of the first list
 * that found a match ("count") or that number divided by a normalization factor ("percentage").
 */
@ComparatorClass("authorsMatch")
public class AuthorsMatch extends AbstractComparator {

    Map<String, String> params;

    // Defaults are assigned here so that instances created through the protected constructor
    // are usable too (previously mode/type were null and the thresholds were 0).
    private double surnameThreshold = 0.95;
    private double nameThreshold = 0.95;
    private double fullnameThreshold = 0.9;
    private String mode = "full"; //full or surname
    private int sizeThreshold = 20;
    private String type = "percentage"; //count or percentage

    /**
     * @param params comparator parameters; recognised keys are "mode", "surname_th", "name_th",
     *               "fullname_th", "size_th" and "type"
     */
    public AuthorsMatch(Map<String, String> params){
        super(params, new com.wcohen.ss.JaroWinkler());
        this.params = params;

        mode = params.getOrDefault("mode", "full");
        surnameThreshold = Double.parseDouble(params.getOrDefault("surname_th", "0.95"));
        nameThreshold = Double.parseDouble(params.getOrDefault("name_th", "0.95"));
        fullnameThreshold = Double.parseDouble(params.getOrDefault("fullname_th", "0.9"));
        sizeThreshold = Integer.parseInt(params.getOrDefault("size_th", "20"));
        type = params.getOrDefault("type", "percentage");
    }

    protected AuthorsMatch(double w, AbstractStringDistance ssalgo) {
        super(w, ssalgo);
    }

    /**
     * Compares two author lists.
     *
     * @return -1 when one of the fields is empty; 1.0 when one of the lists has more than
     *         "size_th" elements; otherwise the number of matched authors, divided by the
     *         normalization factor when the type is "percentage"
     */
    @Override
    public double compare(final Field a, final Field b, final Config conf) {

        if (a.isEmpty() || b.isEmpty())
            return -1;

        // lists that are too long are not compared element by element
        final FieldList authorsA = (FieldList) a;
        if (authorsA.size() > sizeThreshold)
            return 1.0;
        final FieldList authorsB = (FieldList) b;
        if (authorsB.size() > sizeThreshold)
            return 1.0;

        final List<Person> aList = authorsA.stringList().stream().map(author -> new Person(author, false)).collect(Collectors.toList());
        final List<Person> bList = authorsB.stringList().stream().map(author -> new Person(author, false)).collect(Collectors.toList());

        // the counter is local: keeping it in an instance field made compare() non re-entrant
        int common = 0;

        //compare each element of List1 with each element of List2
        for (final Person p1 : aList) {
            for (final Person p2 : bList) {
                if (samePerson(p1, p2)) {
                    common += 1;
                    break; // at most one match is counted for each author of the first list
                }
            }
        }

        //normalization factor to compute the score
        // NOTE(review): an author of the second list can be matched by several authors of the
        // first one, so with lists of different size the percentage can exceed 1.0 - confirm
        // whether the matching is meant to be one-to-one.
        final int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common);

        if ("percentage".equals(type)) {
            return (double) common / normFactor;
        }
        return (double) common;
    }

    /**
     * Decides whether two persons are the same author.
     * Both inaccurate: string similarity of the normalized full names.
     * Exactly one accurate: the full name of the inaccurate one must contain the surname
     * (and, in "full" mode, the first name) of the accurate one.
     * Both accurate: surnames (and, in "full" mode, first names) must be similar.
     */
    private boolean samePerson(final Person p1, final Person p2) {
        final boolean fullMode = "full".equals(mode);
        final boolean accurate1 = p1.isAccurate();
        final boolean accurate2 = p2.isAccurate();

        if (accurate1 && accurate2) {
            return compareSurname(p1, p2) && (!fullMode || compareFirstname(p1, p2));
        }

        if (!accurate1 && !accurate2) {
            return ssalgo.score(comparableFullname(p1), comparableFullname(p2)) > fullnameThreshold;
        }

        final Person accurate = accurate1 ? p1 : p2;
        final Person inaccurate = accurate1 ? p2 : p1;
        final String fullname = comparableFullname(inaccurate);

        if (!fullname.contains(normalization(accurate.getNormalisedSurname()))) {
            return false;
        }
        return !fullMode || fullname.contains(normalization(accurate.getNormalisedFirstName()));
    }

    /** Normalized full name of the person, falling back to the original string when it is empty. */
    private String comparableFullname(final Person p) {
        final String fullname = p.getNormalisedFullname();
        return normalization(fullname.isEmpty() ? p.getOriginal() : fullname);
    }

    /** @return true when the similarity of the normalized surnames exceeds the surname threshold */
    public boolean compareSurname(Person p1, Person p2) {
        return ssalgo.score(normalization(p1.getNormalisedSurname()), normalization(p2.getNormalisedSurname())) > surnameThreshold;
    }

    /**
     * @return true when the first names are similar; when one of them has at most two
     *         characters, equal results of firstLC on both names are enough
     */
    public boolean compareFirstname(Person p1, Person p2) {

        if (p1.getNormalisedFirstName().length() <= 2 || p2.getNormalisedFirstName().length() <= 2) {
            if (firstLC(p1.getNormalisedFirstName()).equals(firstLC(p2.getNormalisedFirstName())))
                return true;
        }

        return ssalgo.score(normalization(p1.getNormalisedFirstName()), normalization(p2.getNormalisedFirstName())) > nameThreshold;
    }

    /** Applies cleanup, the UTF-8 round trip and normalize, in this order. */
    public String normalization(String s) {
        return normalize(utf8(cleanup(s)));
    }
}

View File

@ -1,57 +0,0 @@
package eu.dnetlib.pace.tree;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.tree.support.AbstractComparator;
import eu.dnetlib.pace.tree.support.ComparatorClass;
import java.util.Map;
/**
 * Comparator checking whether the configured string ("string" parameter) is contained in the
 * compared values, combined according to the "bool" parameter (AND, OR or XOR).
 * The "caseSensitive" parameter (default false) controls the case sensitivity of the check.
 *
 * @author miconis
 * */
@ComparatorClass("containsMatch")
public class ContainsMatch extends AbstractComparator {

    private Map<String, String> params;

    public ContainsMatch(Map<String, String> params) {
        super(params);
        this.params = params;
    }

    /**
     * @return 1.0 when the containment condition selected by "bool" holds, 0.0 otherwise
     *         (also when "string" or "bool" is missing or "bool" is not AND, OR or XOR)
     */
    @Override
    public double distance(final String a, final String b, final Config conf) {

        //read parameters
        final boolean caseSensitive = Boolean.parseBoolean(params.getOrDefault("caseSensitive", "false"));
        final String string = params.get("string");
        final String agg = params.get("bool");

        // a missing parameter is handled like an unknown operator instead of failing with a NullPointerException
        if (string == null || agg == null) {
            return 0.0;
        }

        // in case-insensitive mode the searched string must be lower-cased as well,
        // otherwise a configured value containing upper-case letters can never be found
        final String needle = caseSensitive ? string : string.toLowerCase();
        final boolean inA = (caseSensitive ? a : a.toLowerCase()).contains(needle);
        final boolean inB = (caseSensitive ? b : b.toLowerCase()).contains(needle);

        final boolean matched;
        switch (agg) {
            case "AND":
                matched = inA && inB;
                break;
            case "OR":
                matched = inA || inB;
                break;
            case "XOR":
                matched = inA ^ inB;
                break;
            default:
                matched = false;
                break;
        }

        return matched ? 1.0 : 0.0;
    }
}

Some files were not shown because too many files have changed in this diff Show More