diff --git a/.gitignore b/.gitignore
index f4fb46f2e..14cd4d345 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,8 +3,6 @@
*.iws
*.ipr
*.iml
-*.ipr
-*.iws
*~
.vscode
.metals
@@ -27,4 +25,5 @@ spark-warehouse
/**/job-override.properties
/**/*.log
/**/.factorypath
-
+/**/.scalafmt.conf
+/.java-version
diff --git a/.scalafmt.conf b/.scalafmt.conf
new file mode 100644
index 000000000..0b5dbe0b4
--- /dev/null
+++ b/.scalafmt.conf
@@ -0,0 +1,21 @@
+style = defaultWithAlign
+
+align.openParenCallSite = false
+align.openParenDefnSite = false
+align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
+continuationIndent.callSite = 2
+continuationIndent.defnSite = 2
+danglingParentheses = true
+indentOperator = spray
+maxColumn = 120
+newlines.alwaysBeforeTopLevelStatements = true
+project.excludeFilters = [".*\\.sbt"]
+rewrite.rules = [AvoidInfix]
+rewrite.rules = [ExpandImportSelectors]
+rewrite.rules = [RedundantBraces]
+rewrite.rules = [RedundantParens]
+rewrite.rules = [SortImports]
+rewrite.rules = [SortModifiers]
+rewrite.rules = [PreferCurlyFors]
+spaces.inImportCurlyBraces = false
+unindentTopLevelOperators = true
\ No newline at end of file
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..aff151f94
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,43 @@
+# Contributor Code of Conduct
+
+Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share.
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Attempting collaboration before conflict
+- Focusing on what is best for the community
+- Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+- Violence, threats of violence, or inciting others to commit self-harm
+- The use of sexualized language or imagery and unwelcome sexual attention or advances
+- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or electronic address, without explicit permission
+- Abuse of the reporting process to intentionally harass or exclude others
+- Advocating for, or encouraging, any of the above behavior
+- Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html).
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..13a359c86
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,10 @@
+# Contributing to D-Net Hadoop
+
+:+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
+
+This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
+
+The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules.
+Use your best judgment, and feel free to propose changes to this document in a pull request.
+
+All contributions are welcome, all contributions will be considered to be contributed under the [project license](LICENSE.md).
diff --git a/LICENSE b/LICENSE.md
similarity index 100%
rename from LICENSE
rename to LICENSE.md
diff --git a/README.md b/README.md
index 0a0bd82ab..b6575814d 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,133 @@
# dnet-hadoop
-Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning.
\ No newline at end of file
+
+Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning.
+
+This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md).
+By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it).
+
+This project is licensed under the [AGPL v3 or later version](LICENSE.md).
+
+How to build, package and run oozie workflows
+====================
+
+Oozie-installer is a utility allowing building, uploading and running oozie workflows. In practice, it creates a `*.tar.gz`
+package that contains resources that define a workflow and some helper scripts.
+
+This module is automatically executed when running:
+
+`mvn package -Poozie-package -Dworkflow.source.dir=classpath/to/parent/directory/of/oozie_app`
+
+on module having set:
+
+```
+
+ eu.dnetlib.dhp
+ dhp-workflows
+
+```
+
+in `pom.xml` file. `oozie-package` profile initializes oozie workflow packaging, `workflow.source.dir` property points to
+a workflow (notice: this is not a relative path but a classpath to directory usually holding `oozie_app` subdirectory).
+
+The outcome of this packaging is `oozie-package.tar.gz` file containing inside all the resources required to run Oozie workflow:
+
+- jar packages
+- workflow definitions
+- job properties
+- maintenance scripts
+
+Required properties
+====================
+
+In order to include proper workflow within package, `workflow.source.dir` property has to be set. It could be provided
+by setting `-Dworkflow.source.dir=some/job/dir` maven parameter.
+
+In order to define full set of cluster environment properties one should create `~/.dhp/application.properties` file with
+the following properties:
+
+- `dhp.hadoop.frontend.user.name` - your user name on hadoop cluster and frontend machine
+- `dhp.hadoop.frontend.host.name` - frontend host name
+- `dhp.hadoop.frontend.temp.dir` - frontend directory for temporary files
+- `dhp.hadoop.frontend.port.ssh` - frontend machine ssh port
+- `oozieServiceLoc` - oozie service location required by run_workflow.sh script executing oozie job
+- `nameNode` - name node address
+- `jobTracker` - job tracker address
+- `oozie.execution.log.file.location` - location of file that will be created when executing oozie job, it contains output
+produced by `run_workflow.sh` script (needed to obtain oozie job id)
+- `maven.executable` - mvn command location, requires parameterization due to a different setup of CI cluster
+- `sparkDriverMemory` - amount of memory assigned to spark jobs driver
+- `sparkExecutorMemory` - amount of memory assigned to spark jobs executors
+- `sparkExecutorCores` - number of cores assigned to spark jobs executors
+
+All values will be overridden with the ones from `job.properties` and eventually `job-override.properties` stored in module's
+main folder.
+
+When overriding properties from `job.properties`, `job-override.properties` file can be created in main module directory
+(the one containing `pom.xml` file) and define all new properties which will override existing properties.
+One can provide those properties one by one as command line `-D` arguments.
+
+Properties overriding order is the following:
+
+1. `pom.xml` defined properties (located in the project root dir)
+2. `~/.dhp/application.properties` defined properties
+3. `${workflow.source.dir}/job.properties`
+4. `job-override.properties` (located in the project root dir)
+5. `maven -Dparam=value`
+
+where the maven `-Dparam` property is overriding all the other ones.
+
+Workflow definition requirements
+====================
+
+`workflow.source.dir` property should point to the following directory structure:
+
+ [${workflow.source.dir}]
+ |
+ |-job.properties (optional)
+ |
+ \-[oozie_app]
+ |
+ \-workflow.xml
+
+This property can be set using maven `-D` switch.
+
+`[oozie_app]` is the default directory name however it can be set to any value as long as `oozieAppDir` property is
+provided with directory name as value.
+
+Sub-workflows are supported as well and sub-workflow directories should be nested within `[oozie_app]` directory.
+
+Creating oozie installer step-by-step
+=====================================
+
+Automated oozie-installer steps are the following:
+
+1. creating jar packages: `*.jar` and `*tests.jar` along with copying all dependencies in `target/dependencies`
+2. reading properties from maven, `~/.dhp/application.properties`, `job.properties`, `job-override.properties`
+3. invoking priming mechanism linking resources from import.txt file (currently resolving subworkflow resources)
+4. assembling shell scripts for preparing Hadoop filesystem, uploading Oozie application and starting workflow
+5. copying whole `${workflow.source.dir}` content to `target/${oozie.package.file.name}`
+6. generating updated `job.properties` file in `target/${oozie.package.file.name}` based on maven,
+`~/.dhp/application.properties`, `job.properties` and `job-override.properties`
+7. creating `lib` directory (or multiple directories for sub-workflows for each nested directory) and copying jar packages
+created at step (1) to each one of them
+8. bundling whole `${oozie.package.file.name}` directory into single tar.gz package
+
+Uploading oozie package and running workflow on cluster
+=======================================================
+
+In order to simplify deployment and execution process two dedicated profiles were introduced:
+
+- `deploy`
+- `run`
+
+to be used along with `oozie-package` profile e.g. by providing `-Poozie-package,deploy,run` maven parameters.
+
+The `deploy` profile supplements packaging process with:
+1) uploading oozie-package via scp to `/home/${user.name}/oozie-packages` directory on `${dhp.hadoop.frontend.host.name}` machine
+2) extracting uploaded package
+3) uploading oozie content to hadoop cluster HDFS location defined in `oozie.wf.application.path` property (generated dynamically by maven build process, based on `${dhp.hadoop.frontend.user.name}` and `workflow.source.dir` properties)
+
+The `run` profile introduces:
+1) executing oozie application uploaded to HDFS cluster using `deploy` command. Triggers `run_workflow.sh` script providing runtime properties defined in `job.properties` file.
+
+Notice: ssh access to frontend machine has to be configured on system level and it is preferable to set key-based authentication in order to simplify remote operations.
\ No newline at end of file
diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
index 012ff89a3..44165995d 100644
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build-assembly-resources
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
index 256017e2c..7579bdf45 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build-properties-maven-plugin
diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index 77aa2aedb..5a86efe17 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp-code-style
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
jar
@@ -22,9 +22,20 @@
dnet45-releases
https://maven.d4science.org/nexus/content/repositories/dnet45-releases
+
+ DHPSite
+ ${dhp.site.stage.path}/dhp-build/dhp-code-style
+
+
+
+ org.apache.maven.wagon
+ wagon-ssh
+ 2.10
+
+
@@ -35,14 +46,19 @@
org.apache.maven.plugins
maven-site-plugin
- 3.7.1
+ 3.9.1
+
+ true
+
+
UTF-8
+ sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop
\ No newline at end of file
diff --git a/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf
new file mode 100644
index 000000000..0b5dbe0b4
--- /dev/null
+++ b/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf
@@ -0,0 +1,21 @@
+style = defaultWithAlign
+
+align.openParenCallSite = false
+align.openParenDefnSite = false
+align.tokens = [{code = "->"}, {code = "<-"}, {code = "=>", owner = "Case"}]
+continuationIndent.callSite = 2
+continuationIndent.defnSite = 2
+danglingParentheses = true
+indentOperator = spray
+maxColumn = 120
+newlines.alwaysBeforeTopLevelStatements = true
+project.excludeFilters = [".*\\.sbt"]
+rewrite.rules = [AvoidInfix]
+rewrite.rules = [ExpandImportSelectors]
+rewrite.rules = [RedundantBraces]
+rewrite.rules = [RedundantParens]
+rewrite.rules = [SortImports]
+rewrite.rules = [SortModifiers]
+rewrite.rules = [PreferCurlyFors]
+spaces.inImportCurlyBraces = false
+unindentTopLevelOperators = true
\ No newline at end of file
diff --git a/dhp-build/dhp-code-style/src/site/site.xml b/dhp-build/dhp-code-style/src/site/site.xml
new file mode 100644
index 000000000..634a2c154
--- /dev/null
+++ b/dhp-build/dhp-code-style/src/site/site.xml
@@ -0,0 +1,21 @@
+
+
+
+ org.apache.maven.skins
+ maven-fluido-skin
+ 1.8
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index 12b999b9c..9040ea94e 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -4,12 +4,15 @@
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
dhp-build
pom
This module is a container for the build tools used in dnet-hadoop
+
+ true
+
dhp-code-style
@@ -17,4 +20,12 @@
dhp-build-properties-maven-plugin
+
+
+
+ DHPSite
+ ${dhp.site.stage.path}/dhp-build/
+
+
+
diff --git a/dhp-build/src/site/site.xml b/dhp-build/src/site/site.xml
new file mode 100644
index 000000000..2d9d769a2
--- /dev/null
+++ b/dhp-build/src/site/site.xml
@@ -0,0 +1,22 @@
+
+
+
+ org.apache.maven.skins
+ maven-fluido-skin
+ 1.8
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index c057123b1..6198bd81e 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp
- 1.2.4-SNAPSHOT
+ 1.2.5-SNAPSHOT
../pom.xml
@@ -13,9 +13,60 @@
dhp-common
jar
+
+
+ DHPSite
+ ${dhp.site.stage.path}/dhp-common
+
+
+
This module contains common utilities meant to be used across the dnet-hadoop submodules
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ ${net.alchim31.maven.version}
+
+
+ scala-compile-first
+ initialize
+
+ add-source
+ compile
+
+
+
+ scala-test-compile
+ process-test-resources
+
+ testCompile
+
+
+
+ scala-doc
+ process-resources
+
+ doc
+
+
+
+
+ true
+ ${scala.binary.version}
+ ${scala.version}
+
+
+
+
+
+
+ eu.dnetlib.dhp
+ dhp-pace-core
+ ${project.version}
+
org.apache.hadoop
@@ -32,11 +83,11 @@
org.apache.spark
- spark-core_2.11
+ spark-core_${scala.binary.version}
org.apache.spark
- spark-sql_2.11
+ spark-sql_${scala.binary.version}
@@ -98,11 +149,6 @@
okhttp
-
- eu.dnetlib
- dnet-pace-core
-
-
org.apache.httpcomponents
httpclient
@@ -115,7 +161,7 @@
eu.dnetlib.dhp
- dhp-schemas
+ ${dhp-schemas.artifact}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
index a62a0ac79..0477d6399 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java
@@ -10,6 +10,12 @@ public class Constants {
public static final Map accessRightsCoarMap = Maps.newHashMap();
public static final Map coarCodeLabelMap = Maps.newHashMap();
+ public static final String ROR_NS_PREFIX = "ror_________";
+
+ public static final String ROR_OPENAIRE_ID = "10|openaire____::993a7ae7a863813cf95028b50708e222";
+
+ public static final String ROR_DATASOURCE_NAME = "Research Organization Registry (ROR)";
+
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
private Constants() {
@@ -45,6 +51,7 @@ public class Constants {
public static final String RETRY_DELAY = "retryDelay";
public static final String CONNECT_TIMEOUT = "connectTimeOut";
public static final String READ_TIMEOUT = "readTimeOut";
+ public static final String REQUEST_METHOD = "requestMethod";
public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
deleted file mode 100644
index 8ceee5c8a..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java
+++ /dev/null
@@ -1,413 +0,0 @@
-
-package eu.dnetlib.dhp.common;
-
-import java.io.Serializable;
-import java.util.*;
-import java.util.stream.Collectors;
-
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.dump.oaf.*;
-import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityInstance;
-import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult;
-import eu.dnetlib.dhp.schema.oaf.DataInfo;
-import eu.dnetlib.dhp.schema.oaf.Field;
-import eu.dnetlib.dhp.schema.oaf.Journal;
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-
-public class GraphResultMapper implements Serializable {
-
- public static Result map(
- E in) {
-
- CommunityResult out = new CommunityResult();
-
- eu.dnetlib.dhp.schema.oaf.Result input = (eu.dnetlib.dhp.schema.oaf.Result) in;
- Optional ort = Optional.ofNullable(input.getResulttype());
- if (ort.isPresent()) {
- switch (ort.get().getClassid()) {
- case "publication":
- Optional journal = Optional
- .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
- if (journal.isPresent()) {
- Journal j = journal.get();
- Container c = new Container();
- c.setConferencedate(j.getConferencedate());
- c.setConferenceplace(j.getConferenceplace());
- c.setEdition(j.getEdition());
- c.setEp(j.getEp());
- c.setIss(j.getIss());
- c.setIssnLinking(j.getIssnLinking());
- c.setIssnOnline(j.getIssnOnline());
- c.setIssnPrinted(j.getIssnPrinted());
- c.setName(j.getName());
- c.setSp(j.getSp());
- c.setVol(j.getVol());
- out.setContainer(c);
- out.setType(ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE.getClassname());
- }
- break;
- case "dataset":
- eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
- Optional.ofNullable(id.getSize()).ifPresent(v -> out.setSize(v.getValue()));
- Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
-
- out
- .setGeolocation(
- Optional
- .ofNullable(id.getGeolocation())
- .map(
- igl -> igl
- .stream()
- .filter(Objects::nonNull)
- .map(gli -> {
- GeoLocation gl = new GeoLocation();
- gl.setBox(gli.getBox());
- gl.setPlace(gli.getPlace());
- gl.setPoint(gli.getPoint());
- return gl;
- })
- .collect(Collectors.toList()))
- .orElse(null));
-
- out.setType(ModelConstants.DATASET_DEFAULT_RESULTTYPE.getClassname());
- break;
- case "software":
-
- eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
- Optional
- .ofNullable(is.getCodeRepositoryUrl())
- .ifPresent(value -> out.setCodeRepositoryUrl(value.getValue()));
- Optional
- .ofNullable(is.getDocumentationUrl())
- .ifPresent(
- value -> out
- .setDocumentationUrl(
- value
- .stream()
- .map(Field::getValue)
- .collect(Collectors.toList())));
-
- Optional
- .ofNullable(is.getProgrammingLanguage())
- .ifPresent(value -> out.setProgrammingLanguage(value.getClassid()));
-
- out.setType(ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE.getClassname());
- break;
- case "other":
-
- eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
- out
- .setContactgroup(
- Optional
- .ofNullable(ir.getContactgroup())
- .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
- .orElse(null));
-
- out
- .setContactperson(
- Optional
- .ofNullable(ir.getContactperson())
- .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
- .orElse(null));
- out
- .setTool(
- Optional
- .ofNullable(ir.getTool())
- .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
- .orElse(null));
-
- out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
-
- break;
- }
-
- Optional
- .ofNullable(input.getAuthor())
- .ifPresent(
- ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
-
- // I do not map Access Right UNKNOWN or OTHER
-
- Optional oar = Optional.ofNullable(input.getBestaccessright());
- if (oar.isPresent()) {
- if (Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
- String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
- out
- .setBestaccessright(
- AccessRight
- .newInstance(
- code,
- Constants.coarCodeLabelMap.get(code),
- Constants.COAR_ACCESS_RIGHT_SCHEMA));
- }
- }
-
- final List contributorList = new ArrayList<>();
- Optional
- .ofNullable(input.getContributor())
- .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
- out.setContributor(contributorList);
-
- Optional
- .ofNullable(input.getCountry())
- .ifPresent(
- value -> out
- .setCountry(
- value
- .stream()
- .map(
- c -> {
- if (c.getClassid().equals((ModelConstants.UNKNOWN))) {
- return null;
- }
- Country country = new Country();
- country.setCode(c.getClassid());
- country.setLabel(c.getClassname());
- Optional
- .ofNullable(c.getDataInfo())
- .ifPresent(
- provenance -> country
- .setProvenance(
- Provenance
- .newInstance(
- provenance
- .getProvenanceaction()
- .getClassname(),
- c.getDataInfo().getTrust())));
- return country;
- })
- .filter(Objects::nonNull)
- .collect(Collectors.toList())));
-
- final List coverageList = new ArrayList<>();
- Optional
- .ofNullable(input.getCoverage())
- .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
- out.setCoverage(coverageList);
-
- out.setDateofcollection(input.getDateofcollection());
-
- final List descriptionList = new ArrayList<>();
- Optional
- .ofNullable(input.getDescription())
- .ifPresent(value -> value.forEach(d -> descriptionList.add(d.getValue())));
- out.setDescription(descriptionList);
- Optional> oStr = Optional.ofNullable(input.getEmbargoenddate());
- if (oStr.isPresent()) {
- out.setEmbargoenddate(oStr.get().getValue());
- }
-
- final List formatList = new ArrayList<>();
- Optional
- .ofNullable(input.getFormat())
- .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
- out.setFormat(formatList);
- out.setId(input.getId());
- out.setOriginalId(input.getOriginalId());
-
- Optional> oInst = Optional
- .ofNullable(input.getInstance());
-
- if (oInst.isPresent()) {
- out
- .setInstance(
- oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
-
- }
-
- Optional oL = Optional.ofNullable(input.getLanguage());
- if (oL.isPresent()) {
- eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
- out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
- }
- Optional oLong = Optional.ofNullable(input.getLastupdatetimestamp());
- if (oLong.isPresent()) {
- out.setLastupdatetimestamp(oLong.get());
- }
- Optional> otitle = Optional.ofNullable(input.getTitle());
- if (otitle.isPresent()) {
- List iTitle = otitle
- .get()
- .stream()
- .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
- .collect(Collectors.toList());
- if (!iTitle.isEmpty()) {
- out.setMaintitle(iTitle.get(0).getValue());
- }
-
- iTitle = otitle
- .get()
- .stream()
- .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
- .collect(Collectors.toList());
- if (!iTitle.isEmpty()) {
- out.setSubtitle(iTitle.get(0).getValue());
- }
-
- }
-
- List pids = new ArrayList<>();
- Optional
- .ofNullable(input.getPid())
- .ifPresent(
- value -> value
- .stream()
- .forEach(
- p -> pids
- .add(
- ControlledField
- .newInstance(p.getQualifier().getClassid(), p.getValue()))));
- out.setPid(pids);
- oStr = Optional.ofNullable(input.getDateofacceptance());
- if (oStr.isPresent()) {
- out.setPublicationdate(oStr.get().getValue());
- }
- oStr = Optional.ofNullable(input.getPublisher());
- if (oStr.isPresent()) {
- out.setPublisher(oStr.get().getValue());
- }
-
- List sourceList = new ArrayList<>();
- Optional
- .ofNullable(input.getSource())
- .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
- // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));
- List subjectList = new ArrayList<>();
- Optional
- .ofNullable(input.getSubject())
- .ifPresent(
- value -> value
- .forEach(s -> subjectList.add(getSubject(s))));
-
- out.setSubjects(subjectList);
-
- out.setType(input.getResulttype().getClassid());
- }
-
- out
- .setCollectedfrom(
- input
- .getCollectedfrom()
- .stream()
- .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
- .collect(Collectors.toList()));
-
- return out;
-
- }
-
- private static CommunityInstance getInstance(eu.dnetlib.dhp.schema.oaf.Instance i) {
- CommunityInstance instance = new CommunityInstance();
-
- setCommonValue(i, instance);
-
- instance
- .setCollectedfrom(
- KeyValue
- .newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
-
- instance
- .setHostedby(
- KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
-
- return instance;
-
- }
-
- private static void setCommonValue(eu.dnetlib.dhp.schema.oaf.Instance i, I instance) {
- Optional opAr = Optional
- .ofNullable(i.getAccessright());
- if (opAr.isPresent()) {
- if (Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
- String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
- instance
- .setAccessright(
- AccessRight
- .newInstance(
- code,
- Constants.coarCodeLabelMap.get(code),
- Constants.COAR_ACCESS_RIGHT_SCHEMA));
- }
- }
-
- Optional
- .ofNullable(i.getLicense())
- .ifPresent(value -> instance.setLicense(value.getValue()));
- Optional
- .ofNullable(i.getDateofacceptance())
- .ifPresent(value -> instance.setPublicationdate(value.getValue()));
- Optional
- .ofNullable(i.getRefereed())
- .ifPresent(value -> instance.setRefereed(value.getClassname()));
- Optional
- .ofNullable(i.getInstancetype())
- .ifPresent(value -> instance.setType(value.getClassname()));
- Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
-
- }
-
- private static Subject getSubject(StructuredProperty s) {
- Subject subject = new Subject();
- subject.setSubject(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()));
- Optional di = Optional.ofNullable(s.getDataInfo());
- if (di.isPresent()) {
- Provenance p = new Provenance();
- p.setProvenance(di.get().getProvenanceaction().getClassname());
- p.setTrust(di.get().getTrust());
- subject.setProvenance(p);
- }
-
- return subject;
- }
-
- private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
- Author a = new Author();
- a.setFullname(oa.getFullname());
- a.setName(oa.getName());
- a.setSurname(oa.getSurname());
- a.setRank(oa.getRank());
-
- Optional> oPids = Optional
- .ofNullable(oa.getPid());
- if (oPids.isPresent()) {
- Pid pid = getOrcid(oPids.get());
- if (pid != null) {
- a.setPid(pid);
- }
- }
-
- return a;
- }
-
- private static Pid getOrcid(List p) {
- for (StructuredProperty pid : p) {
- if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
- Optional di = Optional.ofNullable(pid.getDataInfo());
- if (di.isPresent()) {
- return Pid
- .newInstance(
- ControlledField
- .newInstance(
- pid.getQualifier().getClassid(),
- pid.getValue()),
- Provenance
- .newInstance(
- di.get().getProvenanceaction().getClassname(),
- di.get().getTrust()));
- } else {
- return Pid
- .newInstance(
- ControlledField
- .newInstance(
- pid.getQualifier().getClassid(),
- pid.getValue())
-
- );
- }
-
- }
- }
- return null;
- }
-
-}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MDStoreInfo.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MDStoreInfo.java
new file mode 100644
index 000000000..bd1ccca50
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MDStoreInfo.java
@@ -0,0 +1,100 @@
+
+package eu.dnetlib.dhp.common;
+
+/**
+ * Plain data holder describing a Metadata Store (mdstore): its mdstore value, its current id and its
+ * latest timestamp. This information is needed during the migration of the stores from mongo to HDFS.
+ */
+public class MDStoreInfo {
+ private String mdstore;
+ private String currentId;
+ private Long latestTimestamp;
+
+ /**
+ * Creates an empty instance; every field stays {@code null} until the corresponding setter is called.
+ */
+ public MDStoreInfo() {
+ }
+
+ /**
+ * Creates an instance with all three fields assigned from the given arguments.
+ *
+ * @param mdstore the mdstore value (presumably the metadata store identifier - confirm with callers)
+ * @param currentId the current id associated with the mdstore
+ * @param latestTimestamp the latest timestamp, may be {@code null} (unit is not defined in this class)
+ */
+ public MDStoreInfo(String mdstore, String currentId, Long latestTimestamp) {
+ this.mdstore = mdstore;
+ this.currentId = currentId;
+ this.latestTimestamp = latestTimestamp;
+ }
+
+ /**
+ * Returns the mdstore value.
+ *
+ * @return the mdstore, or {@code null} if it was never set
+ */
+ public String getMdstore() {
+ return mdstore;
+ }
+
+ /**
+ * Sets the mdstore value.
+ *
+ * @param mdstore the mdstore value to store
+ * @return this instance, so that setter calls can be chained
+ */
+ public MDStoreInfo setMdstore(String mdstore) {
+ this.mdstore = mdstore;
+ return this;
+ }
+
+ /**
+ * Returns the current id.
+ *
+ * @return the current id, or {@code null} if it was never set
+ */
+ public String getCurrentId() {
+ return currentId;
+ }
+
+ /**
+ * Sets the current id.
+ *
+ * @param currentId the current id to store
+ * @return this instance, so that setter calls can be chained
+ */
+ public MDStoreInfo setCurrentId(String currentId) {
+ this.currentId = currentId;
+ return this;
+ }
+
+ /**
+ * Returns the latest timestamp.
+ *
+ * @return the latest timestamp, or {@code null} if it was never set
+ */
+ public Long getLatestTimestamp() {
+ return latestTimestamp;
+ }
+
+ /**
+ * Sets the latest timestamp.
+ *
+ * @param latestTimestamp the latest timestamp to store, may be {@code null}
+ * @return this instance, so that setter calls can be chained
+ */
+ public MDStoreInfo setLatestTimestamp(Long latestTimestamp) {
+ this.latestTimestamp = latestTimestamp;
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ return "MDStoreInfo{" +
+ "mdstore='" + mdstore + '\'' +
+ ", currentId='" + currentId + '\'' +
+ ", latestTimestamp=" + latestTimestamp +
+ '}';
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
index abb9dc148..eca433e9e 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java
@@ -5,13 +5,71 @@ import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
+import java.util.Optional;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
public class MakeTarArchive implements Serializable {
+ private static final Logger log = LoggerFactory.getLogger(MakeTarArchive.class);
+ /** Command-line entry point: parses the arguments declared in input_maketar_parameters.json, then runs makeTArArchive. */
+ public static void main(String[] args) throws Exception {
+ String jsonConfiguration = IOUtils
+ .toString(
+ MakeTarArchive.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/common/input_maketar_parameters.json"));
+
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ final String outputPath = parser.get("hdfsPath");
+ log.info("hdfsPath: {}", outputPath);
+
+ final String hdfsNameNode = parser.get("nameNode");
+ log.info("nameNode: {}", hdfsNameNode);
+
+ final String inputPath = parser.get("sourcePath");
+ log.info("input path : {}", inputPath);
+ // "splitSize" is optional and defaults to 10; presumably gigabytes, judging by the variable name - TODO confirm
+ final int gBperSplit = Optional
+ .ofNullable(parser.get("splitSize"))
+ .map(Integer::valueOf)
+ .orElse(10);
+ // the file system is resolved against the name node given on the command line
+ Configuration conf = new Configuration();
+ conf.set("fs.defaultFS", hdfsNameNode);
+
+ FileSystem fileSystem = FileSystem.get(conf);
+
+ makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit);
+
+ }
+ /** For every entry listed directly under inputPath, calls tarMaxSize with that entry's path and outputPath + "/" + entry name. */
+ public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
+ throws IOException {
+
+ RemoteIterator dirIterator = fileSystem.listLocatedStatus(new Path(inputPath));
+
+ while (dirIterator.hasNext()) {
+ LocatedFileStatus fileStatus = dirIterator.next();
+
+ Path p = fileStatus.getPath();
+ String pathString = p.toString();
+ String entity = pathString.substring(pathString.lastIndexOf("/") + 1);
+ // the last path segment is used both in the output path and as the fourth argument of tarMaxSize
+ MakeTarArchive.tarMaxSize(fileSystem, pathString, outputPath + "/" + entity, entity, gBperSplit);
+ }
+ }
+
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
Path hdfsWritePath = new Path(outputPath);
if (fileSystem.exists(hdfsWritePath)) {
@@ -21,7 +79,7 @@ public class MakeTarArchive implements Serializable {
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
}
- private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
+ private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dirName)
throws IOException {
Path hdfsWritePath = new Path(outputPath);
@@ -37,7 +95,7 @@ public class MakeTarArchive implements Serializable {
new Path(inputPath), true);
while (iterator.hasNext()) {
- writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
+ writeCurrentFile(fileSystem, dirName, iterator, ar, 0);
}
}
@@ -59,32 +117,30 @@ public class MakeTarArchive implements Serializable {
new Path(inputPath), true);
boolean next = fileStatusListIterator.hasNext();
while (next) {
- TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar");
+ try (TarArchiveOutputStream ar = getTar(fileSystem, outputPath + "_" + (partNum + 1) + ".tar")) {
- long current_size = 0;
- while (next && current_size < bytesPerSplit) {
- current_size = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, current_size);
- next = fileStatusListIterator.hasNext();
+ long currentSize = 0;
+ while (next && currentSize < bytesPerSplit) {
+ currentSize = writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, currentSize);
+ next = fileStatusListIterator.hasNext();
+ }
+
+ partNum += 1;
}
-
- partNum += 1;
- ar.close();
}
-
}
-
}
- private static long writeCurrentFile(FileSystem fileSystem, String dir_name,
+ private static long writeCurrentFile(FileSystem fileSystem, String dirName,
RemoteIterator fileStatusListIterator,
- TarArchiveOutputStream ar, long current_size) throws IOException {
+ TarArchiveOutputStream ar, long currentSize) throws IOException {
LocatedFileStatus fileStatus = fileStatusListIterator.next();
Path p = fileStatus.getPath();
- String p_string = p.toString();
- if (!p_string.endsWith("_SUCCESS")) {
- String name = p_string.substring(p_string.lastIndexOf("/") + 1);
+ String pString = p.toString();
+ if (!pString.endsWith("_SUCCESS")) {
+ String name = pString.substring(pString.lastIndexOf("/") + 1);
if (name.startsWith("part-") & name.length() > 10) {
String tmp = name.substring(0, 10);
if (name.contains(".")) {
@@ -92,9 +148,9 @@ public class MakeTarArchive implements Serializable {
}
name = tmp;
}
- TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
+ TarArchiveEntry entry = new TarArchiveEntry(dirName + "/" + name);
entry.setSize(fileStatus.getLen());
- current_size += fileStatus.getLen();
+ currentSize += fileStatus.getLen();
ar.putArchiveEntry(entry);
InputStream is = fileSystem.open(fileStatus.getPath());
@@ -110,7 +166,7 @@ public class MakeTarArchive implements Serializable {
ar.closeArchiveEntry();
}
- return current_size;
+ return currentSize;
}
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java
index d06544ae1..34aa37be5 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java
@@ -1,12 +1,12 @@
package eu.dnetlib.dhp.common;
+import static com.mongodb.client.model.Sorts.descending;
+
import java.io.Closeable;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Optional;
+import java.util.*;
+import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
@@ -38,6 +38,26 @@ public class MdstoreClient implements Closeable {
this.db = getDb(client, dbName);
}
+	/** Returns the "timestamp" field of {@code f} as a Long, or null when f or that field is missing or null. */
+	private Long parseTimestamp(Document f) {
+		// get() yields null for an absent key and for a key mapped to null alike, so one check covers both
+		final Object ts = f == null ? null : f.get("timestamp");
+		if (ts == null)
+			return null;
+		return Long.parseLong(ts.toString()); // NumberFormatException if the value is not an integer literal
+	}
+ /** Returns the "timestamp" of the first document of collectionId when sorted by descending timestamp; may be null. */
+ public Long getLatestTimestamp(final String collectionId) {
+ MongoCollection collection = db.getCollection(collectionId);
+ FindIterable result = collection.find().sort(descending("timestamp")).limit(1);
+ if (result == null) {
+ return null;
+ }
+ // a null first document is handled by parseTimestamp, which returns null for it
+ Document f = result.first();
+ return parseTimestamp(f);
+ }
+
public MongoCollection mdStore(final String mdId) {
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
@@ -54,6 +74,16 @@ public class MdstoreClient implements Closeable {
return getColl(db, currentId, true);
}
+ public List mdStoreWithTimestamp(final String mdFormat, final String mdLayout,
+ final String mdInterpretation) { // builds one MDStoreInfo per entry returned by validCollections
+ Map res = validCollections(mdFormat, mdLayout, mdInterpretation); // NOTE(review): presumably mdId -> current collection id; confirm
+ return res
+ .entrySet()
+ .stream()
+ .map(e -> new MDStoreInfo(e.getKey(), e.getValue(), getLatestTimestamp(e.getValue()))) // one extra query per entry
+ .collect(Collectors.toList());
+ }
+
public Map validCollections(
final String mdFormat, final String mdLayout, final String mdInterpretation) {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
index 91c6c1825..fac9a7565 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java
@@ -1,18 +1,18 @@
package eu.dnetlib.dhp.common;
+import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.text.WordUtils;
+import com.ctc.wstx.dtd.LargePrefixedNameSet;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
-import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
@@ -29,7 +29,19 @@ public class PacePerson {
private List fullname = Lists.newArrayList();
private final String original;
- private static Set particles = null;
+ private static Set particles;
+ // Loads the name particles once, when the class is initialised, from the name_particles.txt classpath resource.
+ static {
+ try {
+ particles = new HashSet<>(IOUtils
+ .readLines(
+ PacePerson.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/common/name_particles.txt")));
+ } catch (IOException e) {
+ throw new RuntimeException(e); // NOTE(review): a missing resource yields a null stream, presumably failing before this catch - confirm
+ }
+ }
/**
* Capitalizes a string
@@ -37,29 +49,20 @@ public class PacePerson {
* @param s the string to capitalize
* @return the input string with capital letter
*/
- public static final String capitalize(final String s) {
+ public static String capitalize(final String s) {
+ if (particles.contains(s)) {
+ return s;
+ }
return WordUtils.capitalize(s.toLowerCase(), ' ', '-');
}
/**
* Adds a dot to a string with length equals to 1
*/
- public static final String dotAbbreviations(final String s) {
+ public static String dotAbbreviations(final String s) {
return s.length() == 1 ? s + "." : s;
}
- public static Set loadFromClasspath(final String classpath) {
- final Set h = new HashSet<>();
- try {
- for (final String s : IOUtils.readLines(PacePerson.class.getResourceAsStream(classpath))) {
- h.add(s);
- }
- } catch (final Throwable e) {
- return new HashSet<>();
- }
- return h;
- }
-
/**
* The constructor of the class. It fills the fields of the class basing on the input fullname.
*
@@ -128,10 +131,6 @@ public class PacePerson {
}
private List splitTerms(final String s) {
- if (particles == null) {
- particles = loadFromClasspath("/eu/dnetlib/dhp/oa/graph/pace/name_particles.txt");
- }
-
final List list = Lists.newArrayList();
for (final String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
if (!particles.contains(part.toLowerCase())) {
@@ -187,17 +186,36 @@ public class PacePerson {
}
public List getCapitalFirstnames() {
- return Lists
- .newArrayList(
- Iterables.transform(getNameWithAbbreviations(), PacePerson::capitalize));
+ return Optional
+ .ofNullable(getNameWithAbbreviations())
+ .map(
+ name -> name
+ .stream()
+ .map(PacePerson::capitalize)
+ .collect(Collectors.toList()))
+ .orElse(new ArrayList<>());
}
public List getCapitalSurname() {
- return Lists.newArrayList(Iterables.transform(surname, PacePerson::capitalize));
+ return Optional
+ .ofNullable(getSurname())
+ .map(
+ surname -> surname
+ .stream()
+ .map(PacePerson::capitalize)
+ .collect(Collectors.toList()))
+ .orElse(new ArrayList<>());
}
public List getNameWithAbbreviations() {
- return Lists.newArrayList(Iterables.transform(name, PacePerson::dotAbbreviations));
+ return Optional
+ .ofNullable(getName())
+ .map(
+ name -> name
+ .stream()
+ .map(PacePerson::dotAbbreviations)
+ .collect(Collectors.toList()))
+ .orElse(new ArrayList<>());
}
public boolean isAccurate() {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java
new file mode 100644
index 000000000..5d39216f1
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java
@@ -0,0 +1,81 @@
+
+package eu.dnetlib.dhp.common.action;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.common.DbClient;
+import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+
+/** Dumps the datasource master/duplicate pairs selected by QUERY to an HDFS file, one JSON document per line. */
+public class ReadDatasourceMasterDuplicateFromDB {
+	private static final Logger log = LoggerFactory.getLogger(ReadDatasourceMasterDuplicateFromDB.class);
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+	private static final String QUERY = "SELECT distinct dd.id as masterId, d.officialname as masterName, dd.duplicate as duplicateId "
+		+ "FROM dsm_dedup_services dd join dsm_services d on (dd.id = d.id);";
+
+	/**
+	 * Runs QUERY on the database at {@code dbUrl} and writes each row as a JSON line to {@code hdfsPath}.
+	 *
+	 * @return the number of rows written
+	 * @throws IOException if creating, flushing or closing the output file fails
+	 */
+	public static int execute(String dbUrl, String dbUser, String dbPassword, String hdfsPath, String hdfsNameNode)
+		throws IOException {
+		final int[] count = { 0 }; // array cell, because a lambda can only capture effectively final locals
+		try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
+			Configuration conf = new Configuration();
+			conf.set("fs.defaultFS", hdfsNameNode);
+			FileSystem fileSystem = FileSystem.get(conf);
+			FSDataOutputStream fos = fileSystem.create(new Path(hdfsPath));
+			log.info("running query: {}", QUERY);
+			log.info("storing results in: {}", hdfsPath);
+			try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
+				dbClient.processResults(QUERY, rs -> {
+					writeMap(datasourceMasterMap(rs), writer);
+					count[0]++; // assumes processResults invokes this callback once per row - TODO confirm in DbClient
+				});
+			}
+		}
+		return count[0];
+	}
+
+	/** Builds a MasterDuplicate from the current row of {@code rs}; an SQLException is rethrown unchecked. */
+	private static MasterDuplicate datasourceMasterMap(ResultSet rs) {
+		try {
+			final MasterDuplicate md = new MasterDuplicate();
+			final String duplicateId = rs.getString("duplicateId");
+			final String masterId = rs.getString("masterId");
+			final String masterName = rs.getString("masterName");
+			md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true));
+			md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true));
+			md.setMasterName(masterName);
+			return md;
+		} catch (final SQLException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Appends {@code dm}, serialized as JSON, and a line separator to {@code writer}; an IOException is rethrown unchecked. */
+	private static void writeMap(final MasterDuplicate dm, final BufferedWriter writer) {
+		try {
+			writer.write(OBJECT_MAPPER.writeValueAsString(dm));
+			writer.newLine();
+		} catch (final IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java
new file mode 100644
index 000000000..12a4407c4
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/model/MasterDuplicate.java
@@ -0,0 +1,38 @@
+
+package eu.dnetlib.dhp.common.action.model;
+
+import java.io.Serializable;
+
+/**
+ * @author miriam.baglioni
+ * @Date 21/07/22
+ */
+public class MasterDuplicate implements Serializable {
+ private String duplicateId; // id of the duplicate entry (filled via createOpenaireId by ReadDatasourceMasterDuplicateFromDB)
+ private String masterId; // id of the master entry the duplicate refers to (filled the same way)
+ private String masterName; // taken from the "masterName" column (d.officialname) of the reader's query
+
+ public String getDuplicateId() {
+ return duplicateId;
+ }
+
+ public void setDuplicateId(String duplicateId) {
+ this.duplicateId = duplicateId;
+ }
+
+ public String getMasterId() {
+ return masterId;
+ }
+
+ public void setMasterId(String masterId) {
+ this.masterId = masterId;
+ }
+
+ public String getMasterName() {
+ return masterName;
+ }
+
+ public void setMasterName(String masterName) {
+ this.masterName = masterName;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
index 3f5c6ad4a..544da78f5 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@@ -3,10 +3,13 @@ package eu.dnetlib.dhp.common.api;
import java.io.*;
import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
+import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson;
@@ -60,33 +63,31 @@ public class ZenodoAPIClient implements Serializable {
*/
public int newDeposition() throws IOException {
String json = "{}";
- OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
-
- RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
-
- Request request = new Request.Builder()
- .url(urlString)
- .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .post(body)
- .build();
-
- try (Response response = httpClient.newCall(request).execute()) {
-
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
-
- // Get response body
- json = response.body().string();
-
- ZenodoModel newSubmission = new Gson().fromJson(json, ZenodoModel.class);
- this.bucket = newSubmission.getLinks().getBucket();
- this.deposition_id = newSubmission.getId();
-
- return response.code();
+ URL url = new URL(urlString);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setRequestMethod("POST");
+ conn.setDoOutput(true);
+ try (OutputStream os = conn.getOutputStream()) {
+ byte[] input = json.getBytes("utf-8");
+ os.write(input, 0, input.length);
}
+ String body = getBody(conn);
+
+ int responseCode = conn.getResponseCode();
+ conn.disconnect();
+
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + body);
+
+ ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
+ this.bucket = newSubmission.getLinks().getBucket();
+ this.deposition_id = newSubmission.getId();
+
+ return responseCode;
}
/**
@@ -94,28 +95,48 @@ public class ZenodoAPIClient implements Serializable {
*
* @param is the inputStream for the file to upload
* @param file_name the name of the file as it will appear on Zenodo
- * @param len the size of the file
* @return the response code
*/
- public int uploadIS(InputStream is, String file_name, long len) throws IOException {
- OkHttpClient httpClient = new OkHttpClient.Builder()
- .writeTimeout(600, TimeUnit.SECONDS)
- .readTimeout(600, TimeUnit.SECONDS)
- .connectTimeout(600, TimeUnit.SECONDS)
- .build();
+ public int uploadIS(InputStream is, String file_name) throws IOException {
- Request request = new Request.Builder()
- .url(bucket + "/" + file_name)
- .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
- .build();
+ URL url = new URL(bucket + "/" + file_name);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, "application/zip");
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setDoOutput(true);
+ conn.setRequestMethod("PUT");
+
+ byte[] buf = new byte[8192];
+ int length;
+ try (OutputStream os = conn.getOutputStream()) {
+ while ((length = is.read(buf)) != -1) {
+ os.write(buf, 0, length);
+ }
- try (Response response = httpClient.newCall(request).execute()) {
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
- return response.code();
}
+ int responseCode = conn.getResponseCode();
+ if (!checkOKStatus(responseCode)) {
+ throw new IOException("Unexpected code " + responseCode + getBody(conn));
+ }
+
+ return responseCode;
+ }
+
+	/**
+	 * Reads the response of {@code conn} as UTF-8 text, trimming every line and joining the
+	 * lines without any separator.
+	 */
+	@NotNull
+	private String getBody(HttpURLConnection conn) throws IOException {
+		final StringBuilder collected = new StringBuilder();
+		try (InputStreamReader decoder = new InputStreamReader(conn.getInputStream(), "utf-8");
+			BufferedReader lines = new BufferedReader(decoder)) {
+			for (String line = lines.readLine(); line != null; line = lines.readLine()) {
+				collected.append(line.trim());
+			}
+		}
+		// an IOException raised while opening or reading the stream propagates to the caller
+		return collected.toString();
+	}
/**
@@ -127,26 +148,34 @@ public class ZenodoAPIClient implements Serializable {
*/
public int sendMretadata(String metadata) throws IOException {
- OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
+ URL url = new URL(urlString + "/" + deposition_id);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setDoOutput(true);
+ conn.setRequestMethod("PUT");
- RequestBody body = RequestBody.create(metadata, MEDIA_TYPE_JSON);
-
- Request request = new Request.Builder()
- .url(urlString + "/" + deposition_id)
- .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .put(body)
- .build();
-
- try (Response response = httpClient.newCall(request).execute()) {
-
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
-
- return response.code();
+ try (OutputStream os = conn.getOutputStream()) {
+ byte[] input = metadata.getBytes("utf-8");
+ os.write(input, 0, input.length);
}
+ final int responseCode = conn.getResponseCode();
+ conn.disconnect();
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + getBody(conn));
+
+ return responseCode;
+
+ }
+
+	/** Tells whether responseCode is one of the two statuses treated as success: 200 (OK) or 201 (Created). */
+	private boolean checkOKStatus(int responseCode) {
+		// Must be a disjunction of equalities: "code != OK || code != CREATED" holds for every
+		// code, so it would report success for error responses too.
+		final boolean ok = HttpURLConnection.HTTP_OK == responseCode;
+		return ok || HttpURLConnection.HTTP_CREATED == responseCode;
+	}
/**
@@ -155,6 +184,7 @@ public class ZenodoAPIClient implements Serializable {
* @return response code
* @throws IOException
*/
+ @Deprecated
public int publish() throws IOException {
String json = "{}";
@@ -191,31 +221,37 @@ public class ZenodoAPIClient implements Serializable {
* @throws MissingConceptDoiException
*/
public int newVersion(String concept_rec_id) throws IOException, MissingConceptDoiException {
- setDepositionId(concept_rec_id);
+ setDepositionId(concept_rec_id, 1);
String json = "{}";
- OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
+ URL url = new URL(urlString + "/" + deposition_id + "/actions/newversion");
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
- RequestBody body = RequestBody.create(json, MEDIA_TYPE_JSON);
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setDoOutput(true);
+ conn.setRequestMethod("POST");
- Request request = new Request.Builder()
- .url(urlString + "/" + deposition_id + "/actions/newversion")
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .post(body)
- .build();
-
- try (Response response = httpClient.newCall(request).execute()) {
-
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
-
- ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
- String latest_draft = zenodoModel.getLinks().getLatest_draft();
- deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
- bucket = getBucket(latest_draft);
- return response.code();
+ try (OutputStream os = conn.getOutputStream()) {
+ byte[] input = json.getBytes("utf-8");
+ os.write(input, 0, input.length);
}
+
+ String body = getBody(conn);
+
+ int responseCode = conn.getResponseCode();
+
+ conn.disconnect();
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + body);
+
+ ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+ String latest_draft = zenodoModel.getLinks().getLatest_draft();
+ deposition_id = latest_draft.substring(latest_draft.lastIndexOf("/") + 1);
+ bucket = getBucket(latest_draft);
+
+ return responseCode;
+
}
/**
@@ -233,29 +269,38 @@ public class ZenodoAPIClient implements Serializable {
this.deposition_id = deposition_id;
- OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
+ String json = "{}";
- Request request = new Request.Builder()
- .url(urlString + "/" + deposition_id)
- .addHeader("Authorization", "Bearer " + access_token)
- .build();
-
- try (Response response = httpClient.newCall(request).execute()) {
-
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
-
- ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
- bucket = zenodoModel.getLinks().getBucket();
- return response.code();
+ URL url = new URL(urlString + "/" + deposition_id);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setRequestMethod("POST");
+ conn.setDoOutput(true);
+ try (OutputStream os = conn.getOutputStream()) {
+ byte[] input = json.getBytes("utf-8");
+ os.write(input, 0, input.length);
}
+ String body = getBody(conn);
+
+ int responseCode = conn.getResponseCode();
+ conn.disconnect();
+
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + body);
+
+ ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
+ bucket = zenodoModel.getLinks().getBucket();
+
+ return responseCode;
+
}
- private void setDepositionId(String concept_rec_id) throws IOException, MissingConceptDoiException {
+ private void setDepositionId(String concept_rec_id, Integer page) throws IOException, MissingConceptDoiException {
- ZenodoModelList zenodoModelList = new Gson().fromJson(getPrevDepositions(), ZenodoModelList.class);
+ ZenodoModelList zenodoModelList = new Gson()
+ .fromJson(getPrevDepositions(String.valueOf(page)), ZenodoModelList.class);
for (ZenodoModel zm : zenodoModelList) {
if (zm.getConceptrecid().equals(concept_rec_id)) {
@@ -263,55 +308,57 @@ public class ZenodoAPIClient implements Serializable {
return;
}
}
-
- throw new MissingConceptDoiException("The concept record id specified was missing in the list of depositions");
+ if (zenodoModelList.size() == 0)
+ throw new MissingConceptDoiException(
+ "The concept record id specified was missing in the list of depositions");
+ setDepositionId(concept_rec_id, page + 1);
}
- private String getPrevDepositions() throws IOException {
- OkHttpClient httpClient = new OkHttpClient.Builder().connectTimeout(600, TimeUnit.SECONDS).build();
+ private String getPrevDepositions(String page) throws IOException {
- Request request = new Request.Builder()
- .url(urlString)
- .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .get()
- .build();
+ HttpUrl.Builder urlBuilder = HttpUrl.parse(urlString).newBuilder();
+ urlBuilder.addQueryParameter("page", page);
- try (Response response = httpClient.newCall(request).execute()) {
+ URL url = new URL(urlBuilder.build().toString());
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setDoOutput(true);
+ conn.setRequestMethod("GET");
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
+ String body = getBody(conn);
- return response.body().string();
+ int responseCode = conn.getResponseCode();
- }
+ conn.disconnect();
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + body);
+
+ return body;
}
- private String getBucket(String url) throws IOException {
- OkHttpClient httpClient = new OkHttpClient.Builder()
- .connectTimeout(600, TimeUnit.SECONDS)
- .build();
+ private String getBucket(String inputUurl) throws IOException {
- Request request = new Request.Builder()
- .url(url)
- .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
- .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
- .get()
- .build();
+ URL url = new URL(inputUurl);
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString());
+ conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + access_token);
+ conn.setDoOutput(true);
+ conn.setRequestMethod("GET");
- try (Response response = httpClient.newCall(request).execute()) {
+ String body = getBody(conn);
- if (!response.isSuccessful())
- throw new IOException("Unexpected code " + response + response.body().string());
+ int responseCode = conn.getResponseCode();
- // Get response body
- ZenodoModel zenodoModel = new Gson().fromJson(response.body().string(), ZenodoModel.class);
+ conn.disconnect();
+ if (!checkOKStatus(responseCode))
+ throw new IOException("Unexpected code " + responseCode + body);
- return zenodoModel.getLinks().getBucket();
+ ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
- }
+ return zenodoModel.getLinks().getBucket();
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java
new file mode 100644
index 000000000..fff28dbdf
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java
@@ -0,0 +1,39 @@
+
+package eu.dnetlib.dhp.common.api.context;
+
+public class CategorySummary {
+ // Bean with id, label and hasConcept properties; every setter returns this instance so calls can be chained.
+ private String id;
+
+ private String label;
+
+ private boolean hasConcept;
+
+ public String getId() {
+ return id;
+ }
+
+ public String getLabel() {
+ return label;
+ }
+
+ public boolean isHasConcept() {
+ return hasConcept;
+ }
+
+ public CategorySummary setId(final String id) {
+ this.id = id;
+ return this;
+ }
+
+ public CategorySummary setLabel(final String label) {
+ this.label = label;
+ return this;
+ }
+
+ public CategorySummary setHasConcept(final boolean hasConcept) {
+ this.hasConcept = hasConcept;
+ return this;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java
new file mode 100644
index 000000000..7213a945a
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java
@@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.context;
+
+import java.util.ArrayList;
+
+public class CategorySummaryList extends ArrayList {
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java
new file mode 100644
index 000000000..a576f9a1e
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java
@@ -0,0 +1,52 @@
+
+package eu.dnetlib.dhp.common.api.context;
+
+import java.util.List;
+
+public class ConceptSummary {
+ // Bean with id, label, hasSubConcept and concepts properties; every setter returns this instance for chaining.
+ private String id;
+
+ private String label;
+
+ public boolean hasSubConcept; // NOTE(review): the only non-private field although accessors exist - confirm this is intended
+
+ private List concepts;
+
+ public String getId() {
+ return id;
+ }
+
+ public String getLabel() {
+ return label;
+ }
+
+ public List getConcepts() {
+ return concepts;
+ }
+
+ public ConceptSummary setId(final String id) {
+ this.id = id;
+ return this;
+ }
+
+ public ConceptSummary setLabel(final String label) {
+ this.label = label;
+ return this;
+ }
+
+ public boolean isHasSubConcept() {
+ return hasSubConcept;
+ }
+
+ public ConceptSummary setHasSubConcept(final boolean hasSubConcept) {
+ this.hasSubConcept = hasSubConcept;
+ return this;
+ }
+
+ public ConceptSummary setConcept(final List concepts) { // NOTE(review): singular name, while the getter is getConcepts
+ this.concepts = concepts;
+ return this;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java
new file mode 100644
index 000000000..45ccd2810
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java
@@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.context;
+
+import java.util.ArrayList;
+/** ArrayList subclass that declares no members of its own; presumably a list of ConceptSummary items (element type is not declared here, TODO confirm). */
+public class ConceptSummaryList extends ArrayList {
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java
new file mode 100644
index 000000000..46a0d0d5a
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java
@@ -0,0 +1,50 @@
+
+package eu.dnetlib.dhp.common.api.context;
+/** Bean with the string properties id, label, type and status; every setter returns this instance so calls can be chained. */
+public class ContextSummary {
+
+ private String id;
+
+ private String label;
+
+ private String type;
+
+ private String status;
+
+ public String getId() {
+ return id;
+ }
+
+ public String getLabel() {
+ return label;
+ }
+
+ public String getType() {
+ return type;
+ }
+
+ public String getStatus() {
+ return status;
+ }
+
+ public ContextSummary setId(final String id) {
+ this.id = id;
+ return this;
+ }
+
+ public ContextSummary setLabel(final String label) {
+ this.label = label;
+ return this;
+ }
+
+ public ContextSummary setType(final String type) {
+ this.type = type;
+ return this;
+ }
+
+ public ContextSummary setStatus(final String status) {
+ this.status = status;
+ return this;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java
new file mode 100644
index 000000000..618600007
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java
@@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.common.api.context;
+
+import java.util.ArrayList;
+/** ArrayList subclass that declares no members of its own; presumably a list of ContextSummary items (element type is not declared here, TODO confirm). */
+public class ContextSummaryList extends ArrayList {
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java
new file mode 100644
index 000000000..8bcf14ba4
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/DecompressTarGz.java
@@ -0,0 +1,40 @@
+
+package eu.dnetlib.dhp.common.collection;
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.util.zip.GZIPOutputStream;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+/** Utility that re-packs the non-directory entries of a tar.gz archive as individual gzip files. */
+public class DecompressTarGz {
+
+	/**
+	 * Writes every non-directory entry of the archive at tarGzPath to outputPath + entry name + ".gz" on fs
+	 * (outputPath is concatenated as-is, so the caller supplies any trailing separator).
+	 * @throws IOException if the archive cannot be read or an output file cannot be written
+	 */
+	public static void doExtract(FileSystem fs, String outputPath, String tarGzPath) throws IOException {
+		// the raw stream is a resource of its own, so it is closed even when a wrapping constructor throws
+		try (FSDataInputStream inputFileStream = fs.open(new Path(tarGzPath));
+			TarArchiveInputStream tais = new TarArchiveInputStream(
+				new GzipCompressorInputStream(inputFileStream))) {
+			TarArchiveEntry entry;
+			while ((entry = tais.getNextTarEntry()) != null) {
+				if (entry.isDirectory()) {
+					continue;
+				}
+				try (FSDataOutputStream out = fs.create(new Path(outputPath.concat(entry.getName()).concat(".gz")));
+					GZIPOutputStream gzipOs = new GZIPOutputStream(new BufferedOutputStream(out))) {
+					IOUtils.copy(tais, gzipOs);
+				}
+			}
+		}
+	}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
index 6fcec00dd..d26d9c0e9 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
@@ -1,6 +1,9 @@
package eu.dnetlib.dhp.common.collection;
+import java.util.HashMap;
+import java.util.Map;
+
/**
* Bundles the http connection parameters driving the client behaviour.
*/
@@ -13,6 +16,8 @@ public class HttpClientParams {
public static int _connectTimeOut = 10; // seconds
public static int _readTimeOut = 30; // seconds
+ public static String _requestMethod = "GET";
+
/**
* Maximum number of allowed retires before failing
*/
@@ -38,17 +43,30 @@ public class HttpClientParams {
*/
private int readTimeOut;
+ /**
+ * Custom http headers
+ */
+ private Map headers;
+
+ /**
+ * Request method (i.e., GET, POST etc)
+ */
+ private String requestMethod;
+
public HttpClientParams() {
- this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
+ this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut, new HashMap<>(),
+ _requestMethod);
}
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
- int readTimeOut) {
+ int readTimeOut, Map headers, String requestMethod) {
this.maxNumberOfRetry = maxNumberOfRetry;
this.requestDelay = requestDelay;
this.retryDelay = retryDelay;
this.connectTimeOut = connectTimeOut;
this.readTimeOut = readTimeOut;
+ this.headers = headers;
+ this.requestMethod = requestMethod;
}
public int getMaxNumberOfRetry() {
@@ -91,4 +109,19 @@ public class HttpClientParams {
this.readTimeOut = readTimeOut;
}
+ public Map getHeaders() {
+ return headers;
+ }
+
+ public void setHeaders(Map headers) {
+ this.headers = headers;
+ }
+
+ public String getRequestMethod() {
+ return requestMethod;
+ }
+
+ public void setRequestMethod(String requestMethod) {
+ this.requestMethod = requestMethod;
+ }
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
index dd46ab1f4..342d73cdc 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
@@ -8,10 +8,13 @@ import java.io.InputStream;
import java.net.*;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.commons.lang3.time.DateUtils;
import org.apache.http.HttpHeaders;
+import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -94,24 +97,32 @@ public class HttpConnector2 {
throw new CollectorException(msg);
}
- log.info("Request attempt {} [{}]", retryNumber, requestUrl);
-
InputStream input = null;
+ long start = System.currentTimeMillis();
try {
if (getClientParams().getRequestDelay() > 0) {
backoffAndSleep(getClientParams().getRequestDelay());
}
+
+ log.info("Request attempt {} [{}]", retryNumber, requestUrl);
+
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
+ urlConn.setRequestMethod(getClientParams().getRequestMethod());
- if (log.isDebugEnabled()) {
- logHeaderFields(urlConn);
+ // if provided, add custom headers
+ if (!getClientParams().getHeaders().isEmpty()) {
+ for (Map.Entry headerEntry : getClientParams().getHeaders().entrySet()) {
+ urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue());
+ }
}
+ logHeaderFields(urlConn);
+
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
@@ -125,9 +136,7 @@ public class HttpConnector2 {
}
if (is2xx(urlConn.getResponseCode())) {
- input = urlConn.getInputStream();
- responseType = urlConn.getContentType();
- return input;
+ return getInputStream(urlConn, start);
}
if (is3xx(urlConn.getResponseCode())) {
// REDIRECTS
@@ -137,6 +146,7 @@ public class HttpConnector2 {
.put(
REPORT_PREFIX + urlConn.getResponseCode(),
String.format("Moved to: %s", newUrl));
+ logRequestTime(start);
urlConn.disconnect();
if (retryAfter > 0) {
backoffAndSleep(retryAfter);
@@ -152,26 +162,50 @@ public class HttpConnector2 {
if (retryAfter > 0) {
log
.warn(
- "{} - waiting and repeating request after suggested retry-after {} sec.",
- requestUrl, retryAfter);
+ "waiting and repeating request after suggested retry-after {} sec for URL {}",
+ retryAfter, requestUrl);
backoffAndSleep(retryAfter * 1000);
} else {
log
.warn(
- "{} - waiting and repeating request after default delay of {} sec.",
- requestUrl, getClientParams().getRetryDelay());
- backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
+ "waiting and repeating request after default delay of {} sec for URL {}",
+ getClientParams().getRetryDelay(), requestUrl);
+ backoffAndSleep(retryNumber * getClientParams().getRetryDelay());
}
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
+
+ logRequestTime(start);
+
urlConn.disconnect();
+
return attemptDownload(requestUrl, retryNumber + 1, report);
+ case 422: // UNPROCESSABLE ENTITY
+ report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
+ log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl);
+ backoffAndSleep(10000);
+ urlConn.disconnect();
+ logRequestTime(start);
+ try {
+ return getInputStream(urlConn, start);
+ } catch (IOException e) {
+ log
+ .error(
+ "server returned 422 and got IOException accessing the response body from URL {}",
+ requestUrl);
+ log.error("IOException:", e);
+ return attemptDownload(requestUrl, retryNumber + 1, report);
+ }
default:
+ log.error("got error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL());
+ log.error("response message: {}", urlConn.getResponseMessage());
report
.put(
REPORT_PREFIX + urlConn.getResponseCode(),
String
.format(
"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
+ logRequestTime(start);
+ urlConn.disconnect();
throw new CollectorException(urlConn.getResponseCode() + " error " + report);
}
}
@@ -192,13 +226,27 @@ public class HttpConnector2 {
}
}
+ private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException {
+ InputStream input = urlConn.getInputStream();
+ responseType = urlConn.getContentType();
+ logRequestTime(start);
+ return input;
+ }
+
+ private static void logRequestTime(long start) {
+ log
+ .info(
+ "request time elapsed: {}sec",
+ TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start));
+ }
+
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
- log.debug("StatusCode: {}", urlConn.getResponseMessage());
+ log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage());
for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) {
for (String v : e.getValue()) {
- log.debug(" key: {} - value: {}", e.getKey(), v);
+ log.info(" key: {} - value: {}", e.getKey(), v);
}
}
}
@@ -218,7 +266,7 @@ public class HttpConnector2 {
for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
- return Integer.parseInt(headerMap.get(key).get(0)) + 10;
+ return Integer.parseInt(headerMap.get(key).get(0));
}
}
return -1;
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
index b3eb98d4f..94e7f5221 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.vocabulary;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
@@ -62,25 +63,46 @@ public class Vocabulary implements Serializable {
}
public VocabularyTerm getTermBySynonym(final String syn) {
- return getTerm(synonyms.get(syn.toLowerCase()));
+ return Optional
+ .ofNullable(syn)
+ .map(s -> getTerm(synonyms.get(s.toLowerCase())))
+ .orElse(null);
}
public Qualifier getTermAsQualifier(final String termId) {
- if (StringUtils.isBlank(termId)) {
+ return getTermAsQualifier(termId, false);
+ }
+
+ public Qualifier getTermAsQualifier(final String termId, boolean strict) {
+ final VocabularyTerm term = getTerm(termId);
+ if (Objects.nonNull(term)) {
+ return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName());
+ } else if (Objects.isNull(term) && strict) {
return OafMapperUtils.unknown(getId(), getName());
- } else if (termExists(termId)) {
- final VocabularyTerm t = getTerm(termId);
- return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
} else {
return OafMapperUtils.qualifier(termId, termId, getId(), getName());
}
}
public Qualifier getSynonymAsQualifier(final String syn) {
+ return getSynonymAsQualifier(syn, false);
+ }
+
+ public Qualifier getSynonymAsQualifier(final String syn, boolean strict) {
return Optional
.ofNullable(getTermBySynonym(syn))
- .map(term -> getTermAsQualifier(term.getId()))
+ .map(term -> getTermAsQualifier(term.getId(), strict))
.orElse(null);
}
+ public Qualifier lookup(String id) {
+ return lookup(id, false);
+ }
+	/** Resolves id as a synonym first; only when that yields null is it resolved (lazily) through getTermAsQualifier. */
+	public Qualifier lookup(String id, boolean strict) {
+		return Optional
+			.ofNullable(getSynonymAsQualifier(id, strict))
+			.orElseGet(() -> getTermAsQualifier(id, strict));
+	}
+
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
index d5f57849c..64b6f91af 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
@@ -57,9 +57,17 @@ public class VocabularyGroup implements Serializable {
final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn);
+
}
}
+ // add the term names as synonyms
+ vocs.vocs.values().forEach(voc -> {
+ voc.getTerms().values().forEach(term -> {
+ voc.addSynonym(term.getName().toLowerCase(), term.getId());
+ });
+ });
+
return vocs;
}
@@ -73,6 +81,13 @@ public class VocabularyGroup implements Serializable {
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
}
+ public Optional find(final String vocId) {
+ return Optional
+ .ofNullable(vocId)
+ .map(String::toLowerCase)
+ .map(vocs::get);
+ }
+
public void addTerm(final String vocId, final String id, final String name) {
if (vocabularyExists(vocId)) {
vocs.get(vocId.toLowerCase()).addTerm(id, name);
@@ -120,6 +135,24 @@ public class VocabularyGroup implements Serializable {
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
}
+	/**
+	 * Looks up syn in the vocabulary vocId, first as a term id and then as a synonym.
+	 *
+	 * @return the qualifier built from the matching term, or null when the vocabulary or the term is missing
+	 */
+	public Qualifier lookupTermBySynonym(final String vocId, final String syn) {
+		final Vocabulary voc = find(vocId).orElse(null);
+		if (voc == null) {
+			return null;
+		}
+		final VocabularyTerm byId = voc.getTerm(syn);
+		// the synonym lookup always runs, even when the id lookup already matched
+		final VocabularyTerm bySynonym = voc.getTermBySynonym(syn);
+		final VocabularyTerm match = byId != null ? byId : bySynonym;
+		return match == null ? null
+			: OafMapperUtils.qualifier(match.getId(), match.getName(), voc.getId(), voc.getName());
+	}
+
/**
* getSynonymAsQualifierCaseSensitive
*
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
index aea046203..0461c9353 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@@ -119,6 +119,131 @@ public class AuthorMerger {
});
}
+ public static String normalizeFullName(final String fullname) {
+ return nfd(fullname)
+ .toLowerCase()
+ // do not compact the regexes in a single expression, would cause StackOverflowError
+ // in case
+ // of large input strings
+ .replaceAll("(\\W)+", " ")
+ .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
+ .replaceAll("(\\p{Punct})+", " ")
+ .replaceAll("(\\d)+", " ")
+ .replaceAll("(\\n)+", " ")
+
+ .trim();
+ }
+
+ private static String authorFieldToBeCompared(Author author) {
+ if (StringUtils.isNotBlank(author.getSurname())) {
+ return author.getSurname();
+
+ }
+ if (StringUtils.isNotBlank(author.getFullname())) {
+ return author.getFullname();
+ }
+ return null;
+ }
+
+	/**
+	 * Tries to figure out whether two authors are the same person in the context
+	 * of ORCID enrichment, using a heuristic on their names.
+	 *
+	 * @param left Author in the OAF entity
+	 * @param right Author ORCID
+	 * @return true when the heuristic considers the two authors to be the same, false otherwise
+	 */
+	public static boolean checkORCIDSimilarity(final Author left, final Author right) {
+		final Person pl = parse(left);
+		final Person pr = parse(right);
+
+		// If one of them didn't have a surname we verify if they have the fullName not empty
+		// and verify if the normalized version is equal
+		if (!(pl.getSurname() != null && pl.getSurname().stream().anyMatch(StringUtils::isNotBlank) &&
+			pr.getSurname() != null && pr.getSurname().stream().anyMatch(StringUtils::isNotBlank))) {
+
+			if (pl.getFullname() != null && !pl.getFullname().isEmpty() && pr.getFullname() != null
+				&& !pr.getFullname().isEmpty()) {
+				return pl
+					.getFullname()
+					.stream()
+					.anyMatch(
+						fl -> pr.getFullname().stream().anyMatch(fr -> normalize(fl).equalsIgnoreCase(normalize(fr))));
+			} else {
+				return false;
+			}
+		}
+		// The Authors have one surname in common
+		if (pl.getSurname().stream().anyMatch(sl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(sl)))) {
+
+			// If one of them has only a surname and is the same we can say that they are the same author
+			if ((pl.getName() == null || pl.getName().stream().allMatch(StringUtils::isBlank)) ||
+				(pr.getName() == null || pr.getName().stream().allMatch(StringUtils::isBlank)))
+				return true;
+			// The authors have the same initials of Name in common
+			if (pl
+				.getName()
+				.stream()
+				.anyMatch(
+					nl -> pr
+						.getName()
+						.stream()
+						.anyMatch(nr -> nr.equalsIgnoreCase(nl))))
+				return true;
+		}
+
+		// Publications sometimes list authors in inverse order (Surname, Name): look for an exact
+		// name/surname cross match. Without a name list on both sides no cross match is possible.
+		if (pl.getName() == null || pr.getName() == null) {
+			return false;
+		}
+		return pl.getSurname().stream().anyMatch(sl -> pr.getName().stream().anyMatch(nr -> nr.equalsIgnoreCase(sl))) &&
+			pl.getName().stream().anyMatch(nl -> pr.getSurname().stream().anyMatch(sr -> sr.equalsIgnoreCase(nl)));
+	}
+ //
+
+ /**
+ * Method to enrich ORCID information in one list of authors based on another list
+ *
+ * @param baseAuthor the Author List in the OAF Entity
+ * @param orcidAuthor The list of ORCID Author intersected
+ * @return The Author List of the OAF Entity enriched with the orcid Author
+ */
+ public static List enrichOrcid(List baseAuthor, List orcidAuthor) {
+
+ if (baseAuthor == null || baseAuthor.isEmpty())
+ return orcidAuthor;
+
+ if (orcidAuthor == null || orcidAuthor.isEmpty())
+ return baseAuthor;
+
+ if (baseAuthor.size() == 1 && orcidAuthor.size() > 10)
+ return baseAuthor;
+
+ final List oAuthor = new ArrayList<>();
+ oAuthor.addAll(orcidAuthor);
+
+ baseAuthor.forEach(ba -> {
+ Optional aMatch = oAuthor.stream().filter(oa -> checkORCIDSimilarity(ba, oa)).findFirst();
+ if (aMatch.isPresent()) {
+ final Author sameAuthor = aMatch.get();
+ addPid(ba, sameAuthor.getPid());
+ oAuthor.remove(sameAuthor);
+ }
+ });
+ return baseAuthor;
+ }
+
+	/** Appends pids to the pid list of a, creating that list when it is null; a null pids is ignored. */
+	private static void addPid(final Author a, final List pids) {
+		if (pids == null)
+			return; // nothing to add: addAll(null) would throw a NullPointerException
+		if (a.getPid() == null) {
+			a.setPid(new ArrayList<>());
+		}
+		a.getPid().addAll(pids);
+	}
+
public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
: "";
@@ -171,7 +296,7 @@ public class AuthorMerger {
}
}
- private static String normalize(final String s) {
+ public static String normalize(final String s) {
String[] normalized = nfd(s)
.toLowerCase()
// do not compact the regexes in a single expression, would cause StackOverflowError
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
new file mode 100644
index 000000000..0225a5063
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@@ -0,0 +1,194 @@
+
+package eu.dnetlib.dhp.oa.merge;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static org.apache.spark.sql.functions.col;
+import static org.apache.spark.sql.functions.when;
+
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ForkJoinPool;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.ReduceFunction;
+import org.apache.spark.sql.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.schema.common.EntityType;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import scala.Tuple2;
+
+/**
+ * Groups the graph content by entity identifier to ensure ID uniqueness
+ */
+public class GroupEntitiesSparkJob {
+ private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);
+
+ private static final Encoder OAFENTITY_KRYO_ENC = Encoders.kryo(OafEntity.class);
+
+ private ArgumentApplicationParser parser;
+
+ public GroupEntitiesSparkJob(ArgumentApplicationParser parser) {
+ this.parser = parser;
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ String jsonConfiguration = IOUtils
+ .toString(
+ GroupEntitiesSparkJob.class
+ .getResourceAsStream(
+ "/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json"));
+ final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
+ parser.parseArgument(args);
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(parser.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ final String isLookupUrl = parser.get("isLookupUrl");
+ log.info("isLookupUrl: {}", isLookupUrl);
+
+ final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
+
+ new GroupEntitiesSparkJob(parser).run(isSparkSessionManaged, isLookupService);
+ }
+
+ public void run(Boolean isSparkSessionManaged, ISLookUpService isLookUpService)
+ throws ISLookUpException {
+
+ String graphInputPath = parser.get("graphInputPath");
+ log.info("graphInputPath: {}", graphInputPath);
+
+ String checkpointPath = parser.get("checkpointPath");
+ log.info("checkpointPath: {}", checkpointPath);
+
+ String outputPath = parser.get("outputPath");
+ log.info("outputPath: {}", outputPath);
+
+ boolean filterInvisible = Boolean.parseBoolean(parser.get("filterInvisible"));
+ log.info("filterInvisible: {}", filterInvisible);
+
+ SparkConf conf = new SparkConf();
+ conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+ conf.registerKryoClasses(ModelSupport.getOafModelClasses());
+
+ final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookUpService);
+
+ runWithSparkSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ HdfsSupport.remove(checkpointPath, spark.sparkContext().hadoopConfiguration());
+ groupEntities(spark, graphInputPath, checkpointPath, outputPath, filterInvisible, vocs);
+ });
+ }
+
+ private static void groupEntities(
+ SparkSession spark,
+ String inputPath,
+ String checkpointPath,
+ String outputPath,
+ boolean filterInvisible, VocabularyGroup vocs) {
+
+ Dataset allEntities = spark.emptyDataset(OAFENTITY_KRYO_ENC);
+
+ for (Map.Entry e : ModelSupport.entityTypes.entrySet()) {
+ String entity = e.getKey().name();
+ Class extends OafEntity> entityClass = e.getValue();
+ String entityInputPath = inputPath + "/" + entity;
+
+ if (!HdfsSupport.exists(entityInputPath, spark.sparkContext().hadoopConfiguration())) {
+ continue;
+ }
+
+ allEntities = allEntities
+ .union(
+ ((Dataset) spark
+ .read()
+ .schema(Encoders.bean(entityClass).schema())
+ .json(entityInputPath)
+ .filter("length(id) > 0")
+ .as(Encoders.bean(entityClass)))
+ .map((MapFunction) r -> r, OAFENTITY_KRYO_ENC));
+ }
+
+ Dataset> groupedEntities = allEntities
+ .map(
+ (MapFunction) entity -> GraphCleaningFunctions
+ .applyCoarVocabularies(entity, vocs),
+ OAFENTITY_KRYO_ENC)
+ .groupByKey((MapFunction) OafEntity::getId, Encoders.STRING())
+ .reduceGroups((ReduceFunction) OafMapperUtils::mergeEntities)
+ .map(
+ (MapFunction, Tuple2>) t -> new Tuple2<>(
+ t._2().getClass().getName(), t._2()),
+ Encoders.tuple(Encoders.STRING(), OAFENTITY_KRYO_ENC));
+
+ // pivot on "_1" (classname of the entity)
+ // created columns containing only entities of the same class
+ for (Map.Entry e : ModelSupport.entityTypes.entrySet()) {
+ String entity = e.getKey().name();
+ Class extends OafEntity> entityClass = e.getValue();
+
+ groupedEntities = groupedEntities
+ .withColumn(
+ entity,
+ when(col("_1").equalTo(entityClass.getName()), col("_2")));
+ }
+
+ groupedEntities
+ .drop("_1", "_2")
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .save(checkpointPath);
+
+ ForkJoinPool parPool = new ForkJoinPool(ModelSupport.entityTypes.size());
+
+ ModelSupport.entityTypes
+ .entrySet()
+ .stream()
+ .map(e -> parPool.submit(() -> {
+ String entity = e.getKey().name();
+ Class extends OafEntity> entityClass = e.getValue();
+
+ spark
+ .read()
+ .load(checkpointPath)
+ .select(col(entity).as("value"))
+ .filter("value IS NOT NULL")
+ .as(OAFENTITY_KRYO_ENC)
+ .map((MapFunction) r -> r, (Encoder) Encoders.bean(entityClass))
+ .filter(filterInvisible ? "dataInfo.invisible != TRUE" : "TRUE")
+ .write()
+ .mode(SaveMode.Overwrite)
+ .option("compression", "gzip")
+ .json(outputPath + "/" + entity);
+ }))
+ .collect(Collectors.toList())
+ .forEach(t -> {
+ try {
+ t.get();
+ } catch (InterruptedException | ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
new file mode 100644
index 000000000..027bf0735
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java
@@ -0,0 +1,77 @@
+
+package eu.dnetlib.dhp.oozie;
+
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
+
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+import org.apache.commons.lang3.time.DurationFormatUtils;
+import org.apache.commons.text.StringSubstitutor;
+import org.apache.spark.SparkConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Resources;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+public class RunSQLSparkJob {
+ private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class);
+
+ private final ArgumentApplicationParser parser;
+
+ public RunSQLSparkJob(ArgumentApplicationParser parser) {
+ this.parser = parser;
+ }
+
+ public static void main(String[] args) throws Exception {
+
+ Map params = new HashMap<>();
+ for (int i = 0; i < args.length - 1; i++) {
+ if (args[i].startsWith("--")) {
+ params.put(args[i].substring(2), args[++i]);
+ }
+ }
+
+ /*
+ * String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( RunSQLSparkJob.class
+ * .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser
+ * parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args);
+ */
+
+ Boolean isSparkSessionManaged = Optional
+ .ofNullable(params.get("isSparkSessionManaged"))
+ .map(Boolean::valueOf)
+ .orElse(Boolean.TRUE);
+ log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
+
+ URL url = com.google.common.io.Resources.getResource(params.get("sql"));
+ String raw_sql = Resources.toString(url, StandardCharsets.UTF_8);
+
+ String sql = StringSubstitutor.replace(raw_sql, params);
+ log.info("sql: {}", sql);
+
+ SparkConf conf = new SparkConf();
+ conf.set("hive.metastore.uris", params.get("hiveMetastoreUris"));
+
+ runWithSparkHiveSession(
+ conf,
+ isSparkSessionManaged,
+ spark -> {
+ for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) {
+ log.info("executing: {}", statement);
+ long startTime = System.currentTimeMillis();
+ spark.sql(statement).show();
+ log
+ .info(
+ "executed in {}",
+ DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S"));
+ }
+ });
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java
new file mode 100644
index 000000000..1a7482685
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/DoiCleaningRule.java
@@ -0,0 +1,14 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+/** Cleaning rule normalising the textual form of a DOI. */
+public class DoiCleaningRule {
+
+	/** Lower-cases doi, removes whitespace and a leading "doi:", then rewrites the first DOI_PREFIX_REGEX match. */
+	public static String clean(final String doi) {
+		return doi
+			.toLowerCase(java.util.Locale.ROOT) // locale-independent: a Turkish default locale would map "I" to a dotless i
+			.replaceAll("\\s", "")
+			.replaceAll("^doi:", "")
+			.replaceFirst(CleaningFunctions.DOI_PREFIX_REGEX, CleaningFunctions.DOI_PREFIX);
+	}
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/FundRefCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/FundRefCleaningRule.java
new file mode 100644
index 000000000..a267b8b88
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/FundRefCleaningRule.java
@@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+/** Cleaning rule that reduces a FundRef identifier to its first run of digits. */
+public class FundRefCleaningRule {
+
+	/** One or more consecutive digits. */
+	public static final Pattern PATTERN = Pattern.compile("\\d+");
+
+	/**
+	 * Lower-cases the input, removes all whitespace and returns the first digit sequence found.
+	 *
+	 * @param fundRefId the raw identifier, must not be null
+	 * @return the first run of digits, or the empty string when there is none
+	 */
+	public static String clean(final String fundRefId) {
+		final String lowered = fundRefId.toLowerCase();
+		final String compact = lowered.replaceAll("\\s", "");
+		final Matcher digits = PATTERN.matcher(compact);
+		return digits.find() ? digits.group() : "";
+	}
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
index 592580ab8..0124e96fc 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -1,6 +1,12 @@
package eu.dnetlib.dhp.schema.oaf.utils;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.OPENAIRE_META_RESOURCE_TYPE;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
+
+import java.net.MalformedURLException;
+import java.net.URL;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
@@ -16,6 +22,8 @@ import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyTerm;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
@@ -23,6 +31,10 @@ import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions {
+ public static final String DNET_PUBLISHERS = "dnet:publishers";
+
+ public static final String DNET_LICENSES = "dnet:licenses";
+
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
public static final int ORCID_LEN = 19;
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
@@ -32,64 +44,278 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
+    /** Regex matching any run of whitespace (CR, LF and TAB included). */
+    private static final String NAME_CLEANING_REGEX = "[\\r\\n\\t\\s]+";
+
+    /** Lower-case author full names that are placeholders rather than real names. */
+    private static final Set INVALID_AUTHOR_NAMES = new HashSet<>();
+
+    /** Exact URLs rejected by {@code urlFilter}. */
+    private static final Set INVALID_URLS = new HashSet<>();
+
+    /** Hosts whose URLs are rejected by {@code urlFilter}. */
+    private static final Set INVALID_URL_HOSTS = new HashSet<>();
+
+    /** Instance type class names that may be promoted to the "peerReviewed" refereed level. */
+    private static final HashSet PEER_REVIEWED_TYPES = new HashSet<>();
+
+    static {
+        Collections
+            .addAll(
+                PEER_REVIEWED_TYPES,
+                "Article",
+                "Part of book or chapter of book",
+                "Book",
+                "Doctoral thesis",
+                "Master thesis",
+                "Data Paper",
+                "Thesis",
+                "Bachelor thesis",
+                "Conference object");
+
+        Collections
+            .addAll(
+                INVALID_AUTHOR_NAMES,
+                "(:null)",
+                "(:unap)",
+                "(:tba)",
+                "(:unas)",
+                "(:unav)",
+                "(:unkn)",
+                "(:unkn) unknown",
+                ":none",
+                ":null",
+                ":unas",
+                ":unav",
+                ":unkn",
+                "[autor desconocido]",
+                "[s. n.]",
+                "[s.n]",
+                "[unknown]",
+                "anonymous",
+                "n.n.",
+                "nn",
+                "no name supplied",
+                "none",
+                "none available",
+                "not available not available",
+                "null &na;",
+                "null anonymous",
+                "unbekannt",
+                "unknown");
+
+        Collections
+            .addAll(
+                INVALID_URL_HOSTS,
+                "creativecommons.org",
+                "www.academia.edu",
+                "academia.edu",
+                "researchgate.net",
+                "www.researchgate.net");
+
+        Collections
+            .addAll(
+                INVALID_URLS,
+                "http://repo.scoap3.org/api",
+                "http://ora.ox.ac.uk/objects/uuid:",
+                "http://ntur.lib.ntu.edu.tw/news/agent_contract.pdf",
+                "https://media.springer.com/full/springer-instructions-for-authors-assets/pdf/SN_BPF_EN.pdf",
+                "http://www.tobaccoinduceddiseases.org/dl/61aad426c96519bea4040a374c6a6110/",
+                "https://www.bilboard.nl/verenigingsbladen/bestuurskundige-berichten");
+    }
+
+    /**
+     * Removes from a result the contexts whose identifier starts with the given prefix.
+     * <p>
+     * Values that are not a {@link Result} (according to {@code ModelSupport.isSubClass}) are returned
+     * untouched; a result is only modified when {@code shouldCleanContext} accepts it.
+     *
+     * @param value the object to clean
+     * @param contextId prefix compared against the lower-cased context identifier
+     * @param verifyParam title prefix handed over to {@code shouldCleanContext}
+     * @return the same {@code value} instance
+     */
+    public static T cleanContext(T value, String contextId, String verifyParam) {
+        if (!ModelSupport.isSubClass(value, Result.class)) {
+            return value;
+        }
+
+        final Result result = (Result) value;
+        if (shouldCleanContext(result, verifyParam)) {
+            result
+                .setContext(
+                    result
+                        .getContext()
+                        .stream()
+                        // the id is lower-cased before the comparison, contextId is used as given
+                        .filter(ctx -> !StringUtils.startsWith(ctx.getId().toLowerCase(), contextId))
+                        .collect(Collectors.toList()));
+        }
+        return value;
+    }
+
+    /**
+     * Tells whether the contexts of a result have to be cleaned.
+     *
+     * @param res the result to inspect
+     * @param verifyParam prefix (compared ignoring case) a main title has to start with
+     * @return {@code true} when a main title starts with {@code verifyParam} and the context list is not null
+     */
+    private static boolean shouldCleanContext(Result res, String verifyParam) {
+        // Titles are inspected first, exactly one pass: main-title check, then the prefix check.
+        final boolean mainTitleHasPrefix = res
+            .getTitle()
+            .stream()
+            .anyMatch(
+                title -> title
+                    .getQualifier()
+                    .getClassid()
+                    .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid())
+                    && title.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()));
+
+        if (!mainTitleHasPrefix) {
+            return false;
+        }
+        return res.getContext() != null;
+    }
+
+    /**
+     * Filters the countries of a result through {@code toTakeCountry}.
+     * <p>
+     * The value is returned untouched when it is not a {@link Result}, when one of its instances has a
+     * hostedby key contained in {@code hostedBy}, or when none of its collectedfrom values equals
+     * {@code collectedfrom}. Otherwise the countries are filtered only if one of the identifiers returned by
+     * {@code getPidsAndAltIds} is a DOI whose value starts with one of the {@code verifyParam} prefixes.
+     *
+     * @param value the object to clean
+     * @param verifyParam DOI value prefixes that enable the cleaning
+     * @param hostedBy hostedby keys that exclude a result from the cleaning
+     * @param collectedfrom collectedfrom value a result must carry to be cleaned
+     * @param country country class id handed over to {@code toTakeCountry}
+     * @return the same {@code value} instance
+     */
+    public static T cleanCountry(T value, String[] verifyParam, Set hostedBy,
+        String collectedfrom, String country) {
+        if (!ModelSupport.isSubClass(value, Result.class)) {
+            return value;
+        }
+        final Result res = (Result) value;
+
+        final boolean hostedByListed = res
+            .getInstance()
+            .stream()
+            .anyMatch(i -> hostedBy.contains(i.getHostedby().getKey()));
+        if (hostedByListed) {
+            return value;
+        }
+
+        final boolean collectedElsewhere = res
+            .getCollectedfrom()
+            .stream()
+            .noneMatch(cf -> cf.getValue().equals(collectedfrom));
+        if (collectedElsewhere) {
+            return value;
+        }
+
+        // The identifiers are materialised before being inspected, as they always have been.
+        final boolean hasMatchingDoi = getPidsAndAltIds(res)
+            .collect(Collectors.toList())
+            .stream()
+            .anyMatch(
+                p -> p.getQualifier().getClassid().equals(PidType.doi.toString())
+                    && pidInParam(p.getValue(), verifyParam));
+        if (hasMatchingDoi) {
+            res
+                .setCountry(
+                    res
+                        .getCountry()
+                        .stream()
+                        .filter(c -> toTakeCountry(c, country))
+                        .collect(Collectors.toList()));
+        }
+
+        return value;
+    }
+
+    /**
+     * Streams, in this order, the pids of the result, the pids of each of its instances and the alternate
+     * identifiers of each of its instances. Null lists contribute an empty stream.
+     *
+     * @param r the result to read the identifiers from
+     * @return the concatenation of the three identifier streams
+     */
+    private static Stream getPidsAndAltIds(T r) {
+        return Stream
+            .concat(
+                Stream
+                    .concat(
+                        // pids declared on the result itself
+                        Optional
+                            .ofNullable(r.getPid())
+                            .map(Collection::stream)
+                            .orElseGet(Stream::empty),
+                        // pids declared on the single instances
+                        Optional
+                            .ofNullable(r.getInstance())
+                            .map(
+                                instances -> instances
+                                    .stream()
+                                    .flatMap(
+                                        i -> Optional
+                                            .ofNullable(i.getPid())
+                                            .map(Collection::stream)
+                                            .orElseGet(Stream::empty)))
+                            .orElseGet(Stream::empty)),
+                // alternate identifiers declared on the single instances
+                Optional
+                    .ofNullable(r.getInstance())
+                    .map(
+                        instances -> instances
+                            .stream()
+                            .flatMap(
+                                i -> Optional
+                                    .ofNullable(i.getAlternateIdentifier())
+                                    .map(Collection::stream)
+                                    .orElseGet(Stream::empty)))
+                    .orElseGet(Stream::empty));
+    }
+
+    /**
+     * Checks whether a value starts with at least one of the given prefixes.
+     *
+     * @param value the string to test
+     * @param verifyParam the accepted prefixes
+     * @return {@code true} as soon as one prefix matches, {@code false} when none does
+     */
+    private static boolean pidInParam(String value, String[] verifyParam) {
+        return Stream.of(verifyParam).anyMatch(prefix -> value.startsWith(prefix));
+    }
+
+    /**
+     * Decides whether a country has to be kept.
+     *
+     * @param c the country to evaluate
+     * @param country the class id (compared ignoring case) of the country to be removed
+     * @return {@code false} only when {@code c} has the given class id and its inference provenance is
+     *         "propagation"; {@code true} in every other case
+     */
+    private static boolean toTakeCountry(Country c, String country) {
+        // Without dataInfo, or without dataInfo.inferenceprovenance, the country cannot have been
+        // inserted via propagation, hence it is kept.
+        final DataInfo info = c.getDataInfo();
+        if (info == null || info.getInferenceprovenance() == null) {
+            return true;
+        }
+        return !c.getClassid().equalsIgnoreCase(country)
+            || !info.getInferenceprovenance().equals("propagation");
+    }
+ /**
+ * Hands the qualifiers of the given object over to fixVocabName, each one together with the
+ * constant naming its vocabulary (fixVocabName itself is defined outside this hunk).
+ *
+ * For every OafEntity the pid qualifiers are processed. For a Result also: language, resource type,
+ * best access right, subject qualifiers, the access right, refereed and pid qualifiers of each instance,
+ * and the pid qualifiers of each non-null author. For an Organization the country is processed when it
+ * is not null. Datasource, Project and Relation have nothing else to fix.
+ *
+ * @param value the object whose qualifiers are fixed in place
+ * @return the same value instance
+ */
public static T fixVocabularyNames(T value) {
- if (value instanceof Datasource) {
- // nothing to clean here
- } else if (value instanceof Project) {
- // nothing to clean here
- } else if (value instanceof Organization) {
- Organization o = (Organization) value;
- if (Objects.nonNull(o.getCountry())) {
- fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
+ if (value instanceof OafEntity) {
+
+ OafEntity e = (OafEntity) value;
+
+ // pids are common to all the entities, so they are handled before the type-specific branches
+ Optional
+ .ofNullable(e.getPid())
+ .ifPresent(pid -> pid.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES)));
+
+ if (value instanceof Result) {
+ Result r = (Result) value;
+
+ fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
+ fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
+ fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
+
+ if (Objects.nonNull(r.getSubject())) {
+ r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES));
+ }
+ if (Objects.nonNull(r.getInstance())) {
+ for (Instance i : r.getInstance()) {
+ fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
+ fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
+ Optional
+ .ofNullable(i.getPid())
+ .ifPresent(
+ pid -> pid.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES)));
+
+ }
+ }
+ if (Objects.nonNull(r.getAuthor())) {
+ r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
+ if (Objects.nonNull(a.getPid())) {
+ a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
+ fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
+ });
+ }
+ });
+ }
+ // placeholders: no subtype-specific fix is defined for the Result subclasses
+ if (value instanceof Publication) {
+
+ } else if (value instanceof Dataset) {
+
+ } else if (value instanceof OtherResearchProduct) {
+
+ } else if (value instanceof Software) {
+
+ }
+ } else if (value instanceof Datasource) {
+ // nothing to clean here
+ } else if (value instanceof Project) {
+ // nothing to clean here
+ } else if (value instanceof Organization) {
+ Organization o = (Organization) value;
+ if (Objects.nonNull(o.getCountry())) {
+ fixVocabName(o.getCountry(), ModelConstants.DNET_COUNTRY_TYPE);
+ }
+
}
} else if (value instanceof Relation) {
// nothing to clean here
- } else if (value instanceof Result) {
-
- Result r = (Result) value;
-
- fixVocabName(r.getLanguage(), ModelConstants.DNET_LANGUAGES);
- fixVocabName(r.getResourcetype(), ModelConstants.DNET_DATA_CITE_RESOURCE);
- fixVocabName(r.getBestaccessright(), ModelConstants.DNET_ACCESS_MODES);
-
- if (Objects.nonNull(r.getSubject())) {
- r.getSubject().forEach(s -> fixVocabName(s.getQualifier(), ModelConstants.DNET_SUBJECT_TYPOLOGIES));
- }
- if (Objects.nonNull(r.getInstance())) {
- for (Instance i : r.getInstance()) {
- fixVocabName(i.getAccessright(), ModelConstants.DNET_ACCESS_MODES);
- fixVocabName(i.getRefereed(), ModelConstants.DNET_REVIEW_LEVELS);
- }
- }
- if (Objects.nonNull(r.getAuthor())) {
- r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
- if (Objects.nonNull(a.getPid())) {
- a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
- fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
- });
- }
- });
- }
- if (value instanceof Publication) {
-
- } else if (value instanceof Dataset) {
-
- } else if (value instanceof OtherResearchProduct) {
-
- } else if (value instanceof Software) {
-
- }
}
return value;
}
public static boolean filter(T value) {
+ if (!(value instanceof Relation) && (Boolean.TRUE
+ .equals(
+ Optional
+ .ofNullable(value)
+ .map(
+ o -> Optional
+ .ofNullable(o.getDataInfo())
+ .map(
+ d -> Optional
+ .ofNullable(d.getInvisible())
+ .orElse(true))
+ .orElse(false))
+ .orElse(true)))) {
+ return true;
+ }
+
if (value instanceof Datasource) {
// nothing to evaluate here
} else if (value instanceof Project) {
- // nothing to evaluate here
+ final Project p = (Project) value;
+ return Objects.nonNull(p.getCode()) && StringUtils.isNotBlank(p.getCode().getValue());
} else if (value instanceof Organization) {
// nothing to evaluate here
} else if (value instanceof Relation) {
@@ -115,16 +341,411 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return true;
}
- public static T cleanup(T value) {
- if (value instanceof Datasource) {
- // nothing to clean here
- } else if (value instanceof Project) {
- // nothing to clean here
- } else if (value instanceof Organization) {
- Organization o = (Organization) value;
- if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
- o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
+ public static T cleanup(T value, VocabularyGroup vocs) {
+
+ if (Objects.isNull(value.getDataInfo())) {
+ final DataInfo d = new DataInfo();
+ d.setDeletedbyinference(false);
+ value.setDataInfo(d);
+ }
+
+ if (value instanceof OafEntity) {
+
+ OafEntity e = (OafEntity) value;
+ if (Objects.nonNull(e.getPid())) {
+ e.setPid(processPidCleaning(e.getPid()));
}
+
+ if (value instanceof Datasource) {
+ // nothing to clean here
+ } else if (value instanceof Project) {
+ // nothing to clean here
+ } else if (value instanceof Organization) {
+ Organization o = (Organization) value;
+ if (Objects.isNull(o.getCountry()) || StringUtils.isBlank(o.getCountry().getClassid())) {
+ o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
+ }
+ } else if (value instanceof Result) {
+ Result r = (Result) value;
+
+ if (Objects.isNull(r.getContext())) {
+ r.setContext(new ArrayList<>());
+ }
+
+ if (Objects.nonNull(r.getFulltext())
+ && (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
+ ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
+ r.setFulltext(null);
+
+ }
+
+ if (Objects.nonNull(r.getDateofacceptance())) {
+ Optional date = cleanDateField(r.getDateofacceptance());
+ if (date.isPresent()) {
+ r.getDateofacceptance().setValue(date.get());
+ } else {
+ r.setDateofacceptance(null);
+ }
+ }
+ if (Objects.nonNull(r.getRelevantdate())) {
+ r
+ .setRelevantdate(
+ r
+ .getRelevantdate()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(sp -> Objects.nonNull(sp.getQualifier()))
+ .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
+ .map(sp -> {
+ sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue()));
+ return sp;
+ })
+ .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
+ .collect(Collectors.toList()));
+ }
+ if (Objects.nonNull(r.getPublisher())) {
+ if (StringUtils.isBlank(r.getPublisher().getValue())) {
+ r.setPublisher(null);
+ } else {
+ r
+ .getPublisher()
+ .setValue(
+ r
+ .getPublisher()
+ .getValue()
+ .replaceAll(NAME_CLEANING_REGEX, " "));
+
+ if (vocs.vocabularyExists(DNET_PUBLISHERS)) {
+ vocs
+ .find(DNET_PUBLISHERS)
+ .map(voc -> voc.getTermBySynonym(r.getPublisher().getValue()))
+ .map(VocabularyTerm::getName)
+ .ifPresent(publisher -> r.getPublisher().setValue(publisher));
+ }
+ }
+ }
+ if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
+ r
+ .setLanguage(
+ qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
+ }
+ if (Objects.nonNull(r.getSubject())) {
+ List subjects = Lists
+ .newArrayList(
+ r
+ .getSubject()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
+ .filter(sp -> Objects.nonNull(sp.getQualifier()))
+ .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
+ .map(s -> {
+ if ("dnet:result_subject".equals(s.getQualifier().getClassid())) {
+ s.getQualifier().setClassid(ModelConstants.DNET_SUBJECT_TYPOLOGIES);
+ s.getQualifier().setClassname(ModelConstants.DNET_SUBJECT_TYPOLOGIES);
+ }
+ return s;
+ })
+ .map(GraphCleaningFunctions::cleanValue)
+ .collect(
+ Collectors
+ .toMap(
+ s -> Optional
+ .ofNullable(s.getQualifier())
+ .map(q -> q.getClassid() + s.getValue())
+ .orElse(s.getValue()),
+ Function.identity(),
+ (s1, s2) -> Collections
+ .min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
+ .values());
+ r.setSubject(subjects);
+ }
+ if (Objects.nonNull(r.getTitle())) {
+ r
+ .setTitle(
+ r
+ .getTitle()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
+ .filter(
+ sp -> {
+ final String title = sp
+ .getValue()
+ .toLowerCase();
+ final String decoded = Unidecode.decode(title);
+
+ if (StringUtils.contains(decoded, TITLE_TEST)) {
+ return decoded
+ .replaceAll(TITLE_FILTER_REGEX, "")
+ .length() > TITLE_FILTER_RESIDUAL_LENGTH;
+ }
+ return !decoded
+ .replaceAll("\\W|\\d", "")
+ .isEmpty();
+ })
+ .map(GraphCleaningFunctions::cleanValue)
+ .collect(Collectors.toList()));
+ }
+ if (Objects.nonNull(r.getFormat())) {
+ r
+ .setFormat(
+ r
+ .getFormat()
+ .stream()
+ .map(GraphCleaningFunctions::cleanValue)
+ .collect(Collectors.toList()));
+ }
+ if (Objects.nonNull(r.getDescription())) {
+ r
+ .setDescription(
+ r
+ .getDescription()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
+ .map(GraphCleaningFunctions::cleanValue)
+ .collect(Collectors.toList()));
+ }
+ if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
+ r
+ .setResourcetype(
+ qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
+ }
+ if (Objects.nonNull(r.getInstance())) {
+
+ for (Instance i : r.getInstance()) {
+ if (!vocs
+ .termExists(ModelConstants.DNET_PUBLICATION_RESOURCE, i.getInstancetype().getClassid())) {
+ if (r instanceof Publication) {
+ i
+ .setInstancetype(
+ OafMapperUtils
+ .qualifier(
+ "0038", "Other literature type",
+ ModelConstants.DNET_PUBLICATION_RESOURCE,
+ ModelConstants.DNET_PUBLICATION_RESOURCE));
+ } else if (r instanceof Dataset) {
+ i
+ .setInstancetype(
+ OafMapperUtils
+ .qualifier(
+ "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE,
+ ModelConstants.DNET_PUBLICATION_RESOURCE));
+ } else if (r instanceof Software) {
+ i
+ .setInstancetype(
+ OafMapperUtils
+ .qualifier(
+ "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE,
+ ModelConstants.DNET_PUBLICATION_RESOURCE));
+ } else if (r instanceof OtherResearchProduct) {
+ i
+ .setInstancetype(
+ OafMapperUtils
+ .qualifier(
+ "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE,
+ ModelConstants.DNET_PUBLICATION_RESOURCE));
+ }
+ }
+
+ if (Objects.nonNull(i.getPid())) {
+ i.setPid(processPidCleaning(i.getPid()));
+ }
+ if (Objects.nonNull(i.getAlternateIdentifier())) {
+ i.setAlternateIdentifier(processPidCleaning(i.getAlternateIdentifier()));
+ }
+ Optional
+ .ofNullable(i.getPid())
+ .ifPresent(pid -> {
+ final Set pids = Sets.newHashSet(pid);
+ Optional
+ .ofNullable(i.getAlternateIdentifier())
+ .ifPresent(altId -> {
+ final Set altIds = Sets.newHashSet(altId);
+ i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
+ });
+ });
+
+ if (Objects.isNull(i.getAccessright())
+ || StringUtils.isBlank(i.getAccessright().getClassid())) {
+ i
+ .setAccessright(
+ accessRight(
+ ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
+ ModelConstants.DNET_ACCESS_MODES));
+ }
+ if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
+ i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
+ }
+ if (Objects.isNull(i.getRefereed()) || StringUtils.isBlank(i.getRefereed().getClassid())) {
+ i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
+ }
+
+ if (Objects.nonNull(i.getLicense()) && Objects.nonNull(i.getLicense().getValue())) {
+ vocs
+ .find(DNET_LICENSES)
+ .map(voc -> voc.getTermBySynonym(i.getLicense().getValue()))
+ .map(VocabularyTerm::getId)
+ .ifPresent(license -> i.getLicense().setValue(license));
+ }
+
+ // from the script from Dimitris
+ if ("0000".equals(i.getRefereed().getClassid())) {
+ final boolean isFromCrossref = Optional
+ .ofNullable(i.getCollectedfrom())
+ .map(KeyValue::getKey)
+ .map(id -> id.equals(ModelConstants.CROSSREF_ID))
+ .orElse(false);
+ final boolean hasDoi = Optional
+ .ofNullable(i.getPid())
+ .map(
+ pid -> pid
+ .stream()
+ .anyMatch(
+ p -> PidType.doi.toString().equals(p.getQualifier().getClassid())))
+ .orElse(false);
+ final boolean isPeerReviewedType = PEER_REVIEWED_TYPES
+ .contains(i.getInstancetype().getClassname());
+ final boolean noOtherLitType = r
+ .getInstance()
+ .stream()
+ .noneMatch(ii -> "Other literature type".equals(ii.getInstancetype().getClassname()));
+ if (isFromCrossref && hasDoi && isPeerReviewedType && noOtherLitType) {
+ i.setRefereed(qualifier("0001", "peerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
+ } else {
+ i.setRefereed(qualifier("0002", "nonPeerReviewed", ModelConstants.DNET_REVIEW_LEVELS));
+ }
+ }
+
+ if (Objects.nonNull(i.getDateofacceptance())) {
+ Optional date = cleanDateField(i.getDateofacceptance());
+ if (date.isPresent()) {
+ i.getDateofacceptance().setValue(date.get());
+ } else {
+ i.setDateofacceptance(null);
+ }
+ }
+ if (StringUtils.isNotBlank(i.getFulltext()) &&
+ (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
+ ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
+ i.setFulltext(null);
+ }
+ if (Objects.nonNull(i.getUrl())) {
+ i
+ .setUrl(
+ i
+ .getUrl()
+ .stream()
+ .filter(GraphCleaningFunctions::urlFilter)
+ .collect(Collectors.toList()));
+ }
+ }
+ }
+ if (Objects.isNull(r.getBestaccessright())
+ || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
+ Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
+ if (Objects.isNull(bestaccessrights)) {
+ r
+ .setBestaccessright(
+ qualifier(
+ ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
+ ModelConstants.DNET_ACCESS_MODES));
+ } else {
+ r.setBestaccessright(bestaccessrights);
+ }
+ }
+ if (Objects.nonNull(r.getAuthor())) {
+ r
+ .setAuthor(
+ r
+ .getAuthor()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(GraphCleaningFunctions::isValidAuthorName)
+ .map(GraphCleaningFunctions::cleanupAuthor)
+ .collect(Collectors.toList()));
+
+ boolean nullRank = r
+ .getAuthor()
+ .stream()
+ .anyMatch(a -> Objects.isNull(a.getRank()));
+ if (nullRank) {
+ int i = 1;
+ for (Author author : r.getAuthor()) {
+ author.setRank(i++);
+ }
+ }
+
+ for (Author a : r.getAuthor()) {
+ if (Objects.isNull(a.getPid())) {
+ a.setPid(Lists.newArrayList());
+ } else {
+ a
+ .setPid(
+ a
+ .getPid()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(p -> Objects.nonNull(p.getQualifier()))
+ .filter(p -> StringUtils.isNotBlank(p.getValue()))
+ .filter(
+ p -> StringUtils
+ .contains(StringUtils.lowerCase(p.getQualifier().getClassid()), ORCID))
+ .map(p -> {
+ // hack to distinguish orcid from orcid_pending
+ String pidProvenance = getProvenance(p.getDataInfo());
+ if (p
+ .getQualifier()
+ .getClassid()
+ .toLowerCase()
+ .contains(ModelConstants.ORCID)) {
+ if (pidProvenance
+ .equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY) ||
+ pidProvenance.equals("ORCID_ENRICHMENT")) {
+ p.getQualifier().setClassid(ModelConstants.ORCID);
+ } else {
+ p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
+ }
+ final String orcid = p
+ .getValue()
+ .trim()
+ .toLowerCase()
+ .replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
+ if (orcid.length() == ORCID_LEN) {
+ p.setValue(orcid);
+ } else {
+ p.setValue("");
+ }
+ }
+ return p;
+ })
+ .filter(p -> StringUtils.isNotBlank(p.getValue()))
+ .collect(
+ Collectors
+ .toMap(
+ p -> p.getQualifier().getClassid() + p.getValue(),
+ Function.identity(),
+ (p1, p2) -> p1,
+ LinkedHashMap::new))
+ .values()
+ .stream()
+ .collect(Collectors.toList()));
+ }
+ }
+ }
+ if (value instanceof Publication) {
+
+ } else if (value instanceof Dataset) {
+
+ } else if (value instanceof OtherResearchProduct) {
+
+ } else if (value instanceof Software) {
+
+ }
+
+ }
+
} else if (value instanceof Relation) {
Relation r = (Relation) value;
@@ -136,253 +757,40 @@ public class GraphCleaningFunctions extends CleaningFunctions {
r.setValidationDate(null);
r.setValidated(false);
}
- } else if (value instanceof Result) {
-
- Result r = (Result) value;
-
- if (Objects.nonNull(r.getDateofacceptance())) {
- Optional date = cleanDateField(r.getDateofacceptance());
- if (date.isPresent()) {
- r.getDateofacceptance().setValue(date.get());
- } else {
- r.setDateofacceptance(null);
- }
- }
- if (Objects.nonNull(r.getRelevantdate())) {
- r
- .setRelevantdate(
- r
- .getRelevantdate()
- .stream()
- .filter(Objects::nonNull)
- .filter(sp -> Objects.nonNull(sp.getQualifier()))
- .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
- .map(sp -> {
- sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue()));
- return sp;
- })
- .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
- .collect(Collectors.toList()));
- }
- if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
- r.setPublisher(null);
- }
- if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
- r
- .setLanguage(
- qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
- }
- if (Objects.nonNull(r.getSubject())) {
- r
- .setSubject(
- r
- .getSubject()
- .stream()
- .filter(Objects::nonNull)
- .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
- .filter(sp -> Objects.nonNull(sp.getQualifier()))
- .filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
- .map(GraphCleaningFunctions::cleanValue)
- .collect(Collectors.toList()));
- }
- if (Objects.nonNull(r.getTitle())) {
- r
- .setTitle(
- r
- .getTitle()
- .stream()
- .filter(Objects::nonNull)
- .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
- .filter(
- sp -> {
- final String title = sp
- .getValue()
- .toLowerCase();
- final String decoded = Unidecode.decode(title);
-
- if (StringUtils.contains(decoded, TITLE_TEST)) {
- return decoded
- .replaceAll(TITLE_FILTER_REGEX, "")
- .length() > TITLE_FILTER_RESIDUAL_LENGTH;
- }
- return !decoded
- .replaceAll("\\W|\\d", "")
- .isEmpty();
- })
- .map(GraphCleaningFunctions::cleanValue)
- .collect(Collectors.toList()));
- }
- if (Objects.nonNull(r.getDescription())) {
- r
- .setDescription(
- r
- .getDescription()
- .stream()
- .filter(Objects::nonNull)
- .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
- .map(GraphCleaningFunctions::cleanValue)
- .collect(Collectors.toList()));
- }
- if (Objects.nonNull(r.getPid())) {
- r.setPid(processPidCleaning(r.getPid()));
- }
- if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
- r
- .setResourcetype(
- qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
- }
- if (Objects.nonNull(r.getInstance())) {
-
- for (Instance i : r.getInstance()) {
- if (Objects.nonNull(i.getPid())) {
- i.setPid(processPidCleaning(i.getPid()));
- }
- if (Objects.nonNull(i.getAlternateIdentifier())) {
- i.setAlternateIdentifier(processPidCleaning(i.getAlternateIdentifier()));
- }
- Optional
- .ofNullable(i.getPid())
- .ifPresent(pid -> {
- final Set pids = Sets.newHashSet(pid);
- Optional
- .ofNullable(i.getAlternateIdentifier())
- .ifPresent(altId -> {
- final Set altIds = Sets.newHashSet(altId);
- i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
- });
- });
-
- if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
- i
- .setAccessright(
- accessRight(
- ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
- ModelConstants.DNET_ACCESS_MODES));
- }
- if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
- i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
- }
- if (Objects.isNull(i.getRefereed())) {
- i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
- }
- if (Objects.nonNull(i.getDateofacceptance())) {
- Optional date = cleanDateField(i.getDateofacceptance());
- if (date.isPresent()) {
- i.getDateofacceptance().setValue(date.get());
- } else {
- i.setDateofacceptance(null);
- }
- }
- }
- }
- if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
- Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
- if (Objects.isNull(bestaccessrights)) {
- r
- .setBestaccessright(
- qualifier(
- ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
- ModelConstants.DNET_ACCESS_MODES));
- } else {
- r.setBestaccessright(bestaccessrights);
- }
- }
- if (Objects.nonNull(r.getAuthor())) {
- r
- .setAuthor(
- r
- .getAuthor()
- .stream()
- .filter(Objects::nonNull)
- .filter(a -> StringUtils.isNotBlank(a.getFullname()))
- .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
- .collect(Collectors.toList()));
-
- boolean nullRank = r
- .getAuthor()
- .stream()
- .anyMatch(a -> Objects.isNull(a.getRank()));
- if (nullRank) {
- int i = 1;
- for (Author author : r.getAuthor()) {
- author.setRank(i++);
- }
- }
-
- for (Author a : r.getAuthor()) {
- if (Objects.isNull(a.getPid())) {
- a.setPid(Lists.newArrayList());
- } else {
- a
- .setPid(
- a
- .getPid()
- .stream()
- .filter(Objects::nonNull)
- .filter(p -> Objects.nonNull(p.getQualifier()))
- .filter(p -> StringUtils.isNotBlank(p.getValue()))
- .map(p -> {
- // hack to distinguish orcid from orcid_pending
- String pidProvenance = Optional
- .ofNullable(p.getDataInfo())
- .map(
- d -> Optional
- .ofNullable(d.getProvenanceaction())
- .map(Qualifier::getClassid)
- .orElse(""))
- .orElse("");
- if (p
- .getQualifier()
- .getClassid()
- .toLowerCase()
- .contains(ModelConstants.ORCID)) {
- if (pidProvenance
- .equals(ModelConstants.SYSIMPORT_CROSSWALK_ENTITYREGISTRY)) {
- p.getQualifier().setClassid(ModelConstants.ORCID);
- } else {
- p.getQualifier().setClassid(ModelConstants.ORCID_PENDING);
- }
- final String orcid = p
- .getValue()
- .trim()
- .toLowerCase()
- .replaceAll(ORCID_CLEANING_REGEX, "$1-$2-$3-$4");
- if (orcid.length() == ORCID_LEN) {
- p.setValue(orcid);
- } else {
- p.setValue("");
- }
- }
- return p;
- })
- .filter(p -> StringUtils.isNotBlank(p.getValue()))
- .collect(
- Collectors
- .toMap(
- p -> p.getQualifier().getClassid() + p.getValue(),
- Function.identity(),
- (p1, p2) -> p1,
- LinkedHashMap::new))
- .values()
- .stream()
- .collect(Collectors.toList()));
- }
- }
- }
- if (value instanceof Publication) {
-
- } else if (value instanceof Dataset) {
-
- } else if (value instanceof OtherResearchProduct) {
-
- } else if (value instanceof Software) {
-
- }
}
return value;
}
+    /**
+     * Normalises, in place, the full name, name and surname of an author: every run of whitespace becomes
+     * a single blank and every double quote is prefixed with a backslash. Blank fields are left as they are.
+     *
+     * @param author the author to clean
+     * @return the same {@code author} instance
+     */
+    private static Author cleanupAuthor(Author author) {
+        final String fullname = author.getFullname();
+        if (StringUtils.isNotBlank(fullname)) {
+            author.setFullname(collapseWhitespaceAndEscapeQuotes(fullname));
+        }
+
+        final String name = author.getName();
+        if (StringUtils.isNotBlank(name)) {
+            author.setName(collapseWhitespaceAndEscapeQuotes(name));
+        }
+
+        final String surname = author.getSurname();
+        if (StringUtils.isNotBlank(surname)) {
+            author.setSurname(collapseWhitespaceAndEscapeQuotes(surname));
+        }
+
+        return author;
+    }
+
+    /** Replaces each whitespace run with one blank, then escapes the double quotes with a backslash. */
+    private static String collapseWhitespaceAndEscapeQuotes(String raw) {
+        return raw
+            .replaceAll(NAME_CLEANING_REGEX, " ")
+            .replace("\"", "\\\"");
+    }
+
private static Optional cleanDateField(Field dateofacceptance) {
return Optional
.ofNullable(dateofacceptance)
@@ -416,12 +824,30 @@ public class GraphCleaningFunctions extends CleaningFunctions {
// HELPERS
+ /**
+ * Tells whether an author carries a usable name.
+ *
+ * The full name must be non-blank, must contain at least one word character, and its trimmed,
+ * lower-cased form must not be listed in INVALID_AUTHOR_NAMES. In addition, the lower-cased
+ * concatenation of the non-blank full name, name and surname must not match INVALID_AUTHOR_REGEX.
+ *
+ * @param a the author to check
+ * @return true when all the conditions above hold
+ */
private static boolean isValidAuthorName(Author a) {
- return !Stream
- .of(a.getFullname(), a.getName(), a.getSurname())
- .filter(s -> s != null && !s.isEmpty())
- .collect(Collectors.joining(""))
- .toLowerCase()
- .matches(INVALID_AUTHOR_REGEX);
+ return StringUtils.isNotBlank(a.getFullname()) &&
+ StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")) &&
+ !INVALID_AUTHOR_NAMES.contains(StringUtils.lowerCase(a.getFullname()).trim()) &&
+ !Stream
+ .of(a.getFullname(), a.getName(), a.getSurname())
+ .filter(StringUtils::isNotBlank)
+ .collect(Collectors.joining(""))
+ .toLowerCase()
+ .matches(INVALID_AUTHOR_REGEX);
+ }
+
+    /**
+     * Tells whether an instance URL is worth keeping.
+     * <p>
+     * A URL is rejected when it cannot be parsed, when its path is blank or just "/", when its host is
+     * listed in {@code INVALID_URL_HOSTS} (compared ignoring case) or when its string form is listed in
+     * {@code INVALID_URLS}.
+     *
+     * @param u the URL to evaluate
+     * @return {@code true} when the URL has to be kept, {@code false} when it has to be dropped
+     */
+    private static boolean urlFilter(String u) {
+        try {
+            final URL url = new URL(u);
+
+            final String path = url.getPath();
+            if (StringUtils.isBlank(path) || "/".equals(path)) {
+                return false;
+            }
+
+            // Host names are case-insensitive while the blacklist holds lower-case entries only, so the
+            // host is folded with a locale-independent rule before the lookup.
+            final String host = url.getHost().toLowerCase(java.util.Locale.ROOT);
+            if (INVALID_URL_HOSTS.contains(host)) {
+                return false;
+            }
+
+            return !INVALID_URLS.contains(url.toString());
+        } catch (MalformedURLException ex) {
+            // a URL that cannot be parsed is not a usable one
+            return false;
+        }
     }
private static List processPidCleaning(List pids) {
@@ -432,7 +858,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
.filter(sp -> Objects.nonNull(sp.getQualifier()))
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
- .map(CleaningFunctions::normalizePidValue)
+ .map(PidCleaner::normalizePidValue)
.filter(CleaningFunctions::pidFilter)
.collect(Collectors.toList());
}
@@ -461,9 +887,115 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s;
}
+    /**
+     * Replaces, in place, every match of {@code CLEANING_REGEX} in the subject value with a blank.
+     *
+     * @param s the subject to clean, its value must not be null
+     * @return the same subject instance
+     */
+    protected static Subject cleanValue(Subject s) {
+        final String cleaned = s.getValue().replaceAll(CLEANING_REGEX, " ");
+        s.setValue(cleaned);
+        return s;
+    }
+
protected static Field cleanValue(Field s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
return s;
}
+ /**
+  * Enriches the instance type mappings of a Result using the vocabularies in {@code vocs} and then
+  * sets the result's meta resource type. Entities that are not a Result are returned untouched.
+  *
+  * @param entity the entity to process
+  * @param vocs the vocabularies used for the synonym lookups
+  * @return the same entity instance passed in
+  */
+ public static OafEntity applyCoarVocabularies(OafEntity entity, VocabularyGroup vocs) {
+
+ if (entity instanceof Result) {
+ final Result result = (Result) entity;
+
+ Optional
+ .ofNullable(result.getInstance())
+ .ifPresent(
+ instances -> instances
+ .forEach(
+ instance -> {
+ // no mapping yet: seed one from the instance type class name, bound to the COAR vocabulary
+ // NOTE(review): instance.getInstancetype() is dereferenced without a null check - confirm it is always set
+ if (Objects.isNull(instance.getInstanceTypeMapping())) {
+ List mapping = Lists.newArrayList();
+ mapping
+ .add(
+ OafMapperUtils
+ .instanceTypeMapping(
+ instance.getInstancetype().getClassname(),
+ OPENAIRE_COAR_RESOURCE_TYPES_3_1));
+ instance.setInstanceTypeMapping(mapping);
+ }
+ // first mapping that still has to be resolved (see originalResourceType)
+ Optional optionalItm = instance
+ .getInstanceTypeMapping()
+ .stream()
+ .filter(GraphCleaningFunctions::originalResourceType)
+ .findFirst();
+ if (optionalItm.isPresent()) {
+ InstanceTypeMapping coarItm = optionalItm.get();
+ // resolve the original type against the COAR vocabulary; code and label are set only on a hit
+ Optional
+ .ofNullable(
+ vocs
+ .lookupTermBySynonym(
+ OPENAIRE_COAR_RESOURCE_TYPES_3_1, coarItm.getOriginalType()))
+ .ifPresent(type -> {
+ coarItm.setTypeCode(type.getClassid());
+ coarItm.setTypeLabel(type.getClassname());
+ });
+ final List mappings = Lists.newArrayList();
+ // NOTE(review): when the COAR lookup above found nothing, getTypeCode() is still blank here
+ // and is nevertheless used as the synonym for the user resource type lookup - confirm
+ if (vocs.vocabularyExists(OPENAIRE_USER_RESOURCE_TYPES)) {
+ Optional
+ .ofNullable(
+ vocs
+ .lookupTermBySynonym(
+ OPENAIRE_USER_RESOURCE_TYPES, coarItm.getTypeCode()))
+ .ifPresent(
+ type -> mappings
+ .add(
+ OafMapperUtils
+ .instanceTypeMapping(coarItm.getTypeCode(), type)));
+ }
+ if (!mappings.isEmpty()) {
+ instance.getInstanceTypeMapping().addAll(mappings);
+ }
+ }
+ }));
+ // computed once per result, after all the instances have been processed
+ result.setMetaResourceType(getMetaResourceType(result.getInstance(), vocs));
+ }
+
+ return entity;
+ }
+
+ /**
+  * Matches the mappings that still have to be resolved: an original type is present, the mapping
+  * is bound to the OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary, and neither the type code nor the
+  * type label has been assigned yet.
+  */
+ private static boolean originalResourceType(InstanceTypeMapping itm) {
+     if (StringUtils.isBlank(itm.getOriginalType())) {
+         return false;
+     }
+     if (!OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) {
+         return false;
+     }
+     return StringUtils.isBlank(itm.getTypeCode()) && StringUtils.isBlank(itm.getTypeLabel());
+ }
+
+ /**
+  * Derives the meta resource type from the first instance type mapping bound to the
+  * OPENAIRE_COAR_RESOURCE_TYPES_3_1 vocabulary, looking its type code up as a synonym in the
+  * OPENAIRE_META_RESOURCE_TYPE vocabulary.
+  *
+  * @param instances the result instances, possibly null
+  * @param vocs the vocabularies used for the lookup
+  * @return the matching term, or null when instances is null, no such mapping exists or its type code is null
+  * @throws IllegalStateException when instances is not null and the OPENAIRE_META_RESOURCE_TYPE vocabulary
+  *         is not available, or when no synonym is found for the type code
+  */
+ private static Qualifier getMetaResourceType(final List instances, final VocabularyGroup vocs) {
+ return Optional
+ .ofNullable(instances)
+ .map(ii -> {
+ if (vocs.vocabularyExists(OPENAIRE_META_RESOURCE_TYPE)) {
+ // first COAR-bound mapping across all the non-null instances; a missing mapping list counts as empty
+ Optional itm = ii
+ .stream()
+ .filter(Objects::nonNull)
+ .flatMap(
+ i -> Optional
+ .ofNullable(i.getInstanceTypeMapping())
+ .map(Collection::stream)
+ .orElse(Stream.empty()))
+ .filter(t -> OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(t.getVocabularyName()))
+ .findFirst();
+
+ // returning null from the mapper makes the outer Optional empty, hence the final orElse(null)
+ if (!itm.isPresent() || Objects.isNull(itm.get().getTypeCode())) {
+ return null;
+ } else {
+ final String typeCode = itm.get().getTypeCode();
+ return Optional
+ .ofNullable(vocs.lookupTermBySynonym(OPENAIRE_META_RESOURCE_TYPE, typeCode))
+ .orElseThrow(
+ () -> new IllegalStateException("unable to find a synonym for '" + typeCode + "' in " +
+ OPENAIRE_META_RESOURCE_TYPE));
+ }
+ } else {
+ throw new IllegalStateException("vocabulary '" + OPENAIRE_META_RESOURCE_TYPE + "' not available");
+ }
+ })
+ .orElse(null);
+ }
+
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRule.java
new file mode 100644
index 000000000..37ab91dd5
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRule.java
@@ -0,0 +1,24 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Normalises GRID identifiers to the form {@code grid.<digits>.<suffix>}.
+ */
+public class GridCleaningRule {
+
+    /**
+     * Matches the variable part of a GRID identifier: 4 to 6 digits, a dot, then 1 or 2 characters
+     * in [0-9a-z]. The group must be named "grid" because {@link #clean(String)} reads it by name.
+     */
+    public static final Pattern PATTERN = Pattern.compile("(?<grid>\\d{4,6}\\.[0-9a-z]{1,2})");
+
+    /**
+     * Extracts the first GRID identifier found in the input, ignoring whitespace and letter case.
+     *
+     * @param grid the raw value, must not be null
+     * @return the identifier prefixed with "grid.", or an empty string when none is found
+     */
+    public static String clean(String grid) {
+        String s = grid
+            .replaceAll("\\s", "")
+            .toLowerCase();
+
+        Matcher m = PATTERN.matcher(s);
+        if (m.find()) {
+            return "grid." + m.group("grid");
+        }
+
+        return "";
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRule.java
new file mode 100644
index 000000000..bcd8279cc
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRule.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+// https://www.wikidata.org/wiki/Property:P213
+public class ISNICleaningRule {
+
+    /** Four blocks of four characters, optionally separated by single spaces; the first block is "0000". */
+    public static final Pattern PATTERN = Pattern.compile("([0]{4}) ?([0-9]{4}) ?([0-9]{4}) ?([0-9]{3}[0-9X])");
+
+    /**
+     * Extracts the first ISNI found in the input and returns it without separators.
+     *
+     * @param isni the raw value, must not be null
+     * @return the 16 characters of the identifier, or an empty string when none is found
+     */
+    public static String clean(final String isni) {
+        final Matcher matcher = PATTERN.matcher(isni);
+        if (!matcher.find()) {
+            return "";
+        }
+        final StringBuilder compact = new StringBuilder(16);
+        for (int group = 1; group <= 4; group++) {
+            compact.append(matcher.group(group));
+        }
+        return compact.toString();
+    }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
index 720fe47fb..4cecd0895 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
@@ -3,6 +3,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import java.sql.Array;
+import java.sql.SQLException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
@@ -47,6 +49,17 @@ public class OafMapperUtils {
}
public static Result mergeResults(Result left, Result right) {
+
+ final boolean leftFromDelegatedAuthority = isFromDelegatedAuthority(left);
+ final boolean rightFromDelegatedAuthority = isFromDelegatedAuthority(right);
+
+ if (leftFromDelegatedAuthority && !rightFromDelegatedAuthority) {
+ return left;
+ }
+ if (!leftFromDelegatedAuthority && rightFromDelegatedAuthority) {
+ return right;
+ }
+
if (new ResultTypeComparator().compare(left, right) < 0) {
left.mergeFrom(right);
return left;
@@ -56,6 +69,18 @@ public class OafMapperUtils {
}
}
+ /**
+  * Tells whether at least one instance of the result was collected from a datasource whose id is
+  * among IdentifierFactory.delegatedAuthorityDatasourceIds(). A result without instances yields false.
+  */
+ private static boolean isFromDelegatedAuthority(Result r) {
+     if (r.getInstance() == null) {
+         return false;
+     }
+     return r
+         .getInstance()
+         .stream()
+         .map(i -> i.getCollectedfrom())
+         .filter(Objects::nonNull)
+         .map(cf -> cf.getKey())
+         .anyMatch(key -> IdentifierFactory.delegatedAuthorityDatasourceIds().contains(key));
+ }
+
public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue();
kv.setKey(k);
@@ -95,6 +120,17 @@ public class OafMapperUtils {
.collect(Collectors.toList());
}
+ /**
+  * Converts a SQL array into a list of its distinct, non-null elements, in encounter order.
+  *
+  * @param values the SQL array, possibly null
+  * @param <T> the element type the caller expects the array to contain
+  * @return the list of elements, or null when {@code values} is null
+  * @throws SQLException when the array content cannot be retrieved
+  */
+ public static <T> List<T> listValues(Array values) throws SQLException {
+     if (Objects.isNull(values)) {
+         return null;
+     }
+     // the element type is not known statically: the caller vouches for it through T
+     @SuppressWarnings("unchecked")
+     final T[] elements = (T[]) values.getArray();
+     return Arrays
+         .stream(elements)
+         .filter(Objects::nonNull)
+         .distinct()
+         .collect(Collectors.toList());
+ }
+
public static List> listFields(final DataInfo info, final List values) {
return values
.stream()
@@ -104,8 +140,30 @@ public class OafMapperUtils {
.collect(Collectors.toList());
}
+ /**
+  * Creates an instance type mapping with all of its four properties.
+  *
+  * @param originalType the type as originally provided
+  * @param code the type code, possibly null
+  * @param label the type label, possibly null
+  * @param vocabularyName the name of the vocabulary the mapping refers to, possibly null
+  * @return a new mapping
+  */
+ public static InstanceTypeMapping instanceTypeMapping(String originalType, String code, String label,
+     String vocabularyName) {
+     final InstanceTypeMapping mapping = new InstanceTypeMapping();
+     mapping.setOriginalType(originalType);
+     mapping.setTypeCode(code);
+     mapping.setTypeLabel(label);
+     mapping.setVocabularyName(vocabularyName);
+     return mapping;
+ }
+
+ /** Creates a mapping taking code, label and vocabulary name from the class id, class name and scheme id of the term. */
+ public static InstanceTypeMapping instanceTypeMapping(String originalType, Qualifier term) {
+     final String code = term.getClassid();
+     final String label = term.getClassname();
+     final String vocabularyName = term.getSchemeid();
+     return instanceTypeMapping(originalType, code, label, vocabularyName);
+ }
+
+ /** Creates a mapping carrying only the original type. */
+ public static InstanceTypeMapping instanceTypeMapping(String originalType) {
+     return instanceTypeMapping(originalType, null, null, null);
+ }
+
+ /** Creates a mapping carrying the original type and the vocabulary name, with no code and no label. */
+ public static InstanceTypeMapping instanceTypeMapping(String originalType, String vocabularyName) {
+     return instanceTypeMapping(originalType, null, null, vocabularyName);
+ }
+
+ /**
+  * Builds a qualifier for the given scheme whose class id is the UNKNOWN constant and whose class name is "Unknown".
+  */
public static Qualifier unknown(final String schemeid, final String schemename) {
- return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
+ return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
}
public static AccessRight accessRight(
@@ -153,6 +211,17 @@ public class OafMapperUtils {
return q;
}
+ /**
+  * Creates a subject, building its qualifier from the given class and scheme identifiers and names.
+  *
+  * @return the subject, or null when {@code value} is null
+  */
+ public static Subject subject(
+     final String value,
+     final String classid,
+     final String classname,
+     final String schemeid,
+     final String schemename,
+     final DataInfo dataInfo) {
+
+     final Qualifier subjectQualifier = qualifier(classid, classname, schemeid, schemename);
+     return subject(value, subjectQualifier, dataInfo);
+ }
+
public static StructuredProperty structuredProperty(
final String value,
final String classid,
@@ -164,6 +233,20 @@ public class OafMapperUtils {
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
}
+ /**
+  * Creates a subject from a value, an already built qualifier and the data info.
+  *
+  * @return the subject, or null when {@code value} is null
+  */
+ public static Subject subject(
+     final String value,
+     final Qualifier qualifier,
+     final DataInfo dataInfo) {
+     if (Objects.isNull(value)) {
+         return null;
+     }
+     final Subject created = new Subject();
+     created.setDataInfo(dataInfo);
+     created.setQualifier(qualifier);
+     created.setValue(value);
+     return created;
+ }
+
public static StructuredProperty structuredProperty(
final String value,
final Qualifier qualifier,
@@ -368,4 +451,88 @@ public class OafMapperUtils {
}
return null;
}
+
+ /** Creates a key/value pair carrying the given data info. */
+ public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) {
+     final KeyValue pair = new KeyValue();
+     pair.setKey(key);
+     pair.setValue(value);
+     pair.setDataInfo(dataInfo);
+     return pair;
+ }
+
+ /**
+  * Creates a measure with the given id and exactly one unit, built from key, value and data info.
+  * The unit list is the fixed-size list returned by Arrays.asList.
+  */
+ public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
+     final KeyValue unit = newKeyValueInstance(key, value, dataInfo);
+     final Measure measure = new Measure();
+     measure.setId(id);
+     measure.setUnit(Arrays.asList(unit));
+     return measure;
+ }
+
+ /** Creates a non-validated relation taking collectedfrom, data info and timestamp from the entity. */
+ public static Relation getRelation(final String source, final String target,
+     final String relType, final String subRelType, final String relClass,
+     final OafEntity entity) {
+     return getRelation(source, target, relType, subRelType, relClass, entity, null);
+ }
+
+ /** Creates a relation taking collectedfrom, data info and timestamp from the entity; no properties are set. */
+ public static Relation getRelation(final String source, final String target,
+     final String relType, final String subRelType, final String relClass,
+     final OafEntity entity,
+     final String validationDate) {
+     return getRelation(
+         source, target, relType, subRelType, relClass,
+         entity.getCollectedfrom(), entity.getDataInfo(), entity.getLastupdatetimestamp(),
+         validationDate, null);
+ }
+
+ /** Creates a non-validated relation without properties. */
+ public static Relation getRelation(final String source, final String target,
+     final String relType, final String subRelType, final String relClass,
+     final List collectedfrom,
+     final DataInfo dataInfo,
+     final Long lastupdatetimestamp) {
+     return getRelation(
+         source, target, relType, subRelType, relClass,
+         collectedfrom, dataInfo, lastupdatetimestamp,
+         null, null);
+ }
+
+ /**
+  * Creates a relation with every property set explicitly.
+  * The relation is flagged as validated only when {@code validationDate} is not blank; a blank
+  * validation date is stored as null.
+  */
+ public static Relation getRelation(final String source, final String target,
+     final String relType, final String subRelType, final String relClass,
+     final List collectedfrom,
+     final DataInfo dataInfo,
+     final Long lastupdatetimestamp,
+     final String validationDate,
+     final List properties) {
+     final boolean validated = StringUtils.isNotBlank(validationDate);
+
+     final Relation relation = new Relation();
+     relation.setSource(source);
+     relation.setTarget(target);
+     relation.setRelType(relType);
+     relation.setSubRelType(subRelType);
+     relation.setRelClass(relClass);
+     relation.setCollectedfrom(collectedfrom);
+     relation.setDataInfo(dataInfo);
+     relation.setLastupdatetimestamp(lastupdatetimestamp);
+     relation.setValidated(validated);
+     relation.setValidationDate(validated ? validationDate : null);
+     relation.setProperties(properties);
+     return relation;
+ }
+
+ /**
+  * Reads the class id of the provenance action out of the data info.
+  *
+  * @param dataInfo the data info, possibly null
+  * @return the class id, or an empty string when the data info, its provenance action or the class id is null
+  */
+ public static String getProvenance(DataInfo dataInfo) {
+     return Optional
+         .ofNullable(dataInfo)
+         .map(DataInfo::getProvenanceaction)
+         .map(Qualifier::getClassid)
+         .orElse("");
+ }
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRule.java
new file mode 100644
index 000000000..a2213ed9f
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRule.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class PICCleaningRule {
+
+    /** Nine consecutive digits. */
+    public static final Pattern PATTERN = Pattern.compile("\\d{9}");
+
+    /**
+     * Extracts the first run of nine digits found in the input.
+     *
+     * @param pic the raw value, must not be null
+     * @return the nine digits, or an empty string when the input does not contain them
+     */
+    public static String clean(final String pic) {
+        final Matcher matcher = PATTERN.matcher(pic);
+        return matcher.find() ? matcher.group() : "";
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidCleaner.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidCleaner.java
new file mode 100644
index 000000000..114c2b3af
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidCleaner.java
@@ -0,0 +1,62 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Optional;
+
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+public class PidCleaner {
+
+    /**
+     * Normalises, in place, the value of a PID according to the class id of its qualifier.
+     *
+     * @param pid the PID to normalise; its qualifier must not be null
+     * @return the same PID instance, carrying the normalised value
+     */
+    public static StructuredProperty normalizePidValue(StructuredProperty pid) {
+        final String pidType = pid.getQualifier().getClassid();
+        final String normalized = normalizePidValue(pidType, pid.getValue());
+        pid.setValue(normalized);
+        return pid;
+    }
+
+    /**
+     * Trims the PID value and applies the cleaning rule registered for the PID type, if any.
+     * Types are matched case-sensitively; types without a dedicated rule get the trimmed value back.
+     *
+     * @param pidType the PID type, must not be null
+     * @param pidValue the raw PID value
+     * @return the normalised value
+     * @throws IllegalArgumentException when {@code pidValue} is null
+     */
+    public static String normalizePidValue(String pidType, String pidValue) {
+        if (pidValue == null) {
+            throw new IllegalArgumentException("PID value cannot be empty");
+        }
+        final String value = pidValue.trim();
+
+        switch (pidType) {
+
+            // TODO add cleaning for more PID types as needed
+
+            // Result
+            case "doi":
+                return DoiCleaningRule.clean(value);
+            case "pmid":
+                return PmidCleaningRule.clean(value);
+            case "pmc":
+                return PmcCleaningRule.clean(value);
+
+            // Organization
+            case "GRID":
+                return GridCleaningRule.clean(value);
+            case "ISNI":
+                return ISNICleaningRule.clean(value);
+            case "ROR":
+                return RorCleaningRule.clean(value);
+            case "PIC":
+                return PICCleaningRule.clean(value);
+            case "FundRef":
+                return FundRefCleaningRule.clean(value);
+
+            // handle and arXiv values are only trimmed, like any unknown type
+            case "handle":
+            case "arXiv":
+            default:
+                return value;
+        }
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRule.java
new file mode 100644
index 000000000..903041d43
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRule.java
@@ -0,0 +1,24 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class PmcCleaningRule {
+
+    /** The literal "PMC" followed by 1 to 8 digits. */
+    public static final Pattern PATTERN = Pattern.compile("PMC\\d{1,8}");
+
+    /**
+     * Extracts the first PMC identifier found in the input, ignoring whitespace and letter case.
+     *
+     * @param pmc the raw value, must not be null
+     * @return the identifier in upper case, or an empty string when none is found
+     */
+    public static String clean(String pmc) {
+        final String compact = pmc.replaceAll("\\s", "");
+        final Matcher matcher = PATTERN.matcher(compact.toUpperCase());
+        return matcher.find() ? matcher.group() : "";
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java
new file mode 100644
index 000000000..c0c451b88
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRule.java
@@ -0,0 +1,25 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+// https://researchguides.stevens.edu/c.php?g=442331&p=6577176
+public class PmidCleaningRule {
+
+    /** Optional leading zeros followed by 1 to 8 digits; only the digits after the zeros are captured. */
+    public static final Pattern PATTERN = Pattern.compile("0*(\\d{1,8})");
+
+    /**
+     * Extracts the first PMID found in the input, ignoring whitespace and dropping leading zeros.
+     *
+     * @param pmid the raw value, must not be null
+     * @return the captured digits, or an empty string when the input contains no digit
+     */
+    public static String clean(String pmid) {
+        final String lowered = pmid.toLowerCase();
+        final String compact = lowered.replaceAll("\\s", "");
+
+        final Matcher matcher = PATTERN.matcher(compact);
+        if (!matcher.find()) {
+            return "";
+        }
+        return matcher.group(1);
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRule.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRule.java
new file mode 100644
index 000000000..f6685f19d
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRule.java
@@ -0,0 +1,27 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+// https://ror.readme.io/docs/ror-identifier-pattern
+public class RorCleaningRule {
+
+    /** Prefix prepended to every cleaned identifier. */
+    public static final String ROR_PREFIX = "https://ror.org/";
+
+    /**
+     * Matches the identifier part of a ROR id. The group must be named "ror" because
+     * {@link #clean(String)} reads it by name.
+     */
+    private static final Pattern PATTERN = Pattern.compile("(?<ror>0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2})");
+
+    /**
+     * Extracts the first ROR identifier found in the input, ignoring whitespace and letter case.
+     *
+     * @param ror the raw value, must not be null
+     * @return the identifier prefixed with {@link #ROR_PREFIX}, or an empty string when none is found
+     */
+    public static String clean(String ror) {
+        String s = ror
+            .replaceAll("\\s", "")
+            .toLowerCase();
+
+        Matcher m = PATTERN.matcher(s);
+
+        if (m.find()) {
+            return ROR_PREFIX + m.group("ror");
+        }
+        return "";
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/SubjectProvenanceComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/SubjectProvenanceComparator.java
new file mode 100644
index 000000000..f4e3c8841
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/SubjectProvenanceComparator.java
@@ -0,0 +1,46 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
+import static org.apache.commons.lang3.StringUtils.isBlank;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.Subject;
+
+/**
+ * Orders subjects by the provenance found in their data info.
+ * Provenances containing "crosswalk" come first, then those containing "user", "propagation" and
+ * "iis"; any other non-blank provenance follows and blank provenances come last. Two provenances
+ * falling in the same class compare as equal, which keeps the ordering consistent in both directions.
+ */
+public class SubjectProvenanceComparator implements Comparator<Subject> {
+
+    /** Provenance markers, from the most to the least preferred. */
+    private static final String[] PROVENANCE_PRIORITY = {
+        "crosswalk", "user", "propagation", "iis"
+    };
+
+    @Override
+    public int compare(Subject left, Subject right) {
+        final int leftRank = rank(getProvenance(left.getDataInfo()));
+        final int rightRank = rank(getProvenance(right.getDataInfo()));
+        return Integer.compare(leftRank, rightRank);
+    }
+
+    /** Maps a provenance to its position in the ordering: the lower the rank, the earlier it sorts. */
+    private static int rank(String provenance) {
+        if (isBlank(provenance)) {
+            // blank provenances sort after everything else
+            return PROVENANCE_PRIORITY.length + 1;
+        }
+        // locale-independent lower-casing, so that the markers match under any default locale
+        final String normalized = provenance.toLowerCase(java.util.Locale.ROOT);
+        for (int i = 0; i < PROVENANCE_PRIORITY.length; i++) {
+            if (normalized.contains(PROVENANCE_PRIORITY[i])) {
+                return i;
+            }
+        }
+        return PROVENANCE_PRIORITY.length;
+    }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
index 5a59bc0df..e10d0c500 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java
@@ -75,9 +75,14 @@ public class DHPUtils {
final HttpGet req = new HttpGet(url);
+ log.info("MDStoreManager request: {}", req);
+
try (final CloseableHttpClient client = HttpClients.createDefault()) {
try (final CloseableHttpResponse response = client.execute(req)) {
final String json = IOUtils.toString(response.getEntity().getContent());
+
+ log.info("MDStoreManager response: {}", json);
+
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
return Arrays
.stream(mdstores)
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json
similarity index 100%
rename from dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/dump/input_maketar_parameters.json
rename to dhp-common/src/main/resources/eu/dnetlib/dhp/common/input_maketar_parameters.json
diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/common/name_particles.txt b/dhp-common/src/main/resources/eu/dnetlib/dhp/common/name_particles.txt
new file mode 100644
index 000000000..07cf06a98
--- /dev/null
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/common/name_particles.txt
@@ -0,0 +1,8 @@
+van
+von
+der
+de
+dell
+sig
+mr
+mrs
\ No newline at end of file
diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
new file mode 100644
index 000000000..512878457
--- /dev/null
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oa/merge/group_graph_entities_parameters.json
@@ -0,0 +1,38 @@
+[
+ {
+ "paramName": "issm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "when true will stop SparkSession after job execution",
+ "paramRequired": false
+ },
+ {
+ "paramName": "gin",
+ "paramLongName": "graphInputPath",
+ "paramDescription": "the input graph root path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "cp",
+ "paramLongName": "checkpointPath",
+ "paramDescription": "checkpoint directory",
+ "paramRequired": true
+ },
+ {
+ "paramName": "out",
+ "paramLongName": "outputPath",
+ "paramDescription": "the output graph root path",
+ "paramRequired": true
+ },
+ {
+ "paramName": "fi",
+ "paramLongName": "filterInvisible",
+ "paramDescription": "if true filters out invisible entities",
+ "paramRequired": true
+ },
+ {
+ "paramName": "isu",
+ "paramLongName": "isLookupUrl",
+ "paramDescription": "url to the ISLookup Service",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json
new file mode 100644
index 000000000..355f38e2f
--- /dev/null
+++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json
@@ -0,0 +1,20 @@
+[
+ {
+ "paramName": "issm",
+ "paramLongName": "isSparkSessionManaged",
+ "paramDescription": "when true will stop SparkSession after job execution",
+ "paramRequired": false
+ },
+ {
+ "paramName": "hmu",
+ "paramLongName": "hiveMetastoreUris",
+ "paramDescription": "the hive metastore uris",
+ "paramRequired": true
+ },
+ {
+ "paramName": "sql",
+ "paramLongName": "sql",
+ "paramDescription": "sql script to execute",
+ "paramRequired": true
+ }
+]
\ No newline at end of file
diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
new file mode 100644
index 000000000..f8afe9af4
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala
@@ -0,0 +1,73 @@
+package eu.dnetlib.dhp.application
+
+import scala.io.Source
+
+/** Base contract for the Spark applications written in Scala:
+  * every Spark Scala class should inherit from it.
+  */
+trait SparkScalaApplication {
+
+  /** Classpath location of the JSON document
+    * describing the arguments needed to run.
+    */
+  val propertyPath: String
+
+  /** Utility to parse the arguments using the
+    * property JSON found in the classpath at
+    * the location given by propertyPath.
+    *
+    * @param args the list of arguments
+    * @return the parser, already fed with the arguments
+    */
+  def parseArguments(args: Array[String]): ArgumentApplicationParser = {
+    val stream = getClass.getResourceAsStream(propertyPath)
+    // fail with an explicit message instead of a NullPointerException while reading
+    require(stream != null, s"argument specification not found in the classpath: $propertyPath")
+
+    val source = Source.fromInputStream(stream)
+    val specification =
+      try source.mkString
+      finally source.close() // releases the underlying stream as well
+
+    val parser = new ArgumentApplicationParser(specification)
+    parser.parseArgument(args)
+    parser
+  }
+
+  /** Every Spark application implements this method,
+    * where the whole logic of the Spark node is defined.
+    */
+  def run(): Unit
+}
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.slf4j.Logger
+
+/** Skeleton of a Spark Scala application: it holds the argument parser and
+  * the Spark session, both created by initialize().
+  */
+abstract class AbstractScalaApplication(
+  val propertyPath: String,
+  val args: Array[String],
+  log: Logger
+) extends SparkScalaApplication {
+
+  var parser: ArgumentApplicationParser = null
+
+  var spark: SparkSession = null
+
+  /** Parses the arguments, then creates the Spark session.
+    *
+    * @return this application
+    */
+  def initialize(): SparkScalaApplication = {
+    parser = parseArguments(args)
+    spark = createSparkSession()
+    this
+  }
+
+  /** Creates the Spark session using the "master" argument read from the parser.
+    *
+    * @return a Spark session
+    */
+  private def createSparkSession(): SparkSession = {
+    require(parser != null)
+
+    val sparkConf: SparkConf = new SparkConf()
+    val sparkMaster = parser.get("master")
+    log.info(s"Creating Spark session: Master: $sparkMaster")
+
+    val sessionBuilder = SparkSession
+      .builder()
+      .config(sparkConf)
+      .appName(getClass.getSimpleName)
+      .master(sparkMaster)
+    sessionBuilder.getOrCreate()
+  }
+
+}
diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala
new file mode 100644
index 000000000..d74ec3f69
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogModel.scala
@@ -0,0 +1,10 @@
+package eu.dnetlib.dhp.application.dedup.log
+
+/** One entry of the dedup log.
+  * NOTE(review): startTS, endTS and totalMs presumably hold the start and end timestamps and
+  * the elapsed milliseconds - confirm against the code producing the entries.
+  */
+case class DedupLogModel(
+  tag: String,
+  configuration: String,
+  entity: String,
+  startTS: Long,
+  endTS: Long,
+  totalMs: Long
+)
diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala
new file mode 100644
index 000000000..4409c01d9
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/dedup/log/DedupLogWriter.scala
@@ -0,0 +1,14 @@
+package eu.dnetlib.dhp.application.dedup.log
+
+import org.apache.spark.sql.{SaveMode, SparkSession}
+
+/** Appends dedup log entries to the dataset stored at the given path. */
+class DedupLogWriter(path: String) {
+
+  /** Writes a single log entry in append mode.
+    *
+    * @param dedupLogModel the entry to store
+    * @param spark the session used to build and write the dataset
+    */
+  def appendLog(dedupLogModel: DedupLogModel, spark: SparkSession): Unit = {
+    import spark.implicits._
+    val singleEntry = List(dedupLogModel).toDS()
+    singleEntry.write
+      .mode(SaveMode.Append)
+      .save(path)
+  }
+
+}
diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala
new file mode 100644
index 000000000..a995016a8
--- /dev/null
+++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala
@@ -0,0 +1,442 @@
+package eu.dnetlib.dhp.sx.graph.scholix
+
+import eu.dnetlib.dhp.schema.oaf.{Publication, Relation, Result, StructuredProperty}
+import eu.dnetlib.dhp.schema.sx.scholix._
+import eu.dnetlib.dhp.schema.sx.summary.{CollectedFromType, SchemeValue, ScholixSummary, Typology}
+import eu.dnetlib.dhp.utils.DHPUtils
+import org.apache.spark.sql.expressions.Aggregator
+import org.apache.spark.sql.{Encoder, Encoders}
+import org.json4s
+import org.json4s.DefaultFormats
+import org.json4s.jackson.JsonMethods.parse
+import scala.collection.JavaConverters._
+import scala.io.Source
+
+object ScholixUtils extends Serializable {
+
+ val DNET_IDENTIFIER_SCHEMA: String = "DNET Identifier"
+
+ val DATE_RELATION_KEY: String = "RelationDate"
+
+ case class RelationVocabulary(original: String, inverse: String) {}
+
+ case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {}
+
+ /** Relation vocabulary loaded from the relations.json classpath resource, indexed by the keys of that JSON document. */
+ val relations: Map[String, RelationVocabulary] = {
+   implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
+
+   val vocabularyStream =
+     getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json")
+   val rawJson = Source.fromInputStream(vocabularyStream).mkString
+
+   lazy val parsed: json4s.JValue = parse(rawJson)
+
+   parsed.extract[Map[String, RelationVocabulary]]
+ }
+
+ /** Extracts the relation date from the properties of a relation.
+   *
+   * @param relation the relation to inspect
+   * @return the value of the first property whose key equals DATE_RELATION_KEY (ignoring case),
+   *         or null when the relation has no properties or none of them carries that key
+   */
+ def extractRelationDate(relation: Relation): String = {
+
+   // nothing to look up when the properties are missing or empty
+   if (relation.getProperties == null || relation.getProperties.isEmpty)
+     null
+   else
+     relation.getProperties.asScala
+       .find(p => DATE_RELATION_KEY.equalsIgnoreCase(p.getKey))
+       .map(p => p.getValue)
+       .orNull
+ }
+
+ /** Returns the first date of the summary, or null when the summary has no dates. */
+ def extractRelationDate(summary: ScholixSummary): String = {
+   val dates = summary.getDate
+   if (dates != null && !dates.isEmpty)
+     dates.get(0)
+   else
+     null
+ }
+
+ /** Builds a new relationship from the inverse, the schema and the name of the given one, passed in this order. */
+ def inverseRelationShip(rel: ScholixRelationship): ScholixRelationship = {
+   val inverse = rel.getInverse
+   val schema = rel.getSchema
+   val name = rel.getName
+   new ScholixRelationship(inverse, schema, name)
+ }
+
+ /** Converts a result into a Scholix resource, going through its summary. */
+ def generateScholixResourceFromResult(r: Result): ScholixResource = {
+   val summary = ScholixUtils.resultToSummary(r)
+   generateScholixResourceFromSummary(summary)
+ }
+
+ /** Aggregator summing, per id, the number of related datasets and related publications.
+   * Each input tuple is read as: _1 the id, _2 the entity type, compared ignoring case with
+   * "dataset" and "publication", _3 the amount added to the matching counter.
+   * NOTE(review): the encoders are built with Encoders.bean on a Scala case class; bean encoders
+   * rely on JavaBean accessors - confirm RelatedEntities is encoded as expected.
+   */
+ val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] =
+ new Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] with Serializable {
+ // null stands for "nothing aggregated yet" and is handled by both reduce and merge
+ override def zero: RelatedEntities = null
+
+ override def reduce(b: RelatedEntities, a: (String, String, Long)): RelatedEntities = {
+ val relatedDataset = if ("dataset".equalsIgnoreCase(a._2)) a._3 else 0
+ val relatedPublication = if ("publication".equalsIgnoreCase(a._2)) a._3 else 0
+
+ if (b == null)
+ RelatedEntities(a._1, relatedDataset, relatedPublication)
+ else
+ RelatedEntities(
+ a._1,
+ b.relatedDataset + relatedDataset,
+ b.relatedPublication + relatedPublication
+ )
+ }
+
+ override def merge(b1: RelatedEntities, b2: RelatedEntities): RelatedEntities = {
+ // sum the counters when both buffers are set, otherwise keep the one that is set (possibly null)
+ if (b1 != null && b2 != null)
+ RelatedEntities(
+ b1.id,
+ b1.relatedDataset + b2.relatedDataset,
+ b1.relatedPublication + b2.relatedPublication
+ )
+ else if (b1 != null)
+ b1
+ else
+ b2
+ }
+
+ override def finish(reduction: RelatedEntities): RelatedEntities = reduction
+
+ override def bufferEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])
+
+ override def outputEncoder: Encoder[RelatedEntities] = Encoders.bean(classOf[RelatedEntities])
+ }
+
+ /** Aggregator keeping one Scholix per group: the buffer is kept as soon as it is complete,
+   * otherwise it is replaced by the incoming element.
+   */
+ val scholixAggregator: Aggregator[(String, Scholix), Scholix, Scholix] =
+   new Aggregator[(String, Scholix), Scholix, Scholix] with Serializable {
+     override def zero: Scholix = null
+
+     /** A Scholix is complete when identifier, source and target are set and the link provider list is not empty. */
+     def scholix_complete(s: Scholix): Boolean =
+       s != null && s.getIdentifier != null &&
+         s.getSource != null && s.getTarget != null &&
+         s.getLinkprovider != null && !s.getLinkprovider.isEmpty
+
+     override def reduce(b: Scholix, a: (String, Scholix)): Scholix =
+       if (scholix_complete(b)) b else a._2
+
+     override def merge(b1: Scholix, b2: Scholix): Scholix =
+       if (scholix_complete(b1)) b1 else b2
+
+     override def finish(reduction: Scholix): Scholix = reduction
+
+     override def bufferEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
+
+     override def outputEncoder: Encoder[Scholix] = Encoders.kryo[Scholix]
+   }
+
+ /** Creates the inverse of a Scholix: source and target are swapped, the relationship is inverted
+   * and the identifier is the md5 of the new source id, relationship name and target id.
+   */
+ def createInverseScholixRelation(scholix: Scholix): Scholix = {
+   val inverse = new Scholix
+   inverse.setPublicationDate(scholix.getPublicationDate)
+   inverse.setPublisher(scholix.getPublisher)
+   inverse.setLinkprovider(scholix.getLinkprovider)
+   inverse.setRelationship(inverseRelationShip(scholix.getRelationship))
+   inverse.setSource(scholix.getTarget)
+   inverse.setTarget(scholix.getSource)
+
+   // the identifier is derived from the values just stored in the inverse
+   val seed =
+     s"${inverse.getSource.getIdentifier}::${inverse.getRelationship.getName}::${inverse.getTarget.getIdentifier}"
+   inverse.setIdentifier(DHPUtils.md5(seed))
+   inverse
+ }
+
+ /** Lists the providers of a resource as entity ids; the list is empty when the resource has no collectedFrom. */
+ def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = {
+   val collectedFrom = summary.getCollectedFrom
+   if (collectedFrom == null || collectedFrom.isEmpty)
+     List()
+   else
+     collectedFrom.asScala
+       .map(cf => new ScholixEntityId(cf.getProvider.getName, cf.getProvider.getIdentifiers))
+       .toList
+ }
+
+ /** Lists the datasources of a summary as entity ids, each identified by its datasource id;
+   * the list is empty when the summary has no datasources.
+   */
+ def extractCollectedFrom(summary: ScholixSummary): List[ScholixEntityId] = {
+   val datasources = summary.getDatasources
+   if (datasources == null || datasources.isEmpty)
+     List()
+   else
+     datasources.asScala.map { ds =>
+       val identifier = new ScholixIdentifier(ds.getDatasourceId, "DNET Identifier", null)
+       new ScholixEntityId(ds.getDatasourceName, List(identifier).asJava)
+     }.toList
+ }
+
+ /** Lists the collectedfrom entries of a relation as entity ids: the value becomes the name and the
+   * key becomes the identifier; the list is empty when the relation has no collectedfrom.
+   */
+ def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = {
+   val collectedFrom = relation.getCollectedfrom
+   if (collectedFrom == null || collectedFrom.isEmpty)
+     List()
+   else
+     collectedFrom.asScala.map { kv =>
+       val identifier = new ScholixIdentifier(kv.getKey, DNET_IDENTIFIER_SCHEMA, null)
+       new ScholixEntityId(kv.getValue, List(identifier).asJava)
+     }.toList
+ }
+
+ /** Completes a Scholix with a target built from the given summary; the identifier is the md5 of
+   * source id, relationship name and target id.
+   */
+ def generateCompleteScholix(scholix: Scholix, target: ScholixSummary): Scholix = {
+   val targetResource: ScholixResource = generateScholixResourceFromSummary(target)
+   generateCompleteScholix(scholix, targetResource)
+ }
+
+ /** Copies a Scholix replacing its target with the given resource; the identifier is the md5 of
+   * source id, relationship name and target id.
+   */
+ def generateCompleteScholix(scholix: Scholix, target: ScholixResource): Scholix = {
+   val complete = new Scholix
+   complete.setPublicationDate(scholix.getPublicationDate)
+   complete.setPublisher(scholix.getPublisher)
+   complete.setLinkprovider(scholix.getLinkprovider)
+   complete.setRelationship(scholix.getRelationship)
+   complete.setSource(scholix.getSource)
+   complete.setTarget(target)
+
+   val seed =
+     s"${complete.getSource.getIdentifier}::${complete.getRelationship.getName}::${complete.getTarget.getIdentifier}"
+   complete.setIdentifier(DHPUtils.md5(seed))
+   complete
+ }
+
+ /** Converts a ScholixSummary into the ScholixResource used as source/target of a Scholix link.
+   *
+   * Title and date take only the first element of the summary lists; authors, publishers and
+   * datasources are copied only when the corresponding list is non-empty.
+   *
+   * @param summaryObject the summary to convert
+   * @return a new ScholixResource populated from the summary
+   */
+ def generateScholixResourceFromSummary(summaryObject: ScholixSummary): ScholixResource = {
+   val resource = new ScholixResource
+   resource.setIdentifier(summaryObject.getLocalIdentifier)
+   resource.setDnetIdentifier(summaryObject.getId)
+   resource.setObjectType(summaryObject.getTypology.toString)
+   resource.setObjectSubType(summaryObject.getSubType)
+
+   // only the first title is kept
+   val titles = summaryObject.getTitle
+   if (titles != null && !titles.isEmpty)
+     resource.setTitle(titles.get(0))
+
+   // each author name becomes a creator without identifiers
+   val authors = summaryObject.getAuthor
+   if (authors != null && !authors.isEmpty)
+     resource.setCreator(authors.asScala.map(name => new ScholixEntityId(name, null)).toList.asJava)
+
+   // only the first date is kept
+   val dates = summaryObject.getDate
+   if (dates != null && !dates.isEmpty)
+     resource.setPublicationDate(dates.get(0))
+
+   // each publisher name becomes an entity id without identifiers
+   val publishers = summaryObject.getPublisher
+   if (publishers != null && !publishers.isEmpty)
+     resource.setPublisher(publishers.asScala.map(name => new ScholixEntityId(name, null)).toList.asJava)
+
+   // every datasource is reported with the fixed "collected" / "complete" provenance values
+   val datasources = summaryObject.getDatasources
+   if (datasources != null && !datasources.isEmpty) {
+     val collectedFrom = datasources.asScala.map { ds =>
+       val provider = new ScholixEntityId(
+         ds.getDatasourceName,
+         List(new ScholixIdentifier(ds.getDatasourceId, DNET_IDENTIFIER_SCHEMA, null)).asJava
+       )
+       new ScholixCollectedFrom(provider, "collected", "complete")
+     }.toList
+     resource.setCollectedFrom(collectedFrom.asJava)
+   }
+   resource
+ }
+
+ /** Creates a source-only Scholix (no target, no identifier) for `relation` starting from `source`.
+   *
+   * Returns null when either argument is null, when no link provider can be found in the relation
+   * or in the source, or when the relation class has no entry in `relations`.
+   */
+ def scholixFromSource(relation: Relation, source: ScholixResource): Scholix = {
+   if (relation == null || source == null)
+     return null
+
+   // link providers: prefer the relation's own provenance, fall back to the source's
+   val fromRelation = extractCollectedFrom(relation)
+   val providers = if (fromRelation.nonEmpty) fromRelation else extractCollectedFrom(source)
+   if (providers.isEmpty)
+     return null
+
+   val scholix = new Scholix
+   scholix.setLinkprovider(providers.asJava)
+   val relationDate = extractRelationDate(relation)
+   scholix.setPublicationDate(if (relationDate != null) relationDate else source.getPublicationDate)
+   if (source.getPublisher != null && !source.getPublisher.isEmpty)
+     scholix.setPublisher(source.getPublisher)
+
+   // the vocabulary lookup key is the lower-cased relation class
+   val semantic = relations.getOrElse(relation.getRelClass.toLowerCase, null)
+   if (semantic == null)
+     return null
+   scholix.setRelationship(new ScholixRelationship(semantic.original, "datacite", semantic.inverse))
+   scholix.setSource(source)
+   scholix
+ }
+
+ /** Creates a source-only Scholix (no target, no identifier) for `relation` starting from a summary.
+   *
+   * Returns null when either argument is null, when no link provider can be found in the relation
+   * or in the summary datasources, or when the relation class has no entry in `relations`.
+   *
+   * @param relation the relation to translate
+   * @param source   the summary used as the Scholix source
+   * @return the partially filled Scholix, or null as described above
+   */
+ def scholixFromSource(relation: Relation, source: ScholixSummary): Scholix = {
+   if (relation == null || source == null)
+     return null
+
+   // link providers: prefer the relation's own provenance, fall back to the summary datasources
+   val fromRelation = extractCollectedFrom(relation)
+   val providers = if (fromRelation.nonEmpty) fromRelation else extractCollectedFrom(source)
+   if (providers.isEmpty)
+     return null
+
+   val scholix = new Scholix
+   scholix.setLinkprovider(providers.asJava)
+
+   // publication date: relation first, then whatever extractRelationDate yields for the summary
+   val relationDate = extractRelationDate(relation)
+   scholix.setPublicationDate(if (relationDate != null) relationDate else extractRelationDate(source))
+
+   val publisherNames = source.getPublisher
+   if (publisherNames != null && !publisherNames.isEmpty) {
+     val publishers = publisherNames.asScala.map(name => new ScholixEntityId(name, null)).toList
+     scholix.setPublisher(publishers.asJava)
+   }
+
+   // the vocabulary lookup key is the lower-cased relation class
+   val semantic = relations.getOrElse(relation.getRelClass.toLowerCase, null)
+   if (semantic == null)
+     return null
+   scholix.setRelationship(new ScholixRelationship(semantic.original, "datacite", semantic.inverse))
+   scholix.setSource(generateScholixResourceFromSummary(source))
+
+   scholix
+ }
+
+ /** Pairs every pid with the first url that contains the pid value, ignoring case.
+   * Pids without a matching url are paired with null.
+   */
+ def findURLForPID(
+   pidValue: List[StructuredProperty],
+   urls: List[String]
+ ): List[(StructuredProperty, String)] =
+   for (pid <- pidValue) yield {
+     val value = pid.getValue
+     (pid, urls.find(_.toLowerCase.contains(value.toLowerCase)).orNull)
+   }
+
+ /** Collects the distinct pids of all instances having a non-empty url list and a pid list,
+   * each paired with the url matched by findURLForPID (null when none matches). */
+ def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = {
+   val instances = r.getInstance()
+   if (instances == null || instances.isEmpty)
+     return List()
+   instances.asScala
+     .filter(i => i.getUrl != null && !i.getUrl.isEmpty && i.getPid != null)
+     .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList))
+     .map { case (pid, url) => new ScholixIdentifier(pid.getValue, pid.getQualifier.getClassid, url) }
+     .distinct.toList
+ }
+
+ /** Builds the ScholixSummary of a result.
+   *
+   * Returns null when the result has no pid or when no typed identifier can be extracted from
+   * its instances. The typology is `publication` for Publication instances and `dataset` for
+   * every other result; the three related-item counters are initialised to 0.
+   *
+   * @param r the result to summarise
+   * @return the summary, or null as described above
+   */
+ def resultToSummary(r: Result): ScholixSummary = {
+   if (r.getPid == null || r.getPid.isEmpty)
+     return null
+
+   val identifiers = extractTypedIdentifierFromInstance(r)
+   if (identifiers.isEmpty)
+     return null
+
+   val summary = new ScholixSummary
+   summary.setId(r.getId)
+   summary.setLocalIdentifier(identifiers.asJava)
+   summary.setTypology(if (r.isInstanceOf[Publication]) Typology.publication else Typology.dataset)
+
+   // identifiers is non-empty here, so at least one instance exists; the subtype is read from the first
+   summary.setSubType(r.getInstance().get(0).getInstancetype.getClassname)
+
+   // titles and author full names are copied as plain strings
+   if (r.getTitle != null && !r.getTitle.isEmpty)
+     summary.setTitle(r.getTitle.asScala.map(_.getValue).toList.asJava)
+
+   if (r.getAuthor != null && !r.getAuthor.isEmpty)
+     summary.setAuthor(r.getAuthor.asScala.map(_.getFullname).toList.asJava)
+
+   // dates: distinct acceptance dates of the instances that declare one
+   if (r.getInstance() != null) {
+     val acceptanceDates = r.getInstance().asScala.collect {
+       case i if i.getDateofacceptance != null => i.getDateofacceptance.getValue
+     }.toList
+     if (acceptanceDates.nonEmpty)
+       summary.setDate(acceptanceDates.distinct.asJava)
+   }
+
+   // description: value of the first entry that is non-null and carries a non-null value
+   if (r.getDescription != null && !r.getDescription.isEmpty)
+     r.getDescription.asScala
+       .find(d => d != null && d.getValue != null)
+       .foreach(d => summary.setDescription(d.getValue))
+
+   if (r.getSubject != null && !r.getSubject.isEmpty) {
+     val subjects = r.getSubject.asScala
+       .map(sbj => new SchemeValue(sbj.getQualifier.getClassname, sbj.getValue))
+       .toList
+     summary.setSubject(subjects.asJava)
+   }
+
+   if (r.getPublisher != null)
+     summary.setPublisher(List(r.getPublisher.getValue).asJava)
+
+   // datasources: one entry per distinct collectedfrom pair, always flagged "complete"
+   if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) {
+     val datasources = r.getCollectedfrom.asScala
+       .map(cf => new CollectedFromType(cf.getValue, cf.getKey, "complete"))
+       .toList
+     summary.setDatasources(datasources.distinct.asJava)
+   }
+
+   summary.setRelatedDatasets(0)
+   summary.setRelatedPublications(0)
+   summary.setRelatedUnknown(0)
+
+   summary
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java
new file mode 100644
index 000000000..f87f6e313
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/MdStoreClientTest.java
@@ -0,0 +1,36 @@
+
+package eu.dnetlib.dhp.common;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+public class MdStoreClientTest {
+
+    // Manual check against a MongoDB on localhost:27017; @Test stays commented out so builds skip it.
+    // @Test
+    public void testMongoCollection() throws IOException {
+        final MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore");
+
+        final ObjectMapper mapper = new ObjectMapper();
+
+        final List infos = client.mdStoreWithTimestamp("ODF", "store", "cleaned");
+
+        infos.forEach(System.out::println);
+
+        final String s = mapper.writeValueAsString(infos);
+
+        // Dump to a temp file rather than a developer-specific home directory, so it runs anywhere
+        final Path fileName = Files.createTempFile("mdstore_info", ".json");
+        Files.write(fileName, s.getBytes(StandardCharsets.UTF_8));
+        System.out.println("mdstore info written to " + fileName);
+    }
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
index 2ccaed3e4..92c1dcda3 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java
@@ -33,7 +33,7 @@ class ZenodoAPIClientTest {
InputStream is = new FileInputStream(file);
- Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
+ Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
@@ -56,7 +56,7 @@ class ZenodoAPIClientTest {
InputStream is = new FileInputStream(file);
- Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz", file.length()));
+ Assertions.assertEquals(200, client.uploadIS(is, "COVID-19.json.gz"));
String metadata = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/common/api/metadata.json"));
@@ -80,7 +80,7 @@ class ZenodoAPIClientTest {
InputStream is = new FileInputStream(file);
- Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
+ Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
Assertions.assertEquals(202, client.publish());
@@ -100,7 +100,7 @@ class ZenodoAPIClientTest {
InputStream is = new FileInputStream(file);
- Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition", file.length()));
+ Assertions.assertEquals(200, client.uploadIS(is, "newVersion_deposition"));
Assertions.assertEquals(202, client.publish());
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java
deleted file mode 100644
index 3a7a41a1b..000000000
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java
+++ /dev/null
@@ -1,100 +0,0 @@
-
-package eu.dnetlib.dhp.oa.merge;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.dhp.schema.oaf.Author;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
-import eu.dnetlib.pace.util.MapDocumentUtil;
-import scala.Tuple2;
-
-class AuthorMergerTest {
-
- private String publicationsBasePath;
-
- private List> authors;
-
- @BeforeEach
- public void setUp() throws Exception {
-
- publicationsBasePath = Paths
- .get(AuthorMergerTest.class.getResource("/eu/dnetlib/dhp/oa/merge").toURI())
- .toFile()
- .getAbsolutePath();
-
- authors = readSample(publicationsBasePath + "/publications_with_authors.json", Publication.class)
- .stream()
- .map(p -> p._2().getAuthor())
- .collect(Collectors.toList());
-
- }
-
- @Test
- void mergeTest() { // used in the dedup: threshold set to 0.95
-
- for (List authors1 : authors) {
- System.out.println("List " + (authors.indexOf(authors1) + 1));
- for (Author author : authors1) {
- System.out.println(authorToString(author));
- }
- }
-
- List merge = AuthorMerger.merge(authors);
-
- System.out.println("Merge ");
- for (Author author : merge) {
- System.out.println(authorToString(author));
- }
-
- Assertions.assertEquals(7, merge.size());
-
- }
-
- public List> readSample(String path, Class clazz) {
- List> res = new ArrayList<>();
- BufferedReader reader;
- try {
- reader = new BufferedReader(new FileReader(path));
- String line = reader.readLine();
- while (line != null) {
- res
- .add(
- new Tuple2<>(
- MapDocumentUtil.getJPathString("$.id", line),
- new ObjectMapper().readValue(line, clazz)));
- // read next line
- line = reader.readLine();
- }
- reader.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- return res;
- }
-
- public String authorToString(Author a) {
-
- String print = "Fullname = ";
- print += a.getFullname() + " pid = [";
- if (a.getPid() != null)
- for (StructuredProperty sp : a.getPid()) {
- print += sp.toComparableString() + " ";
- }
- print += "]";
- return print;
- }
-}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRuleTest.java
new file mode 100644
index 000000000..1b9163d46
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/GridCleaningRuleTest.java
@@ -0,0 +1,18 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class GridCleaningRuleTest {
+
+ @Test
+ void testCleaning() {
+ assertEquals("grid.493784.5", GridCleaningRule.clean("grid.493784.5"));
+ assertEquals("grid.493784.5x", GridCleaningRule.clean("grid.493784.5x"));
+ assertEquals("grid.493784.5x", GridCleaningRule.clean("493784.5x"));
+ assertEquals("", GridCleaningRule.clean("493x784.5x"));
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRuleTest.java
new file mode 100644
index 000000000..e51d1e05c
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/ISNICleaningRuleTest.java
@@ -0,0 +1,19 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class ISNICleaningRuleTest {
+
+ @Test
+ void testCleaning() {
+ assertEquals("0000000463436020", ISNICleaningRule.clean("0000 0004 6343 6020"));
+ assertEquals("0000000463436020", ISNICleaningRule.clean("0000000463436020"));
+ assertEquals("", ISNICleaningRule.clean("Q30256598"));
+ assertEquals("0000000493403529", ISNICleaningRule.clean("ISNI:0000000493403529"));
+ assertEquals("000000008614884X", ISNICleaningRule.clean("0000 0000 8614 884X"));
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
index 4068f0abb..9111ac2df 100644
--- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java
@@ -44,105 +44,104 @@ class OafMapperUtilsTest {
@Test
void testDateValidation() {
- assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
- assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
- assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
+ assertNotNull(GraphCleaningFunctions.cleanDate("2016-05-07T12:41:19.202Z "));
+ assertNotNull(GraphCleaningFunctions.cleanDate("2020-09-10 11:08:52 "));
+ assertNotNull(GraphCleaningFunctions.cleanDate(" 2016-04-05"));
- assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
+ assertEquals("2016-04-05", GraphCleaningFunctions.cleanDate("2016 Apr 05"));
- assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
- assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
- assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
- assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
+ assertEquals("2009-05-08", GraphCleaningFunctions.cleanDate("May 8, 2009 5:57:51 PM"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, 1970"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct 7, '70"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 1970"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("oct. 7, 70"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 2006"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 2 15:04:05 MST 2006"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon Jan 02 15:04:05 -0700 2006"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Monday, 02-Jan-06 15:04:05 MST"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 MST"));
+ assertEquals("2017-07-11", GraphCleaningFunctions.cleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("Mon, 02 Jan 2006 15:04:05 -0700"));
+ assertEquals("2018-01-04", GraphCleaningFunctions.cleanDate("Thu, 4 Jan 2018 17:53:36 +0000"));
+ assertEquals("2015-08-10", GraphCleaningFunctions.cleanDate("Mon Aug 10 15:44:11 UTC+0100 2015"));
assertEquals(
"2015-07-03",
- GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
- assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
- assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
- assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
- assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
- assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
- assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
- assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
- assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
- assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
- assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
- assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
- assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
- assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
- assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
- assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
- assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
- assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
- assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
- assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
- assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
- assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
- assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
- assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
- assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
- assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
- assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
- assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
- assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
- assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
- assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
- assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
- assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
- assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
- assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
- assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
- assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
- assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
- assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
- assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
- assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
- assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
- assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
- assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
+ GraphCleaningFunctions.cleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)"));
+ assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 10:09am"));
+ assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012 at 10:09am PST-08"));
+ assertEquals("2012-09-17", GraphCleaningFunctions.cleanDate("September 17, 2012, 10:10:09"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7, 1970"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("October 7th, 1970"));
+ assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006, 19:17"));
+ assertEquals("2006-02-12", GraphCleaningFunctions.cleanDate("12 Feb 2006 19:17"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 70"));
+ assertEquals("1970-10-07", GraphCleaningFunctions.cleanDate("7 oct 1970"));
+ assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("03 February 2013"));
+ assertEquals("2013-07-01", GraphCleaningFunctions.cleanDate("1 July 2013"));
+ assertEquals("2013-02-03", GraphCleaningFunctions.cleanDate("2013-Feb-03"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3/31/2014"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03/31/2014"));
+ assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08/21/71"));
+ assertEquals("1971-01-08", GraphCleaningFunctions.cleanDate("8/1/71"));
+ assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/2014 22:05"));
+ assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("04/08/2014 22:05"));
+ assertEquals("2014-08-04", GraphCleaningFunctions.cleanDate("4/8/14 22:05"));
+ assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("04/2/2014 03:00:51"));
+ assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00:00 AM"));
+ assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00:01 PM"));
+ assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 01:00 PM"));
+ assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 1:00 PM"));
+ assertEquals("1965-08-08", GraphCleaningFunctions.cleanDate("8/8/1965 12:00 AM"));
+ assertEquals("2014-02-04", GraphCleaningFunctions.cleanDate("4/02/2014 03:00:51"));
+ assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59"));
+ assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("03/19/2012 10:11:59.3186369"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/3/31"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("2014/03/31"));
+ assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/4/8 22:05"));
+ assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014/04/08 22:05"));
+ assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/04/2 03:00:51"));
+ assertEquals("2014-04-02", GraphCleaningFunctions.cleanDate("2014/4/02 03:00:51"));
+ assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59"));
+ assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("2012/03/19 10:11:59.3186369"));
+ assertEquals("2014-04-08", GraphCleaningFunctions.cleanDate("2014年04月08日"));
+ assertEquals("2006-01-02", GraphCleaningFunctions.cleanDate("2006-01-02T15:04:05+0000"));
+ assertEquals("2009-08-13", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09-07:00"));
+ assertEquals("2009-08-12", GraphCleaningFunctions.cleanDate("2009-08-12T22:15:09"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.3186369"));
+ assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 17:24:37.123"));
+ assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43"));
+ assertEquals("2013-04-01", GraphCleaningFunctions.cleanDate("2013-04-01 22:43:22"));
+ assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 UTC"));
+ assertEquals("2014-12-16", GraphCleaningFunctions.cleanDate("2014-12-16 06:20:00 GMT"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 05:24:37 PM"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:43 +0800 +08"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26 13:13:44 +09:00"));
+ assertEquals("2012-08-03", GraphCleaningFunctions.cleanDate("2012-08-03 18:31:59.257000000 +0000 UTC"));
+ assertEquals("2015-09-30", GraphCleaningFunctions.cleanDate("2015-09-30 18:48:56.35272715 +0000 UTC"));
+ assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 GMT"));
+ assertEquals("2015-02-18", GraphCleaningFunctions.cleanDate("2015-02-18 00:12:00 +0000 UTC"));
assertEquals(
- "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
+ "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001"));
assertEquals(
- "2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
- assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
- assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
- assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
- assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
- assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
- assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
- assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
- assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
- assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
- assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
- assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
- assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
- assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
- assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
- assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
+ "2015-02-08", GraphCleaningFunctions.cleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001"));
+ assertEquals("2017-07-19", GraphCleaningFunctions.cleanDate("2017-07-19 03:21:51+00:00"));
+ assertEquals("2014-04-26", GraphCleaningFunctions.cleanDate("2014-04-26"));
+ assertEquals("2014-04-01", GraphCleaningFunctions.cleanDate("2014-04"));
+ assertEquals("2014-01-01", GraphCleaningFunctions.cleanDate("2014"));
+ assertEquals("2014-05-11", GraphCleaningFunctions.cleanDate("2014-05-11 08:20:13,787"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("3.31.2014"));
+ assertEquals("2014-03-31", GraphCleaningFunctions.cleanDate("03.31.2014"));
+ assertEquals("1971-08-21", GraphCleaningFunctions.cleanDate("08.21.71"));
+ assertEquals("2014-03-01", GraphCleaningFunctions.cleanDate("2014.03"));
+ assertEquals("2014-03-30", GraphCleaningFunctions.cleanDate("2014.03.30"));
+ assertEquals("2014-06-01", GraphCleaningFunctions.cleanDate("20140601"));
+ assertEquals("2014-07-22", GraphCleaningFunctions.cleanDate("20140722105203"));
+ assertEquals("2012-03-19", GraphCleaningFunctions.cleanDate("1332151919"));
+ assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
+ assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
+ assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
}
@@ -185,6 +184,22 @@ class OafMapperUtilsTest {
.getClassid());
}
+ @Test
+ void testDelegatedAuthority() throws IOException {
+ Dataset d1 = read("dataset_2.json", Dataset.class);
+ Dataset d2 = read("dataset_delegated.json", Dataset.class);
+
+ assertEquals(1, d2.getCollectedfrom().size());
+ assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
+
+ Result res = OafMapperUtils.mergeResults(d1, d2);
+
+ assertEquals(d2, res);
+
+ System.out.println(OBJECT_MAPPER.writeValueAsString(res));
+
+ }
+
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRuleTest.java
new file mode 100644
index 000000000..3736033c3
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PICCleaningRuleTest.java
@@ -0,0 +1,19 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class PICCleaningRuleTest {
+ /** Pins the value returned by {@code PICCleaningRule.clean} for a set of raw PIC strings. */
+ @Test
+ void testCleaning() {
+ assertEquals("887624982", PICCleaningRule.clean("887624982")); // already clean: unchanged
+ assertEquals("", PICCleaningRule.clean("887 624982")); // space inside the digits: empty result
+ assertEquals("887624982", PICCleaningRule.clean(" 887624982 ")); // surrounding spaces are dropped
+ assertEquals("887624982", PICCleaningRule.clean(" 887624982x ")); // trailing non-digit is dropped
+ assertEquals("887624982", PICCleaningRule.clean(" 88762498200 ")); // only the first nine digits are kept
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRuleTest.java
new file mode 100644
index 000000000..e53ebae89
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmcCleaningRuleTest.java
@@ -0,0 +1,19 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class PmcCleaningRuleTest {
+ /** Pins the value returned by {@code PmcCleaningRule.clean} for a set of raw PMC strings. */
+ @Test
+ void testCleaning() {
+ assertEquals("PMC1234", PmcCleaningRule.clean("PMC1234")); // already clean: unchanged
+ assertEquals("PMC1234", PmcCleaningRule.clean(" PMC1234")); // leading space is dropped
+ assertEquals("PMC12345678", PmcCleaningRule.clean("PMC12345678")); // eight digits: unchanged
+ assertEquals("PMC12345678", PmcCleaningRule.clean("PMC123456789")); // the ninth digit is cut off
+ assertEquals("PMC12345678", PmcCleaningRule.clean("PMC 12345678")); // space after the prefix is dropped
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java
new file mode 100644
index 000000000..295eac85f
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/PmidCleaningRuleTest.java
@@ -0,0 +1,24 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class PmidCleaningRuleTest {
+ /** Pins the value returned by {@code PmidCleaningRule.clean} for a set of raw PMID strings. */
+ @Test
+ void testCleaning() {
+ // leading zeros are removed
+ assertEquals("1234", PmidCleaningRule.clean("01234"));
+ // a space between the digits is tolerated and dropped
+ assertEquals("1234567", PmidCleaningRule.clean("0123 4567"));
+ // parsing stops at the first non-numeric character
+ assertEquals("123", PmidCleaningRule.clean("0123x4567"));
+ // an id without any digit yields an empty result
+ assertEquals("", PmidCleaningRule.clean("abc"));
+ // zeros that are not leading are preserved
+ assertEquals("20794075", PmidCleaningRule.clean("20794075"));
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRuleTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRuleTest.java
new file mode 100644
index 000000000..5d5c03959
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/RorCleaningRuleTest.java
@@ -0,0 +1,17 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+class RorCleaningRuleTest {
+ /** Pins the value returned by {@code RorCleaningRule.clean} for a set of raw ROR strings. */
+ @Test
+ void testCleaning() {
+ assertEquals("https://ror.org/05rpz9w55", RorCleaningRule.clean("https://ror.org/05rpz9w55")); // URL form: unchanged
+ assertEquals("https://ror.org/05rpz9w55", RorCleaningRule.clean("05rpz9w55")); // bare id: URL prefix is added
+ assertEquals("", RorCleaningRule.clean("05rpz9w_55")); // id containing an underscore: empty result
+ }
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
new file mode 100644
index 000000000..c0a8d6927
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/oa/merge/AuthorMergerTest.java
@@ -0,0 +1,139 @@
+
+package eu.dnetlib.oa.merge;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+
+import org.junit.jupiter.api.Test;
+import org.junit.platform.commons.util.StringUtils;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.oa.merge.AuthorMerger;
+import eu.dnetlib.dhp.schema.oaf.Author;
+
+/**
+ * Exercises {@link AuthorMerger}: ORCID enrichment of publication authors and the ORCID similarity check.
+ */
+public class AuthorMergerTest {
+
+    /** Classpath fixture read two lines per record: the publication id, then its authors as a JSON array. */
+    private static final String PUBLICATION_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json";
+
+    /** Classpath fixture read three lines per record: matched pid, intersected id, then authors as a JSON array. */
+    private static final String ORCID_SAMPLE = "/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json";
+
+    /**
+     * Reads the two fixtures in lockstep, enriches every publication author list through
+     * {@link AuthorMerger#enrichOrcid} and prints how many authors carry an ORCID pid before and after.
+     * The test fails only when reading, parsing or enriching throws; the counts themselves are not asserted.
+     *
+     * @throws Exception if a fixture cannot be read or parsed
+     */
+    @Test
+    public void testEnrcichAuthor() throws Exception {
+        final ObjectMapper mapper = new ObjectMapper();
+        final TypeReference<List<Author>> authorListType = new TypeReference<List<Author>>() {
+        };
+
+        // try-with-resources: both fixture readers are closed even when parsing or enrichment throws
+        try (BufferedReader pr = openFixture(PUBLICATION_SAMPLE); BufferedReader or = openFixture(ORCID_SAMPLE)) {
+            String pubId;
+            while ((pubId = pr.readLine()) != null) {
+                final String matchPidOrcid = or.readLine();
+                final String pubOrcid = or.readLine();
+                final String data = pr.readLine();
+                // Consume the ORCID author line for every record, even when the publication line is blank,
+                // so the two readers cannot drift apart (assumes fixed-size records in both fixtures).
+                final String orcidData = or.readLine();
+
+                if (StringUtils.isBlank(data)) {
+                    continue;
+                }
+
+                final List<Author> publicationAuthors = mapper.readValue(data, authorListType);
+                final List<Author> orcidAuthors = mapper.readValue(orcidData, authorListType);
+                System.out.printf("OAF ID = %s \n", pubId);
+                System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
+                System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
+                System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
+                System.out.printf("Oricd Matched PID = %s \n", matchPidOrcid);
+
+                final long originalAuthorWithPid = countAuthorsWithOrcid(publicationAuthors);
+
+                final long start = System.currentTimeMillis();
+                final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);
+                final long totalTime = (System.currentTimeMillis() - start) / 1000;
+
+                final long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);
+                System.out
+                    .printf(
+                        "Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPid,
+                        enrichedAuthorWithPid);
+
+                System.out.println("=================");
+            }
+        }
+    }
+
+    /**
+     * Opens a classpath fixture as a UTF-8 reader.
+     *
+     * @param path absolute classpath location of the fixture
+     * @return a buffered reader that the caller must close
+     * @throws NullPointerException with {@code path} as message when the resource is missing
+     */
+    private static BufferedReader openFixture(final String path) {
+        final InputStream in = Objects.requireNonNull(AuthorMergerTest.class.getResourceAsStream(path), path);
+        return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
+    }
+
+    /**
+     * Counts the authors having at least one pid whose qualifier classid contains "orcid", ignoring case.
+     *
+     * @param authors authors to inspect
+     * @return number of authors carrying such a pid
+     */
+    private static long countAuthorsWithOrcid(final List<Author> authors) {
+        return authors
+            .stream()
+            .filter(
+                a -> a.getPid() != null && a
+                    .getPid()
+                    .stream()
+                    .anyMatch(
+                        // Locale.ROOT: lower-casing "ORCID" must not depend on the default locale (e.g. Turkish)
+                        p -> p.getQualifier() != null
+                            && p.getQualifier().getClassid().toLowerCase(Locale.ROOT).contains("orcid")))
+            .count();
+    }
+
+    /**
+     * Checks that {@link AuthorMerger#checkORCIDSimilarity} accepts two authors whose name and surname are swapped.
+     */
+    @Test
+    public void checkSimilarityTest() {
+        final Author left = new Author();
+        left.setName("Anand");
+        left.setSurname("Rachna");
+        left.setFullname("Anand, Rachna");
+
+        System.out.println(AuthorMerger.normalizeFullName(left.getFullname()));
+
+        final Author right = new Author();
+        right.setName("Rachna");
+        right.setSurname("Anand");
+        right.setFullname("Rachna, Anand");
+
+        assertTrue(AuthorMerger.checkORCIDSimilarity(left, right));
+    }
+
+}
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
new file mode 100644
index 000000000..ec521b3b7
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_orcid_sample.json
@@ -0,0 +1,3 @@
+WrappedArray(arXiv1507.08202)
+50|arXiv_dedup_::34e03f2336b8b28286550425e65634ea
+[{"fullname":"Liron Barak","name":"Liron","surname":"Barak","rank":null,"pid":[{"value":"0000-0002-3436-2726","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Giuseppe Avolio","name":"Giuseppe","surname":"Avolio","rank":null,"pid":[{"value":"0000-0003-2664-3437","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tobias Golling","name":"Tobias","surname":"Golling","rank":null,"pid":[{"value":"0000-0001-8535-6687","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ivan Sykora","name":"Ivan","surname":"Sykora","rank":null,"pid":[{"value":"0000-0003-3447-5621","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Joao Carvalho","name":"Joao","surname":"Carvalho","rank":null,"pid":[{"value":"0000-0002-3015-7821","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jonathan David Long","name":"Jonathan David","surname":"Long","rank":null,"pid":[{"value":"0000-0002-2115-9382","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Heather Russell","name":"Heather","surname":"Russell","rank":null,"pid":[{"value":"0000-0003-4181-0678","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alison Lister","name":"Alison","surname":"Lister","rank":null,"pid":[{"value":"0000-0002-1552-3651","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Malte 
Backhaus","name":"Malte","surname":"Backhaus","rank":null,"pid":[{"value":"0000-0002-5888-2304","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Xin Wu","name":"Xin","surname":"Wu","rank":null,"pid":[{"value":"0000-0001-7655-389X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Luke Lambourne","name":"Luke","surname":"Lambourne","rank":null,"pid":[{"value":"0000-0002-7001-7575","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kazunori Hanagaki","name":"Kazunori","surname":"Hanagaki","rank":null,"pid":[{"value":"0000-0003-0676-0441","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Krzysztof Sliwa","name":"Krzysztof","surname":"Sliwa","rank":null,"pid":[{"value":"0000-0002-1201-4771","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jiri Chudoba","name":"Jiri","surname":"Chudoba","rank":null,"pid":[{"value":"0000-0002-6425-2579","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jörn Große-Knetter","name":"Jörn","surname":"Große-Knetter","rank":null,"pid":[{"value":"0000-0003-3085-7067","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Katharine Leney","name":"Katharine","surname":"Leney","rank":null,"pid":[{"value":"0000-0002-1525-2695","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michel 
Lefebvre","name":"Michel","surname":"Lefebvre","rank":null,"pid":[{"value":"0000-0002-5560-0586","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ahmed Bassalat","name":"Ahmed","surname":"Bassalat","rank":null,"pid":[{"value":"0000-0002-0129-1423","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Borut Kersevan","name":"Borut","surname":"Kersevan","rank":null,"pid":[{"value":"0000-0002-4529-452X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Calum Macdonald","name":"Calum","surname":"Macdonald","rank":null,"pid":[{"value":"0000-0001-7857-9188","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Davide Costanzo","name":"Davide","surname":"Costanzo","rank":null,"pid":[{"value":"0000-0003-4920-6264","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Wladyslaw Dabrowski","name":"Wladyslaw","surname":"Dabrowski","rank":null,"pid":[{"value":"0000-0001-9061-9568","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jean-François Grivaz","name":"Jean-François","surname":"Grivaz","rank":null,"pid":[{"value":"0000-0003-4793-7995","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elisabetta Pianori","name":"Elisabetta","surname":"Pianori","rank":null,"pid":[{"value":"0000-0001-9233-5892","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christoph 
Falk Anders","name":"Christoph Falk","surname":"Anders","rank":null,"pid":[{"value":"0000-0001-6632-6327","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vasiliki Kouskoura","name":"Vasiliki","surname":"Kouskoura","rank":null,"pid":[{"value":"0000-0002-8987-3208","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ruth Pöttgen","name":"Ruth","surname":"Pöttgen","rank":null,"pid":[{"value":"0000-0002-3304-0987","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sergey Burdin","name":"Sergey","surname":"Burdin","rank":null,"pid":[{"value":"0000-0003-4831-4132","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Farid Ould-Saada","name":"Farid","surname":"Ould-Saada","rank":null,"pid":[{"value":"0000-0002-9404-835X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Onyisi","name":"Peter","surname":"Onyisi","rank":null,"pid":[{"value":"0000-0003-4201-7997","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Javier Llorente Merino","name":"Javier","surname":"Llorente Merino","rank":null,"pid":[{"value":"0000-0003-0027-7969","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michel Vetterli","name":"Michel","surname":"Vetterli","rank":null,"pid":[{"value":"0000-0002-7223-2965","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nicolas 
Morange","name":"Nicolas","surname":"Morange","rank":null,"pid":[{"value":"0000-0003-0047-7215","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Gianluca Introzzi","name":"Gianluca","surname":"Introzzi","rank":null,"pid":[{"value":"0000-0002-1314-2580","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Roger Moore","name":"Roger","surname":"Moore","rank":null,"pid":[{"value":"0000-0003-4160-4700","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Pierre-Antoine Delsart","name":"Pierre-Antoine","surname":"Delsart","rank":null,"pid":[{"value":"0000-0002-9556-2924","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Victor Solovyev","name":"Victor","surname":"Solovyev","rank":null,"pid":[{"value":"0000-0002-9402-6329","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jiangyong jia","name":"Jiangyong","surname":"jia","rank":null,"pid":[{"value":"0000-0002-5725-3397","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kyle Cranmer","name":"Kyle","surname":"Cranmer","rank":null,"pid":[{"value":"0000-0002-5769-7094","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frederik Ruehr","name":"Frederik","surname":"Ruehr","rank":null,"pid":[{"value":"0000-0003-4452-620X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter van 
Gemmeren","name":"Peter","surname":"van Gemmeren","rank":null,"pid":[{"value":"0000-0002-7227-4006","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jacob Kempster","name":"Jacob","surname":"Kempster","rank":null,"pid":[{"value":"0000-0003-4168-3373","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Oleg Brandt","name":"Oleg","surname":"Brandt","rank":null,"pid":[{"value":"0000-0001-5219-1417","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Antonio Policicchio","name":"Antonio","surname":"Policicchio","rank":null,"pid":[{"value":"0000-0002-1290-220X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Fabrice Hubaut","name":"Fabrice","surname":"Hubaut","rank":null,"pid":[{"value":"0000-0002-0113-2465","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christopher Gorham Lester","name":"Christopher Gorham","surname":"Lester","rank":null,"pid":[{"value":"0000-0001-5770-4883","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Royer Edson Ticse Torres","name":"Royer Edson","surname":"Ticse Torres","rank":null,"pid":[{"value":"0000-0001-8178-5257","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Maria Josefina Alconada Verzini","name":"Maria Josefina","surname":"Alconada 
Verzini","rank":null,"pid":[{"value":"0000-0003-2212-7830","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"PHILLIP URQUIJO","name":"PHILLIP","surname":"URQUIJO","rank":null,"pid":[{"value":"0000-0002-0887-7953","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Annick Lleres","name":"Annick","surname":"Lleres","rank":null,"pid":[{"value":"0000-0003-1769-8524","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrei Snesarev","name":"Andrei","surname":"Snesarev","rank":null,"pid":[{"value":"0000-0002-9067-8362","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marcin Wolter","name":"Marcin","surname":"Wolter","rank":null,"pid":[{"value":"0000-0001-9184-2921","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Garabed Halladjian","name":"Garabed","surname":"Halladjian","rank":null,"pid":[{"value":"0000-0001-7162-0301","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Anthony Morley","name":"Anthony","surname":"Morley","rank":null,"pid":[{"value":"0000-0003-0373-1346","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Francesco Nuti","name":"Francesco","surname":"Nuti","rank":null,"pid":[{"value":"0000-0003-3491-7637","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mar 
Capeans","name":"Mar","surname":"Capeans","rank":null,"pid":[{"value":"0000-0001-7727-9175","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jose Enrique Garcia Navarro","name":"Jose Enrique","surname":"Garcia Navarro","rank":null,"pid":[{"value":"0000-0002-0279-0523","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Aidan Robson","name":"Aidan","surname":"Robson","rank":null,"pid":[{"value":"0000-0002-1659-8284","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Eduardo Ros","name":"Eduardo","surname":"Ros","rank":null,"pid":[{"value":"0000-0003-2812-9554","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kevin Varvell","name":"Kevin","surname":"Varvell","rank":null,"pid":[{"value":"0000-0003-1017-1295","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ina Carli","name":"Ina","surname":"Carli","rank":null,"pid":[{"value":"0000-0002-0411-1141","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ivo van Vulpen","name":"Ivo","surname":"van Vulpen","rank":null,"pid":[{"value":"0000-0001-7074-5655","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Liaoshan Shi","name":"Liaoshan","surname":"Shi","rank":null,"pid":[{"value":"0000-0001-9532-5075","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Pavel 
Starovoitov","name":"Pavel","surname":"Starovoitov","rank":null,"pid":[{"value":"0000-0003-1990-0992","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lorenzo Feligioni","name":"Lorenzo","surname":"Feligioni","rank":null,"pid":[{"value":"0000-0002-1403-0951","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christoph Wasicki","name":"Christoph","surname":"Wasicki","rank":null,"pid":[{"value":"0000-0001-8041-741X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nuno Castro","name":"Nuno","surname":"Castro","rank":null,"pid":[{"value":"0000-0001-8491-4376","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mikhail Levchenko","name":"Mikhail","surname":"Levchenko","rank":null,"pid":[{"value":"0000-0002-5495-0656","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marek Palka","name":"Marek","surname":"Palka","rank":null,"pid":[{"value":"0000-0002-7185-3540","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Philip Allport","name":"Philip","surname":"Allport","rank":null,"pid":[{"value":"0000-0001-7303-2570","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tomas Jakoubek","name":"Tomas","surname":"Jakoubek","rank":null,"pid":[{"value":"0000-0001-7038-0369","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elizabeth 
Brost","name":"Elizabeth","surname":"Brost","rank":null,"pid":[{"value":"0000-0002-6800-9808","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Maciej Trzebinski","name":"Maciej","surname":"Trzebinski","rank":null,"pid":[{"value":"0000-0002-5151-7101","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nikola Makovec","name":"Nikola","surname":"Makovec","rank":null,"pid":[{"value":"0000-0001-5124-904X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bernhard Meirose","name":"Bernhard","surname":"Meirose","rank":null,"pid":[{"value":"0000-0003-0032-7022","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrea Dell’Acqua","name":"Andrea","surname":"Dell’Acqua","rank":null,"pid":[{"value":"0000-0003-2453-7745","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Carlos Escobar Ibáñez","name":"Carlos","surname":"Escobar Ibáñez","rank":null,"pid":[{"value":"0000-0003-4442-4537","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hideyuki Oide","name":"Hideyuki","surname":"Oide","rank":null,"pid":[{"value":"0000-0002-2173-3233","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Valentina Maria Martina Cairo","name":"Valentina Maria 
Martina","surname":"Cairo","rank":null,"pid":[{"value":"0000-0002-0758-7575","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Johannes Erdmann","name":"Johannes","surname":"Erdmann","rank":null,"pid":[{"value":"0000-0002-8073-2740","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frederic Deliot","name":"Frederic","surname":"Deliot","rank":null,"pid":[{"value":"0000-0003-0777-6031","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Fabian Hügging","name":"Fabian","surname":"Hügging","rank":null,"pid":[{"value":"0000-0002-7472-3151","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Antoine Marzin","name":"Antoine","surname":"Marzin","rank":null,"pid":[{"value":"0000-0003-4364-4351","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sebastien Prince","name":"Sebastien","surname":"Prince","rank":null,"pid":[{"value":"0000-0001-9947-3892","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Melissa Ridel","name":"Melissa","surname":"Ridel","rank":null,"pid":[{"value":"0000-0002-2601-7420","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christian Weiser","name":"Christian","surname":"Weiser","rank":null,"pid":[{"value":"0000-0002-6456-6834","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Farida 
Fassi","name":"Farida","surname":"Fassi","rank":null,"pid":[{"value":"0000-0002-6423-7213","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Manuella Vincter","name":"Manuella","surname":"Vincter","rank":null,"pid":[{"value":"0000-0002-5338-8972","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Julien Caudron","name":"Julien","surname":"Caudron","rank":null,"pid":[{"value":"0000-0002-3530-6531","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Giada Mancini","name":"Giada","surname":"Mancini","rank":null,"pid":[{"value":"0000-0001-6158-2751","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vasiliki A. Mitsou","name":"Vasiliki A.","surname":"Mitsou","rank":null,"pid":[{"value":"0000-0002-1533-8886","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"David Wardrope","name":"David","surname":"Wardrope","rank":null,"pid":[{"value":"0000-0002-8208-2964","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Loïc Valéry","name":"Loïc","surname":"Valéry","rank":null,"pid":[{"value":"0000-0002-5510-1111","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Krisztian Peters","name":"Krisztian","surname":"Peters","rank":null,"pid":[{"value":"0000-0002-7654-1677","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrea 
Knue","name":"Andrea","surname":"Knue","rank":null,"pid":[{"value":"0000-0002-1559-9285","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stathes Paganis","name":"Stathes","surname":"Paganis","rank":null,"pid":[{"value":"0000-0002-1950-8993","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Steffen Stärz","name":"Steffen","surname":"Stärz","rank":null,"pid":[{"value":"0000-0002-2908-3909","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Emma Torro Pastor","name":"Emma","surname":"Torro Pastor","rank":null,"pid":[{"value":"0000-0002-5507-7924","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrey Kiryunin","name":"Andrey","surname":"Kiryunin","rank":null,"pid":[{"value":"0000-0001-7490-6890","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Daniela Marcella Rebuzzi","name":"Daniela Marcella","surname":"Rebuzzi","rank":null,"pid":[{"value":"0000-0003-4461-3880","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frederick Luehring","name":"Frederick","surname":"Luehring","rank":null,"pid":[{"value":"0000-0001-8721-6901","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"alessandro cerri","name":"alessandro","surname":"cerri","rank":null,"pid":[{"value":"0000-0002-1904-6661","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Remi 
Lafaye","name":"Remi","surname":"Lafaye","rank":null,"pid":[{"value":"0000-0001-7848-6088","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Torsten Akesson","name":"Torsten","surname":"Akesson","rank":null,"pid":[{"value":"0000-0003-4141-5408","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hongbo Zhu","name":"Hongbo","surname":"Zhu","rank":null,"pid":[{"value":"0000-0001-8066-7048","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paul Laycock","name":"Paul","surname":"Laycock","rank":null,"pid":[{"value":"0000-0002-8572-5339","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paolo Giromini","name":"Paolo","surname":"Giromini","rank":null,"pid":[{"value":"0000-0003-0276-287X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Leszek Adamczyk","name":"Leszek","surname":"Adamczyk","rank":null,"pid":[{"value":"0000-0002-5859-2075","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephen Lloyd","name":"Stephen","surname":"Lloyd","rank":null,"pid":[{"value":"0000-0002-5073-2264","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Aaron Angerami","name":"Aaron","surname":"Angerami","rank":null,"pid":[{"value":"0000-0001-7834-8750","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Susumu 
Oda","name":"Susumu","surname":"Oda","rank":null,"pid":[{"value":"0000-0001-5836-768X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nathalie Besson","name":"Nathalie","surname":"Besson","rank":null,"pid":[{"value":"0000-0001-9248-6252","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"MARCELLO FANTI","name":"MARCELLO","surname":"FANTI","rank":null,"pid":[{"value":"0000-0002-8773-145X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Kodyš","name":"Peter","surname":"Kodyš","rank":null,"pid":[{"value":"0000-0002-8644-2349","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Teng Jian Khoo","name":"Teng Jian","surname":"Khoo","rank":null,"pid":[{"value":"0000-0002-5954-3101","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Javier Montejo Berlingen","name":"Javier","surname":"Montejo Berlingen","rank":null,"pid":[{"value":"0000-0001-9213-904X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sandro Palestini","name":"Sandro","surname":"Palestini","rank":null,"pid":[{"value":"0000-0002-4110-096X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mark Oreglia","name":"Mark","surname":"Oreglia","rank":null,"pid":[{"value":"0000-0001-6203-2209","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Krzysztof 
Korcyl","name":"Krzysztof","surname":"Korcyl","rank":null,"pid":[{"value":"0000-0001-8085-4505","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sebastian Schmitt","name":"Sebastian","surname":"Schmitt","rank":null,"pid":[{"value":"0000-0002-7935-0470","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Irinel Caprini","name":"Irinel","surname":"Caprini","rank":null,"pid":[{"value":"0000-0003-3343-3200","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Anthony Doyle","name":"Anthony","surname":"Doyle","rank":null,"pid":[{"value":"0000-0001-6322-6195","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Markus Elsing","name":"Markus","surname":"Elsing","rank":null,"pid":[{"value":"0000-0002-1213-0545","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christian Schmitt","name":"Christian","surname":"Schmitt","rank":null,"pid":[{"value":"0000-0003-1471-690X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Konstantinos Nikolopoulos","name":"Konstantinos","surname":"Nikolopoulos","rank":null,"pid":[{"value":"0000-0002-3048-489X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Janet Dietrich","name":"Janet","surname":"Dietrich","rank":null,"pid":[{"value":"0000-0001-7061-1585","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yuri 
Kulchitsky","name":"Yuri","surname":"Kulchitsky","rank":null,"pid":[{"value":"0000-0002-3036-5575","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sinead Farrington","name":"Sinead","surname":"Farrington","rank":null,"pid":[{"value":"0000-0001-5350-9271","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stefano Terzo","name":"Stefano","surname":"Terzo","rank":null,"pid":[{"value":"0000-0003-3388-3906","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"stefania xella","name":"stefania","surname":"xella","rank":null,"pid":[{"value":"0000-0002-0988-1655","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jonathan Butterworth","name":"Jonathan","surname":"Butterworth","rank":null,"pid":[{"value":"0000-0002-5905-5394","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Gideon Bella","name":"Gideon","surname":"Bella","rank":null,"pid":[{"value":"0000-0002-4009-0990","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marcello Bindi","name":"Marcello","surname":"Bindi","rank":null,"pid":[{"value":"0000-0001-6172-545X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Heather Gray","name":"Heather","surname":"Gray","rank":null,"pid":[{"value":"0000-0002-5293-4716","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marcel 
Vos","name":"Marcel","surname":"Vos","rank":null,"pid":[{"value":"0000-0001-8474-5357","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ljiljana Morvaj","name":"Ljiljana","surname":"Morvaj","rank":null,"pid":[{"value":"0000-0003-2061-2904","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Didier Ferrere","name":"Didier","surname":"Ferrere","rank":null,"pid":[{"value":"0000-0002-5687-9240","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mauro Villa","name":"Mauro","surname":"Villa","rank":null,"pid":[{"value":"0000-0002-9181-8048","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Deepak Kar","name":"Deepak","surname":"Kar","rank":null,"pid":[{"value":"0000-0002-4238-9822","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frank Berghaus","name":"Frank","surname":"Berghaus","rank":null,"pid":[{"value":"0000-0003-1887-3910","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bruce Yabsley","name":"Bruce","surname":"Yabsley","rank":null,"pid":[{"value":"0000-0002-2680-0474","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frank Winklmeier","name":"Frank","surname":"Winklmeier","rank":null,"pid":[{"value":"0000-0001-8290-3200","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Zoya 
Karpova","name":"Zoya","surname":"Karpova","rank":null,"pid":[{"value":"0000-0003-0254-4629","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jason Nielsen","name":"Jason","surname":"Nielsen","rank":null,"pid":[{"value":"0000-0002-9175-4419","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Claire Gwenlan","name":"Claire","surname":"Gwenlan","rank":null,"pid":[{"value":"0000-0002-3518-0617","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephanie Majewski","name":"Stephanie","surname":"Majewski","rank":null,"pid":[{"value":"0000-0002-6871-3395","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"James Mueller","name":"James","surname":"Mueller","rank":null,"pid":[{"value":"0000-0001-5099-4718","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mario Giordani","name":"Mario","surname":"Giordani","rank":null,"pid":[{"value":"0000-0002-0792-6039","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Laura Fabbri","name":"Laura","surname":"Fabbri","rank":null,"pid":[{"value":"0000-0002-4002-8353","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michael Begel","name":"Michael","surname":"Begel","rank":null,"pid":[{"value":"0000-0002-1634-4399","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Laurent 
Schoeffel","name":"Laurent","surname":"Schoeffel","rank":null,"pid":[{"value":"0000-0002-8081-2353","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bobby Acharya","name":"Bobby","surname":"Acharya","rank":null,"pid":[{"value":"0000-0002-8588-9157","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jan Kretzschmar","name":"Jan","surname":"Kretzschmar","rank":null,"pid":[{"value":"0000-0002-8515-1355","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Thi Ngoc Loan Truong","name":"Thi Ngoc Loan","surname":"Truong","rank":null,"pid":[{"value":"0000-0001-8249-7150","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yury Smirnov","name":"Yury","surname":"Smirnov","rank":null,"pid":[{"value":"0000-0002-2891-0781","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andreas Warburton","name":"Andreas","surname":"Warburton","rank":null,"pid":[{"value":"0000-0002-2298-7315","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Osamu Jinnouchi","name":"Osamu","surname":"Jinnouchi","rank":null,"pid":[{"value":"0000-0001-5073-0974","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nicola Orlando","name":"Nicola","surname":"Orlando","rank":null,"pid":[{"value":"0000-0003-0616-245X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Magda Anna 
Chelstowska","name":"Magda Anna","surname":"Chelstowska","rank":null,"pid":[{"value":"0000-0003-1030-2099","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Armin Nairz","name":"Armin","surname":"Nairz","rank":null,"pid":[{"value":"0000-0003-3561-0880","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Chris Malena Delitzsch","name":"Chris Malena","surname":"Delitzsch","rank":null,"pid":[{"value":"0000-0001-7021-3333","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kendall Reeves","name":"Kendall","surname":"Reeves","rank":null,"pid":[{"value":"0000-0003-3504-4882","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Minghui Liu","name":"Minghui","surname":"Liu","rank":null,"pid":[{"value":"0000-0003-0056-7296","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Clara Troncon","name":"Clara","surname":"Troncon","rank":null,"pid":[{"value":"0000-0002-7997-8524","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Chris Hays","name":"Chris","surname":"Hays","rank":null,"pid":[{"value":"0000-0003-2371-9723","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Driss Benchekroun","name":"Driss","surname":"Benchekroun","rank":null,"pid":[{"value":"0000-0001-5196-8327","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tamar 
Djobava","name":"Tamar","surname":"Djobava","rank":null,"pid":[{"value":"0000-0002-9414-8350","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Joany Manjarrés Ramos","name":"Joany","surname":"Manjarrés Ramos","rank":null,"pid":[{"value":"0000-0003-3896-5222","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Craig Wiglesworth","name":"Craig","surname":"Wiglesworth","rank":null,"pid":[{"value":"0000-0001-6219-8946","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lawrence Lee","name":"Lawrence","surname":"Lee","rank":null,"pid":[{"value":"0000-0002-5590-335X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elisabetta Barberio","name":"Elisabetta","surname":"Barberio","rank":null,"pid":[{"value":"0000-0002-3111-0910","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Robert McPherson","name":"Robert","surname":"McPherson","rank":null,"pid":[{"value":"0000-0001-9211-7019","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paul Jackson","name":"Paul","surname":"Jackson","rank":null,"pid":[{"value":"0000-0002-0847-402X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Henrik Hansen","name":"Peter Henrik","surname":"Hansen","rank":null,"pid":[{"value":"0000-0002-6764-4789","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Cristobal 
Padilla","name":"Cristobal","surname":"Padilla","rank":null,"pid":[{"value":"0000-0001-7951-0166","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nora Pettersson","name":"Nora","surname":"Pettersson","rank":null,"pid":[{"value":"0000-0001-7451-3544","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paul Glaysher","name":"Paul","surname":"Glaysher","rank":null,"pid":[{"value":"0000-0002-5437-971X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Patrick Czodrowski","name":"Patrick","surname":"Czodrowski","rank":null,"pid":[{"value":"0000-0003-0723-1437","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"María Moreno Llácer","name":"María","surname":"Moreno Llácer","rank":null,"pid":[{"value":"0000-0003-1113-3645","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Soumya Mohapatra","name":"Soumya","surname":"Mohapatra","rank":null,"pid":[{"value":"0000-0003-3006-6337","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrea Gaudiello","name":"Andrea","surname":"Gaudiello","rank":null,"pid":[{"value":"0000-0001-7721-8217","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Maximiliano Sioli","name":"Maximiliano","surname":"Sioli","rank":null,"pid":[{"value":"0000-0002-0912-9121","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christian 
Schroeder","name":"Christian","surname":"Schroeder","rank":null,"pid":[{"value":"0000-0001-6449-0668","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Guillaume Unal","name":"Guillaume","surname":"Unal","rank":null,"pid":[{"value":"0000-0001-8130-7423","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christian Gutschow","name":"Christian","surname":"Gutschow","rank":null,"pid":[{"value":"0000-0003-0857-794X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Spyridon Argyropoulos","name":"Spyridon","surname":"Argyropoulos","rank":null,"pid":[{"value":"0000-0001-7748-1429","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vadim Bednyakov","name":"Vadim","surname":"Bednyakov","rank":null,"pid":[{"value":"0000-0003-4864-8909","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Caterina Doglioni","name":"Caterina","surname":"Doglioni","rank":null,"pid":[{"value":"0000-0002-1509-0390","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrew Haas","name":"Andrew","surname":"Haas","rank":null,"pid":[{"value":"0000-0002-4832-0455","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Masahiro Morii","name":"Masahiro","surname":"Morii","rank":null,"pid":[{"value":"0000-0001-9324-057X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrea 
Coccaro","name":"Andrea","surname":"Coccaro","rank":null,"pid":[{"value":"0000-0003-2368-4559","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Edward Moyse","name":"Edward","surname":"Moyse","rank":null,"pid":[{"value":"0000-0003-4449-6178","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Takuya Nobe","name":"Takuya","surname":"Nobe","rank":null,"pid":[{"value":"0000-0002-5809-325X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jahred Adelman","name":"Jahred","surname":"Adelman","rank":null,"pid":[{"value":"0000-0002-1041-3496","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Antonio Ereditato","name":"Antonio","surname":"Ereditato","rank":null,"pid":[{"value":"0000-0002-5423-8079","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stefan Schmitt","name":"Stefan","surname":"Schmitt","rank":null,"pid":[{"value":"0000-0001-8387-1853","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"David DeMarco","name":"David","surname":"DeMarco","rank":null,"pid":[{"value":"0000-0002-8921-8828","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alessandro La Rosa","name":"Alessandro","surname":"La Rosa","rank":null,"pid":[{"value":"0000-0001-6291-2142","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Liza 
Mijovic","name":"Liza","surname":"Mijovic","rank":null,"pid":[{"value":"0000-0003-0162-2891","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephane Willocq","name":"Stephane","surname":"Willocq","rank":null,"pid":[{"value":"0000-0002-4120-1453","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"George Iakovidis","name":"George","surname":"Iakovidis","rank":null,"pid":[{"value":"0000-0002-0330-5921","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Madar Romain","name":"Madar","surname":"Romain","rank":null,"pid":[{"value":"0000-0002-6875-6408","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"giuseppe iacobucci","name":"giuseppe","surname":"iacobucci","rank":null,"pid":[{"value":"0000-0001-9965-5442","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Allen Mincer","name":"Allen","surname":"Mincer","rank":null,"pid":[{"value":"0000-0002-6307-1418","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"TROCME Benjamin","name":"TROCME","surname":"Benjamin","rank":null,"pid":[{"value":"0000-0001-9500-2487","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Liang Li","name":"Liang","surname":"Li","rank":null,"pid":[{"value":"0000-0001-6411-6107","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"William 
Murray","name":"William","surname":"Murray","rank":null,"pid":[{"value":"0000-0003-1710-6306","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Chaowaroj Wanotayaroj","name":"Chaowaroj","surname":"Wanotayaroj","rank":null,"pid":[{"value":"0000-0002-8178-5705","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Luc Goossens","name":"Luc","surname":"Goossens","rank":null,"pid":[{"value":"0000-0002-2536-4498","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kerstin Jon-And","name":"Kerstin","surname":"Jon-And","rank":null,"pid":[{"value":"0000-0001-8201-7700","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Arwa Bannoura","name":"Arwa","surname":"Bannoura","rank":null,"pid":[{"value":"0000-0002-7166-8118","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Filipe Veloso","name":"Filipe","surname":"Veloso","rank":null,"pid":[{"value":"0000-0002-5956-4244","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Maximilian Swiatlowski","name":"Maximilian","surname":"Swiatlowski","rank":null,"pid":[{"value":"0000-0001-7287-0468","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marija Vranjes Milosavljevic","name":"Marija","surname":"Vranjes 
Milosavljevic","rank":null,"pid":[{"value":"0000-0003-4477-9733","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Riccardo Maria Bianchi","name":"Riccardo Maria","surname":"Bianchi","rank":null,"pid":[{"value":"0000-0001-7345-7798","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elias Coniavitis","name":"Elias","surname":"Coniavitis","rank":null,"pid":[{"value":"0000-0002-2148-8012","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ondrej Penc","name":"Ondrej","surname":"Penc","rank":null,"pid":[{"value":"0000-0002-5433-3981","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Julie Kirk","name":"Julie","surname":"Kirk","rank":null,"pid":[{"value":"0000-0001-8096-7577","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hongtao Yang","name":"Hongtao","surname":"Yang","rank":null,"pid":[{"value":"0000-0003-3554-7113","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ewelina Maria Lobodzinska","name":"Ewelina Maria","surname":"Lobodzinska","rank":null,"pid":[{"value":"0000-0001-9012-3431","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christos Leonidopoulos","name":"Christos","surname":"Leonidopoulos","rank":null,"pid":[{"value":"0000-0002-7241-2114","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Silvia 
Biondi","name":"Silvia","surname":"Biondi","rank":null,"pid":[{"value":"0000-0002-1492-6715","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephane JEZEQUEL","name":"Stephane","surname":"JEZEQUEL","rank":null,"pid":[{"value":"0000-0001-7369-6975","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Zhiqing Zhang","name":"Zhiqing","surname":"Zhang","rank":null,"pid":[{"value":"0000-0002-7853-9079","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kathleen Whalen","name":"Kathleen","surname":"Whalen","rank":null,"pid":[{"value":"0000-0002-9383-8763","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sophie HENROT","name":"Sophie","surname":"HENROT","rank":null,"pid":[{"value":"0000-0003-1218-2991","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Louise Heelan","name":"Louise","surname":"Heelan","rank":null,"pid":[{"value":"0000-0002-4879-0131","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Dominik Derendarz","name":"Dominik","surname":"Derendarz","rank":null,"pid":[{"value":"0000-0001-5660-3095","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ning Zhou","name":"Ning","surname":"Zhou","rank":null,"pid":[{"value":"0000-0002-1775-2511","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mihai 
Caprini","name":"Mihai","surname":"Caprini","rank":null,"pid":[{"value":"0000-0002-6806-6730","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lee Sawyer","name":"Lee","surname":"Sawyer","rank":null,"pid":[{"value":"0000-0001-8295-0605","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lydia Roos","name":"Lydia","surname":"Roos","rank":null,"pid":[{"value":"0000-0001-7151-9983","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stanislav Nemecek","name":"Stanislav","surname":"Nemecek","rank":null,"pid":[{"value":"0000-0001-8978-7150","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Matthias Danninger","name":"Matthias","surname":"Danninger","rank":null,"pid":[{"value":"0000-0002-7807-7484","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lailin Xu","name":"Lailin","surname":"Xu","rank":null,"pid":[{"value":"0000-0001-8997-3199","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Angel Campoverde","name":"Angel","surname":"Campoverde","rank":null,"pid":[{"value":"0000-0003-1968-1216","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Carl Gwilliam","name":"Carl","surname":"Gwilliam","rank":null,"pid":[{"value":"0000-0002-9401-5304","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Fabrizio 
Salvatore","name":"Fabrizio","surname":"Salvatore","rank":null,"pid":[{"value":"0000-0002-3709-1554","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Werner Wiedenmann","name":"Werner","surname":"Wiedenmann","rank":null,"pid":[{"value":"0000-0003-3605-3633","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Adriaan König","name":"Adriaan","surname":"König","rank":null,"pid":[{"value":"0000-0001-6702-6473","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Romain Kukla","name":"Romain","surname":"Kukla","rank":null,"pid":[{"value":"0000-0002-1140-2465","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sarah Heim","name":"Sarah","surname":"Heim","rank":null,"pid":[{"value":"0000-0002-2639-6571","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stamatios Gkaitatzis","name":"Stamatios","surname":"Gkaitatzis","rank":null,"pid":[{"value":"0000-0001-9420-7499","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alison Elliot","name":"Alison","surname":"Elliot","rank":null,"pid":[{"value":"0000-0003-0921-0314","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marc Escalier","name":"Marc","surname":"Escalier","rank":null,"pid":[{"value":"0000-0003-4270-2775","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Philipp 
Stolte","name":"Philipp","surname":"Stolte","rank":null,"pid":[{"value":"0000-0002-8828-3564","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Loch","name":"Peter","surname":"Loch","rank":null,"pid":[{"value":"0000-0002-2005-671X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tamara Vazquez Schroeder","name":"Tamara","surname":"Vazquez Schroeder","rank":null,"pid":[{"value":"0000-0002-9780-099X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"lily asquith","name":"lily","surname":"asquith","rank":null,"pid":[{"value":"0000-0001-8035-7162","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Javier Sanchez","name":"Javier","surname":"Sanchez","rank":null,"pid":[{"value":"0000-0001-9913-310X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tomas Davidek","name":"Tomas","surname":"Davidek","rank":null,"pid":[{"value":"0000-0002-3770-8307","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lidia Dell'Asta","name":"Lidia","surname":"Dell'Asta","rank":null,"pid":[{"value":"0000-0002-9601-4225","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Francisco Alonso","name":"Francisco","surname":"Alonso","rank":null,"pid":[{"value":"0000-0001-9431-8156","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"J. Katharina Behr","name":"J. 
Katharina","surname":"Behr","rank":null,"pid":[{"value":"0000-0002-5501-4640","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"MARIO MARTINEZ","name":"MARIO","surname":"MARTINEZ","rank":null,"pid":[{"value":"0000-0002-3135-945X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Konstantinos Bachas","name":"Konstantinos","surname":"Bachas","rank":null,"pid":[{"value":"0000-0002-9047-6517","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Arnaud Lucotte","name":"Arnaud","surname":"Lucotte","rank":null,"pid":[{"value":"0000-0002-5992-0640","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"scott snyder","name":"scott","surname":"snyder","rank":null,"pid":[{"value":"0000-0001-8610-8423","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Edson Carquin","name":"Edson","surname":"Carquin","rank":null,"pid":[{"value":"0000-0002-7863-1166","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kristin Lohwasser","name":"Kristin","surname":"Lohwasser","rank":null,"pid":[{"value":"0000-0003-1833-9160","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jens Weingarten","name":"Jens","surname":"Weingarten","rank":null,"pid":[{"value":"0000-0003-2165-871X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Victor 
Maleev","name":"Victor","surname":"Maleev","rank":null,"pid":[{"value":"0000-0003-1028-8602","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Francesca Ungaro","name":"Francesca","surname":"Ungaro","rank":null,"pid":[{"value":"0000-0003-2005-595X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Koji Terashi","name":"Koji","surname":"Terashi","rank":null,"pid":[{"value":"0000-0001-6520-8070","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kohei Yorita","name":"Kohei","surname":"Yorita","rank":null,"pid":[{"value":"0000-0003-1988-8401","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Roland Jansky","name":"Roland","surname":"Jansky","rank":null,"pid":[{"value":"0000-0003-0456-4658","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sandro De Cecco","name":"Sandro","surname":"De Cecco","rank":null,"pid":[{"value":"0000-0003-4907-8610","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Wade Fisher","name":"Wade","surname":"Fisher","rank":null,"pid":[{"value":"0000-0003-3043-3045","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jaroslav Guenther","name":"Jaroslav","surname":"Guenther","rank":null,"pid":[{"value":"0000-0003-3189-3959","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tatsuya 
Masubuchi","name":"Tatsuya","surname":"Masubuchi","rank":null,"pid":[{"value":"0000-0001-9984-8009","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Janusz Chwastowski","name":"Janusz","surname":"Chwastowski","rank":null,"pid":[{"value":"0000-0002-6190-8376","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ian Watson","name":"Ian","surname":"Watson","rank":null,"pid":[{"value":"0000-0003-2141-3413","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Patrick Rieck","name":"Patrick","surname":"Rieck","rank":null,"pid":[{"value":"0000-0003-0290-0566","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mario Lassnig","name":"Mario","surname":"Lassnig","rank":null,"pid":[{"value":"0000-0002-9541-0592","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jana Schaarschmidt","name":"Jana","surname":"Schaarschmidt","rank":null,"pid":[{"value":"0000-0002-0433-6439","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Joseph Reichert","name":"Joseph","surname":"Reichert","rank":null,"pid":[{"value":"0000-0003-2110-8021","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bjarne Stugu","name":"Bjarne","surname":"Stugu","rank":null,"pid":[{"value":"0000-0002-1728-9272","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paolo 
Gauzzi","name":"Paolo","surname":"Gauzzi","rank":null,"pid":[{"value":"0000-0003-4841-5822","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sergei Smirnov","name":"Sergei","surname":"Smirnov","rank":null,"pid":[{"value":"0000-0002-6778-073X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Xiangyang Ju","name":"Xiangyang","surname":"Ju","rank":null,"pid":[{"value":"0000-0002-9745-1638","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Fernando Barreiro","name":"Fernando","surname":"Barreiro","rank":null,"pid":[{"value":"0000-0002-3021-0258","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Markus Cristinziani","name":"Markus","surname":"Cristinziani","rank":null,"pid":[{"value":"0000-0003-3893-9171","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Antonio Amorim","name":"Antonio","surname":"Amorim","rank":null,"pid":[{"value":"0000-0003-0638-2321","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sune Jakobsen","name":"Sune","surname":"Jakobsen","rank":null,"pid":[{"value":"0000-0002-6564-040X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alejandro Alonso","name":"Alejandro","surname":"Alonso","rank":null,"pid":[{"value":"0000-0003-1259-0573","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Makoto 
Shimojima","name":"Makoto","surname":"Shimojima","rank":null,"pid":[{"value":"0000-0002-8738-1664","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Francesco Guescini","name":"Francesco","surname":"Guescini","rank":null,"pid":[{"value":"0000-0001-5351-2673","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christina Potter","name":"Christina","surname":"Potter","rank":null,"pid":[{"value":"0000-0002-9815-5208","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Evelina Bouhova-Thacker","name":"Evelina","surname":"Bouhova-Thacker","rank":null,"pid":[{"value":"0000-0002-5103-1558","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Eduard Simioni","name":"Eduard","surname":"Simioni","rank":null,"pid":[{"value":"0000-0002-8929-6236","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paolo Camarri","name":"Paolo","surname":"Camarri","rank":null,"pid":[{"value":"0000-0002-5732-5645","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"William Leight","name":"William","surname":"Leight","rank":null,"pid":[{"value":"0000-0002-2968-7841","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Luis Roberto Flores Castillo","name":"Luis Roberto","surname":"Flores 
Castillo","rank":null,"pid":[{"value":"0000-0003-1551-5974","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alessandro null","name":"Alessandro","surname":null,"rank":null,"pid":[{"value":"0000-0002-8224-6105","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Roman Lysak","name":"Roman","surname":"Lysak","rank":null,"pid":[{"value":"0000-0003-2990-1673","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Matteo Franchini","name":"Matteo","surname":"Franchini","rank":null,"pid":[{"value":"0000-0002-4554-252X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Martin Nagel","name":"Martin","surname":"Nagel","rank":null,"pid":[{"value":"0000-0002-2588-6691","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Harald Fox","name":"Harald","surname":"Fox","rank":null,"pid":[{"value":"0000-0003-3089-6090","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Klaus Moenig","name":"Klaus","surname":"Moenig","rank":null,"pid":[{"value":"0000-0002-3169-7117","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sergey Karpov","name":"Sergey","surname":"Karpov","rank":null,"pid":[{"value":"0000-0002-2230-5353","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Remi 
Zaidan","name":"Remi","surname":"Zaidan","rank":null,"pid":[{"value":"0000-0002-3710-4554","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kilian Rosbach","name":"Kilian","surname":"Rosbach","rank":null,"pid":[{"value":"0000-0002-4241-2949","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Pawel Klimek","name":"Pawel","surname":"Klimek","rank":null,"pid":[{"value":"0000-0003-1661-6873","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nikolaos Konstantinidis","name":"Nikolaos","surname":"Konstantinidis","rank":null,"pid":[{"value":"0000-0002-4140-6360","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alberto Mengarelli","name":"Alberto","surname":"Mengarelli","rank":null,"pid":[{"value":"0000-0002-1884-854X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Robert Kowalewski","name":"Robert","surname":"Kowalewski","rank":null,"pid":[{"value":"0000-0002-7314-0990","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Miguel Arratia","name":"Miguel","surname":"Arratia","rank":null,"pid":[{"value":"0000-0001-6877-3315","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paola Giannetti","name":"Paola","surname":"Giannetti","rank":null,"pid":[{"value":"0000-0002-3721-9490","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Joern 
Lange","name":"Joern","surname":"Lange","rank":null,"pid":[{"value":"0000-0003-1307-1441","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Pavel Řezníček","name":"Pavel","surname":"Řezníček","rank":null,"pid":[{"value":"0000-0003-4017-9829","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vojtech Pleskot","name":"Vojtech","surname":"Pleskot","rank":null,"pid":[{"value":"0000-0001-5435-497X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Federico Sforza","name":"Federico","surname":"Sforza","rank":null,"pid":[{"value":"0000-0002-4065-7352","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Danijela Bogavac","name":"Danijela","surname":"Bogavac","rank":null,"pid":[{"value":"0000-0003-2138-9062","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nenad Vranjes","name":"Nenad","surname":"Vranjes","rank":null,"pid":[{"value":"0000-0001-5415-5225","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Watkins","name":"Peter","surname":"Watkins","rank":null,"pid":[{"value":"0000-0002-1290-6833","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yohei Yamaguchi","name":"Yohei","surname":"Yamaguchi","rank":null,"pid":[{"value":"0000-0002-3725-4800","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Matthias 
Saimpert","name":"Matthias","surname":"Saimpert","rank":null,"pid":[{"value":"0000-0002-3765-1320","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alexander Grohsjean","name":"Alexander","surname":"Grohsjean","rank":null,"pid":[{"value":"0000-0003-0748-8494","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tibor Zenis","name":"Tibor","surname":"Zenis","rank":null,"pid":[{"value":"0000-0001-8265-6916","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stefano Camarda","name":"Stefano","surname":"Camarda","rank":null,"pid":[{"value":"0000-0003-0479-7689","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Carlos Lacasta","name":"Carlos","surname":"Lacasta","rank":null,"pid":[{"value":"0000-0002-2623-6252","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Oliver Ricken","name":"Oliver","surname":"Ricken","rank":null,"pid":[{"value":"0000-0001-5107-7276","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Claude Leroy","name":"Claude","surname":"Leroy","rank":null,"pid":[{"value":"0000-0003-3105-7045","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Murrough Landon","name":"Murrough","surname":"Landon","rank":null,"pid":[{"value":"0000-0001-6828-9769","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Martin 
White","name":"Martin","surname":"White","rank":null,"pid":[{"value":"0000-0001-5474-4580","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Dimitrii Krasnopevtsev","name":"Dimitrii","surname":"Krasnopevtsev","rank":null,"pid":[{"value":"0000-0002-6356-372X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Susana Cabrera Urbán","name":"Susana","surname":"Cabrera Urbán","rank":null,"pid":[{"value":"0000-0001-7640-7913","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hans-Christian Schultz-Coulon","name":"Hans-Christian","surname":"Schultz-Coulon","rank":null,"pid":[{"value":"0000-0002-0860-7240","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephen Jiggins","name":"Stephen","surname":"Jiggins","rank":null,"pid":[{"value":"0000-0003-2906-1977","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andre Sopczak","name":"Andre","surname":"Sopczak","rank":null,"pid":[{"value":"0000-0001-6981-0544","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Cristinel Diaconu","name":"Cristinel","surname":"Diaconu","rank":null,"pid":[{"value":"0000-0002-6193-5091","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hector de la Torre Perez","name":"Hector","surname":"de la Torre 
Perez","rank":null,"pid":[{"value":"0000-0002-4516-5269","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Costa Mezquita","name":"Costa","surname":"Mezquita","rank":null,"pid":[{"value":"0000-0002-2064-2954","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sahal Yacoob","name":"Sahal","surname":"Yacoob","rank":null,"pid":[{"value":"0000-0001-6977-3456","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jian Liu","name":"Jian","surname":"Liu","rank":null,"pid":[{"value":"0000-0002-8397-7620","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Oxana Smirnova","name":"Oxana","surname":"Smirnova","rank":null,"pid":[{"value":"0000-0003-2517-531X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Thomas Koffas","name":"Thomas","surname":"Koffas","rank":null,"pid":[{"value":"0000-0001-9612-4988","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephen Burke","name":"Stephen","surname":"Burke","rank":null,"pid":[{"value":"0000-0002-1962-8493","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elizaveta Shabalina","name":"Elizaveta","surname":"Shabalina","rank":null,"pid":[{"value":"0000-0003-4849-556X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nicolas 
Ellis","name":"Nicolas","surname":"Ellis","rank":null,"pid":[{"value":"0000-0002-1920-4930","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Robert Astalos","name":"Robert","surname":"Astalos","rank":null,"pid":[{"value":"0000-0001-5095-605X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Valerio Dao","name":"Valerio","surname":"Dao","rank":null,"pid":[{"value":"0000-0003-1645-8393","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Javier Alberto Murillo Quijada","name":"Javier Alberto","surname":"Murillo Quijada","rank":null,"pid":[{"value":"0000-0003-4933-2092","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alan Litke","name":"Alan","surname":"Litke","rank":null,"pid":[{"value":"0000-0003-3973-3642","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hans Peter Beck","name":"Hans Peter","surname":"Beck","rank":null,"pid":[{"value":"0000-0001-7212-1096","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Flavia De Almeida Dias","name":"Flavia","surname":"De Almeida Dias","rank":null,"pid":[{"value":"0000-0001-6882-5402","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mykhailo Lisovyi","name":"Mykhailo","surname":"Lisovyi","rank":null,"pid":[{"value":"0000-0002-3014-5855","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frank 
Ellinghaus","name":"Frank","surname":"Ellinghaus","rank":null,"pid":[{"value":"0000-0003-3596-5331","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alexey Ezhilov","name":"Alexey","surname":"Ezhilov","rank":null,"pid":[{"value":"0000-0002-7520-293X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Anna Sfyrla","name":"Anna","surname":"Sfyrla","rank":null,"pid":[{"value":"0000-0002-3003-9905","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marc-André Pleier","name":"Marc-André","surname":"Pleier","rank":null,"pid":[{"value":"0000-0002-9461-3494","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Aliaksei Hrynevich","name":"Aliaksei","surname":"Hrynevich","rank":null,"pid":[{"value":"0000-0002-5411-114X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lucia Masetti","name":"Lucia","surname":"Masetti","rank":null,"pid":[{"value":"0000-0002-0038-5372","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Petr Balek","name":"Petr","surname":"Balek","rank":null,"pid":[{"value":"0000-0002-0942-1966","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marco Sessa","name":"Marco","surname":"Sessa","rank":null,"pid":[{"value":"0000-0002-1402-7525","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ilija 
Vukotic","name":"Ilija","surname":"Vukotic","rank":null,"pid":[{"value":"0000-0003-0472-3516","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tova Holmes","name":"Tova","surname":"Holmes","rank":null,"pid":[{"value":"0000-0002-3959-5174","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hermann Kolanoski","name":"Hermann","surname":"Kolanoski","rank":null,"pid":[{"value":"0000-0003-0435-2524","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sigve Haug","name":"Sigve","surname":"Haug","rank":null,"pid":[{"value":"0000-0003-0442-3361","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bruno Galhardo","name":"Bruno","surname":"Galhardo","rank":null,"pid":[{"value":"0000-0003-0641-301X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nello Bruscino","name":"Nello","surname":"Bruscino","rank":null,"pid":[{"value":"0000-0002-6168-689X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jonas Strandberg","name":"Jonas","surname":"Strandberg","rank":null,"pid":[{"value":"0000-0002-8913-0981","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Riccardo Vari","name":"Riccardo","surname":"Vari","rank":null,"pid":[{"value":"0000-0002-2814-1337","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Emmanuel 
Sauvan","name":"Emmanuel","surname":"Sauvan","rank":null,"pid":[{"value":"0000-0003-1921-2647","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hartmut Sadrozinski","name":"Hartmut","surname":"Sadrozinski","rank":null,"pid":[{"value":"0000-0003-0019-5410","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Juerg Beringer","name":"Juerg","surname":"Beringer","rank":null,"pid":[{"value":"0000-0002-9975-1781","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"laurent chevalier","name":"laurent","surname":"chevalier","rank":null,"pid":[{"value":"0000-0003-3762-7264","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Dominik Duda","name":"Dominik","surname":"Duda","rank":null,"pid":[{"value":"0000-0002-5916-3467","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Takanori Kono","name":"Takanori","surname":"Kono","rank":null,"pid":[{"value":"0000-0003-1553-2950","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Igor Gorelov","name":"Igor","surname":"Gorelov","rank":null,"pid":[{"value":"0000-0001-5570-0133","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Noam Tal Hod","name":"Noam","surname":"Tal Hod","rank":null,"pid":[{"value":"0000-0001-5241-0544","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Umberto De 
Sanctis","name":"Umberto","surname":"De Sanctis","rank":null,"pid":[{"value":"0000-0003-4704-525X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrii Tykhonov","name":"Andrii","surname":"Tykhonov","rank":null,"pid":[{"value":"0000-0003-2908-7915","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Francois Corriveau","name":"Francois","surname":"Corriveau","rank":null,"pid":[{"value":"0000-0002-4970-7600","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michaela Queitsch-Maitland","name":"Michaela","surname":"Queitsch-Maitland","rank":null,"pid":[{"value":"0000-0003-4643-515X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Iacopo Vivarelli","name":"Iacopo","surname":"Vivarelli","rank":null,"pid":[{"value":"0000-0003-0097-123X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"sotirios vlachos","name":"sotirios","surname":"vlachos","rank":null,"pid":[{"value":"0000-0002-1879-3745","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Archil Durglishvili","name":"Archil","surname":"Durglishvili","rank":null,"pid":[{"value":"0000-0003-4157-592X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vladimir 
Cindro","name":"Vladimir","surname":"Cindro","rank":null,"pid":[{"value":"0000-0002-2037-7185","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tim Adye","name":"Tim","surname":"Adye","rank":null,"pid":[{"value":"0000-0003-0627-5059","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ruggero Turra","name":"Ruggero","surname":"Turra","rank":null,"pid":[{"value":"0000-0001-8740-796X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Susana Amor Santos","name":"Susana","surname":"Amor Santos","rank":null,"pid":[{"value":"0000-0001-7566-6067","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Thorsten Wengler","name":"Thorsten","surname":"Wengler","rank":null,"pid":[{"value":"0000-0002-4375-5265","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sergio Grancagnolo","name":"Sergio","surname":"Grancagnolo","rank":null,"pid":[{"value":"0000-0001-8490-8304","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Koji Sato","name":"Koji","surname":"Sato","rank":null,"pid":[{"value":"0000-0001-8988-4065","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marjorie Shapiro","name":"Marjorie","surname":"Shapiro","rank":null,"pid":[{"value":"0000-0001-8540-9654","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paul 
Thompson","name":"Paul","surname":"Thompson","rank":null,"pid":[{"value":"0000-0002-6239-7715","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Daniele Zanzi","name":"Daniele","surname":"Zanzi","rank":null,"pid":[{"value":"0000-0002-1222-7937","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Adrian Irles","name":"Adrian","surname":"Irles","rank":null,"pid":[{"value":"0000-0001-5668-151X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Walter Hopkins","name":"Walter","surname":"Hopkins","rank":null,"pid":[{"value":"0000-0001-7814-8740","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Aurelio Juste","name":"Aurelio","surname":"Juste","rank":null,"pid":[{"value":"0000-0002-1558-3291","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yoram Rozen","name":"Yoram","surname":"Rozen","rank":null,"pid":[{"value":"0000-0001-6969-0634","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mateusz Dyndal","name":"Mateusz","surname":"Dyndal","rank":null,"pid":[{"value":"0000-0001-9632-6352","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Monika Wielers","name":"Monika","surname":"Wielers","rank":null,"pid":[{"value":"0000-0001-9232-4827","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Anna 
Kaczmarska","name":"Anna","surname":"Kaczmarska","rank":null,"pid":[{"value":"0000-0002-8880-4120","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Evgeny Khramov","name":"Evgeny","surname":"Khramov","rank":null,"pid":[{"value":"0000-0001-7400-6454","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vincent Alexander Croft","name":"Vincent Alexander","surname":"Croft","rank":null,"pid":[{"value":"0000-0002-8731-4525","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yuji Yamazaki","name":"Yuji","surname":"Yamazaki","rank":null,"pid":[{"value":"0000-0003-3710-6995","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sau Lan Wu","name":"Sau Lan","surname":"Wu","rank":null,"pid":[{"value":"0000-0001-5866-1504","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kevin Kröninger","name":"Kevin","surname":"Kröninger","rank":null,"pid":[{"value":"0000-0001-9873-0228","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Calin Alexa","name":"Calin","surname":"Alexa","rank":null,"pid":[{"value":"0000-0003-0922-7669","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Frank Filthaut","name":"Frank","surname":"Filthaut","rank":null,"pid":[{"value":"0000-0003-3338-2247","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alaettin Serhan Mete","name":"Alaettin 
Serhan","surname":"Mete","rank":null,"pid":[{"value":"0000-0002-5508-530X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michael Duehrssen-Debling","name":"Michael","surname":"Duehrssen-Debling","rank":null,"pid":[{"value":"0000-0002-5833-7058","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Torre Wenaus","name":"Torre","surname":"Wenaus","rank":null,"pid":[{"value":"0000-0002-8678-893X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michele LIVAN","name":"Michele","surname":"LIVAN","rank":null,"pid":[{"value":"0000-0002-5877-0062","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jose Guillermo Panduro Vazquez","name":"Jose Guillermo","surname":"Panduro Vazquez","rank":null,"pid":[{"value":"0000-0003-2605-8940","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Pascal Pralavorio","name":"Pascal","surname":"Pralavorio","rank":null,"pid":[{"value":"0000-0002-2452-6715","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yuto Minami","name":"Yuto","surname":"Minami","rank":null,"pid":[{"value":"0000-0003-2176-8089","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stephen Hillier","name":"Stephen","surname":"Hillier","rank":null,"pid":[{"value":"0000-0002-7599-6469","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Simon 
Viel","name":"Simon","surname":"Viel","rank":null,"pid":[{"value":"0000-0001-9554-4059","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andreas Kugel","name":"Andreas","surname":"Kugel","rank":null,"pid":[{"value":"0000-0002-8493-6660","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Lene Kristian Bryngemark","name":"Lene Kristian","surname":"Bryngemark","rank":null,"pid":[{"value":"0000-0002-8420-3408","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Guenter Duckeck","name":"Guenter","surname":"Duckeck","rank":null,"pid":[{"value":"0000-0002-7756-7801","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alexey Anisenkov","name":"Alexey","surname":"Anisenkov","rank":null,"pid":[{"value":"0000-0002-7201-5936","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Javier Jiménez Peña","name":"Javier","surname":"Jiménez Peña","rank":null,"pid":[{"value":"0000-0002-8705-628X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sascha null","name":"Sascha","surname":null,"rank":null,"pid":[{"value":"0000-0003-2941-2829","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Evelyn Thomson","name":"Evelyn","surname":"Thomson","rank":null,"pid":[{"value":"0000-0001-6031-2768","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Andrea 
Formica","name":"Andrea","surname":"Formica","rank":null,"pid":[{"value":"0000-0001-8308-2643","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Mattias Ellert","name":"Mattias","surname":"Ellert","rank":null,"pid":[{"value":"0000-0001-5265-3175","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Richard Keeler","name":"Richard","surname":"Keeler","rank":null,"pid":[{"value":"0000-0002-0510-4189","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Karolos Potamianos","name":"Karolos","surname":"Potamianos","rank":null,"pid":[{"value":"0000-0001-7839-9785","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Muhammad Alhroob","name":"Muhammad","surname":"Alhroob","rank":null,"pid":[{"value":"0000-0001-7569-7111","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marie-Helene Genest","name":"Marie-Helene","surname":"Genest","rank":null,"pid":[{"value":"0000-0002-4098-2024","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"melissa franklin","name":"melissa","surname":"franklin","rank":null,"pid":[{"value":"0000-0002-6595-883X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"John Baines","name":"John","surname":"Baines","rank":null,"pid":[{"value":"0000-0003-0770-2702","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jean-Baptiste de 
Vivie","name":"Jean-Baptiste","surname":"de Vivie","rank":null,"pid":[{"value":"0000-0001-9163-2211","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kristian Gregersen","name":"Kristian","surname":"Gregersen","rank":null,"pid":[{"value":"0000-0003-0295-1670","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Elisabeth Schopf","name":"Elisabeth","surname":"Schopf","rank":null,"pid":[{"value":"0000-0002-9340-2214","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Tristan Beau","name":"Tristan","surname":"Beau","rank":null,"pid":[{"value":"0000-0002-2022-2140","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ask Emil Løvschall-Jensen","name":"Ask Emil","surname":"Løvschall-Jensen","rank":null,"pid":[{"value":"0000-0003-1834-4904","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Karel Smolek","name":"Karel","surname":"Smolek","rank":null,"pid":[{"value":"0000-0002-5996-7000","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Wendy Taylor","name":"Wendy","surname":"Taylor","rank":null,"pid":[{"value":"0000-0002-6596-9125","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"James Robinson","name":"James","surname":"Robinson","rank":null,"pid":[{"value":"0000-0002-2856-9413","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Arno 
Straessner","name":"Arno","surname":"Straessner","rank":null,"pid":[{"value":"0000-0003-2460-6659","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Bruno Lenzi","name":"Bruno","surname":"Lenzi","rank":null,"pid":[{"value":"0000-0002-1024-4004","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Thomas G McCarthy","name":"Thomas G","surname":"McCarthy","rank":null,"pid":[{"value":"0000-0002-1182-3526","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Maria Curatolo","name":"Maria","surname":"Curatolo","rank":null,"pid":[{"value":"0000-0003-0978-4879","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Zuzana Rurikova","name":"Zuzana","surname":"Rurikova","rank":null,"pid":[{"value":"0000-0003-3051-9607","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Norman Gee","name":"Norman","surname":"Gee","rank":null,"pid":[{"value":"0000-0002-8833-3154","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Craig Buttar","name":"Craig","surname":"Buttar","rank":null,"pid":[{"value":"0000-0003-0188-6491","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sophie Trincaz-Duvoid","name":"Sophie","surname":"Trincaz-Duvoid","rank":null,"pid":[{"value":"0000-0001-5913-0828","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Danilo Enoque Ferreira de 
Lima","name":"Danilo Enoque","surname":"Ferreira de Lima","rank":null,"pid":[{"value":"0000-0002-6606-3595","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Nektarios Benekos","name":"Nektarios","surname":"Benekos","rank":null,"pid":[{"value":"0000-0001-7831-8762","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Evgenii Baldin","name":"Evgenii","surname":"Baldin","rank":null,"pid":[{"value":"0000-0002-9854-975X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Craig Sawyer","name":"Craig","surname":"Sawyer","rank":null,"pid":[{"value":"0000-0002-2027-1428","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"George Redlinger","name":"George","surname":"Redlinger","rank":null,"pid":[{"value":"0000-0002-6437-9991","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Philip Sommer","name":"Philip","surname":"Sommer","rank":null,"pid":[{"value":"0000-0003-1703-7304","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Simone Monzani","name":"Simone","surname":"Monzani","rank":null,"pid":[{"value":"0000-0002-0479-2207","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Eirik Gramstad","name":"Eirik","surname":"Gramstad","rank":null,"pid":[{"value":"0000-0001-5792-5352","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Anna 
Lipniacka","name":"Anna","surname":"Lipniacka","rank":null,"pid":[{"value":"0000-0002-8759-8564","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Oana Vickey Boeriu","name":"Oana","surname":"Vickey Boeriu","rank":null,"pid":[{"value":"0000-0002-6497-6809","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Fridolin Dittus","name":"Fridolin","surname":"Dittus","rank":null,"pid":[{"value":"0000-0002-1760-8237","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Antonio Salvucci","name":"Antonio","surname":"Salvucci","rank":null,"pid":[{"value":"0000-0003-4876-2613","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Carlo Dallapiccola","name":"Carlo","surname":"Dallapiccola","rank":null,"pid":[{"value":"0000-0002-1391-2477","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Konstantinos Ntekas","name":"Konstantinos","surname":"Ntekas","rank":null,"pid":[{"value":"0000-0001-9252-6509","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Juan Antonio Aguilar Saavedra","name":"Juan Antonio","surname":"Aguilar Saavedra","rank":null,"pid":[{"value":"0000-0002-5475-8920","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Martine 
Bosman","name":"Martine","surname":"Bosman","rank":null,"pid":[{"value":"0000-0002-7290-643X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Trevor Vickey","name":"Trevor","surname":"Vickey","rank":null,"pid":[{"value":"0000-0002-1596-2611","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christian Ohm","name":"Christian","surname":"Ohm","rank":null,"pid":[{"value":"0000-0002-8015-7512","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"William Davey","name":"William","surname":"Davey","rank":null,"pid":[{"value":"0000-0002-8140-8619","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Wolfgang Walkowiak","name":"Wolfgang","surname":"Walkowiak","rank":null,"pid":[{"value":"0000-0002-0385-3784","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Austin Basye","name":"Austin","surname":"Basye","rank":null,"pid":[{"value":"0000-0002-7519-1310","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kuhan Wang","name":"Kuhan","surname":"Wang","rank":null,"pid":[{"value":"0000-0002-6151-0034","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Cristiano Alpigiani","name":"Cristiano","surname":"Alpigiani","rank":null,"pid":[{"value":"0000-0002-7641-5814","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"James 
Monk","name":"James","surname":"Monk","rank":null,"pid":[{"value":"0000-0001-8471-9247","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christopher Pollard","name":"Christopher","surname":"Pollard","rank":null,"pid":[{"value":"0000-0002-3690-3960","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jozsef Toth","name":"Jozsef","surname":"Toth","rank":null,"pid":[{"value":"0000-0001-9128-6080","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Vitaliano Chiarella","name":"Vitaliano","surname":"Chiarella","rank":null,"pid":[{"value":"0000-0002-4210-2924","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jos Vermeulen","name":"Jos","surname":"Vermeulen","rank":null,"pid":[{"value":"0000-0003-4378-5736","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yann Coadou","name":"Yann","surname":"Coadou","rank":null,"pid":[{"value":"0000-0001-8195-7004","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Christine Kourkoumelis","name":"Christine","surname":"Kourkoumelis","rank":null,"pid":[{"value":"0000-0003-0083-274X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Karishma Sekhon","name":"Karishma","surname":"Sekhon","rank":null,"pid":[{"value":"0000-0001-7677-8394","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Kun 
Liu","name":"Kun","surname":"Liu","rank":null,"pid":[{"value":"0000-0001-5807-0501","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Matt LeBlanc","name":"Matt","surname":"LeBlanc","rank":null,"pid":[{"value":"0000-0001-5977-6418","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ewan Hill","name":"Ewan","surname":"Hill","rank":null,"pid":[{"value":"0000-0002-1725-7414","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Diane Cinca","name":"Diane","surname":"Cinca","rank":null,"pid":[{"value":"0000-0003-0944-8998","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Benedict Tobias Winter","name":"Benedict Tobias","surname":"Winter","rank":null,"pid":[{"value":"0000-0001-9606-7688","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Steven Schramm","name":"Steven","surname":"Schramm","rank":null,"pid":[{"value":"0000-0001-9031-6751","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yusheng Wu","name":"Yusheng","surname":"Wu","rank":null,"pid":[{"value":"0000-0002-1528-4865","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Orel Gueta","name":"Orel","surname":"Gueta","rank":null,"pid":[{"value":"0000-0002-9440-2398","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Federico 
Meloni","name":"Federico","surname":"Meloni","rank":null,"pid":[{"value":"0000-0001-7075-2214","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sylvain Tisserant","name":"Sylvain","surname":"Tisserant","rank":null,"pid":[{"value":"0000-0002-0294-6727","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Dimitrios Iliadis","name":"Dimitrios","surname":"Iliadis","rank":null,"pid":[{"value":"0000-0001-6303-2761","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Sebastien Binet","name":"Sebastien","surname":"Binet","rank":null,"pid":[{"value":"0000-0003-4913-6104","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marcella Bona","name":"Marcella","surname":"Bona","rank":null,"pid":[{"value":"0000-0002-9660-580X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Ki Lie","name":"Ki","surname":"Lie","rank":null,"pid":[{"value":"0000-0002-5779-5989","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"David Sankey","name":"David","surname":"Sankey","rank":null,"pid":[{"value":"0000-0003-0955-4213","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michele Faucci Giannelli","name":"Michele","surname":"Faucci Giannelli","rank":null,"pid":[{"value":"0000-0003-3731-820X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Georges 
Azuelos","name":"Georges","surname":"Azuelos","rank":null,"pid":[{"value":"0000-0003-4241-022X","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"David Britton","name":"David","surname":"Britton","rank":null,"pid":[{"value":"0000-0001-9998-4342","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Henri Bachacou","name":"Henri","surname":"Bachacou","rank":null,"pid":[{"value":"0000-0002-2256-4515","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Stefania Stucci","name":"Stefania","surname":"Stucci","rank":null,"pid":[{"value":"0000-0002-1639-4484","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Peter Berta","name":"Peter","surname":"Berta","rank":null,"pid":[{"value":"0000-0003-0780-0345","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"SERKANT ALİ ÇETİN","name":"SERKANT ALİ","surname":"ÇETİN","rank":null,"pid":[{"value":"0000-0001-5050-8441","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Alexey Zhemchugov","name":"Alexey","surname":"Zhemchugov","rank":null,"pid":[{"value":"0000-0002-3360-4965","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Yasushi Nagasaka","name":"Yasushi","surname":"Nagasaka","rank":null,"pid":[{"value":"0000-0002-3669-9525","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"James 
Walder","name":"James","surname":"Walder","rank":null,"pid":[{"value":"0000-0002-9039-8758","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Marina Rotaru","name":"Marina","surname":"Rotaru","rank":null,"pid":[{"value":"0000-0003-4088-6275","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Chunhui Chen","name":"Chunhui","surname":"Chen","rank":null,"pid":[{"value":"0000-0003-1589-9955","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Valentina Tudorache","name":"Valentina","surname":"Tudorache","rank":null,"pid":[{"value":"0000-0001-5384-3843","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Amanda Cooper-Sarkar","name":"Amanda","surname":"Cooper-Sarkar","rank":null,"pid":[{"value":"0000-0002-7107-5902","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Paul Dervan","name":"Paul","surname":"Dervan","rank":null,"pid":[{"value":"0000-0003-3929-8046","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Laura Gonella","name":"Laura","surname":"Gonella","rank":null,"pid":[{"value":"0000-0002-4919-0808","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Georges Aad","name":"Georges","surname":"Aad","rank":null,"pid":[{"value":"0000-0002-6665-4934","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Hok-Chuen 
Cheng","name":"Hok-Chuen","surname":"Cheng","rank":null,"pid":[{"value":"0000-0002-8912-4389","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michal Svatos","name":"Michal","surname":"Svatos","rank":null,"pid":[{"value":"0000-0002-7199-3383","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jens Janssen","name":"Jens","surname":"Janssen","rank":null,"pid":[{"value":"0000-0002-2391-3078","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Jana Faltova","name":"Jana","surname":"Faltova","rank":null,"pid":[{"value":"0000-0003-4278-7182","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Trisha Farooque","name":"Trisha","surname":"Farooque","rank":null,"pid":[{"value":"0000-0003-1363-9324","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"andrea gabrielli","name":"andrea","surname":"gabrielli","rank":null,"pid":[{"value":"0000-0003-0768-9325","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Santiago Gonzalez de la Hoz","name":"Santiago","surname":"Gonzalez de la Hoz","rank":null,"pid":[{"value":"0000-0001-5304-5390","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"José Ocariz","name":"José","surname":"Ocariz","rank":null,"pid":[{"value":"0000-0003-2262-0780","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null},{"fullname":"Michele 
Weber","name":"Michele","surname":"Weber","rank":null,"pid":[{"value":"0000-0002-2770-9031","qualifier":{"classid":"ORCID","classname":"ORCID","schemeid":"ORCID","schemename":"ORCID"},"dataInfo":null}],"affiliation":null}]
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
new file mode 100644
index 000000000..eac239b93
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/authors_publication_sample.json
@@ -0,0 +1,2 @@
+50|arXiv_dedup_::34e03f2336b8b28286550425e65634ea
+[{"fullname":"Aad, G.","name":"G.","surname":"Aad","rank":1,"pid":[],"affiliation":null},{"fullname":"Ayoub, M.K.","name":"M. K.","surname":"Ayoub","rank":2,"pid":[],"affiliation":null},{"fullname":"Bassalat, A.","name":"A.","surname":"Bassalat","rank":3,"pid":[],"affiliation":null},{"fullname":"Becot, C.","name":"C.","surname":"Becot","rank":4,"pid":[],"affiliation":null},{"fullname":"Binet, S.","name":"S.","surname":"Binet","rank":5,"pid":[],"affiliation":null},{"fullname":"Bourdarios, C.","name":"C.","surname":"Bourdarios","rank":6,"pid":[],"affiliation":null},{"fullname":"Regie, J.B. de Vivie de","name":"J. B. Vivie","surname":"Regie","rank":7,"pid":[],"affiliation":null},{"fullname":"Delgove, D.","name":"D.","surname":"Delgove","rank":8,"pid":[],"affiliation":null},{"fullname":"Duflot, L.","name":"L.","surname":"Duflot","rank":9,"pid":[],"affiliation":null},{"fullname":"Escalier, M.","name":"M.","surname":"Escalier","rank":10,"pid":[],"affiliation":null},{"fullname":"Fayard, L.","name":"L.","surname":"Fayard","rank":11,"pid":[],"affiliation":null},{"fullname":"Fournier, D.","name":"D.","surname":"Fournier","rank":12,"pid":[],"affiliation":null},{"fullname":"Gkougkousis, E.L.","name":"E. L.","surname":"Gkougkousis","rank":13,"pid":[],"affiliation":null},{"fullname":"Grivaz, J.-F.","name":"J. 
-F","surname":"Grivaz","rank":14,"pid":[],"affiliation":null},{"fullname":"Guillemin, T.","name":"T.","surname":"Guillemin","rank":15,"pid":[],"affiliation":null},{"fullname":"Hariri, F.","name":"F.","surname":"Hariri","rank":16,"pid":[],"affiliation":null},{"fullname":"Henrot-Versillé, S.","name":"S.","surname":"Henrot-Versillé","rank":17,"pid":[],"affiliation":null},{"fullname":"Hrivnac, J.","name":"J.","surname":"Hrivnac","rank":18,"pid":[],"affiliation":null},{"fullname":"Iconomidou-Fayard, L.","name":"L.","surname":"Iconomidou-Fayard","rank":19,"pid":[],"affiliation":null},{"fullname":"Kado, M.","name":"M.","surname":"Kado","rank":20,"pid":[],"affiliation":null},{"fullname":"Lounis, A.","name":"A.","surname":"Lounis","rank":21,"pid":[],"affiliation":null},{"fullname":"Makovec, N.","name":"N.","surname":"Makovec","rank":22,"pid":[],"affiliation":null},{"fullname":"Morange, N.","name":"N.","surname":"Morange","rank":23,"pid":[],"affiliation":null},{"fullname":"Nellist, C.","name":"C.","surname":"Nellist","rank":24,"pid":[],"affiliation":null},{"fullname":"Poggioli, L.","name":"L.","surname":"Poggioli","rank":25,"pid":[],"affiliation":null},{"fullname":"Puzo, P.","name":"P.","surname":"Puzo","rank":26,"pid":[],"affiliation":null},{"fullname":"Renaud, A.","name":"A.","surname":"Renaud","rank":27,"pid":[],"affiliation":null},{"fullname":"Rousseau, D.","name":"D.","surname":"Rousseau","rank":28,"pid":[],"affiliation":null},{"fullname":"Rybkin, G.","name":"G.","surname":"Rybkin","rank":29,"pid":[],"affiliation":null},{"fullname":"Schaffer, A.C.","name":"A. 
C.","surname":"Schaffer","rank":30,"pid":[],"affiliation":null},{"fullname":"Scifo, E.","name":"E.","surname":"Scifo","rank":31,"pid":[],"affiliation":null},{"fullname":"Serin, L.","name":"L.","surname":"Serin","rank":32,"pid":[],"affiliation":null},{"fullname":"Simion, S.","name":"S.","surname":"Simion","rank":33,"pid":[],"affiliation":null},{"fullname":"Tanaka, R.","name":"R.","surname":"Tanaka","rank":34,"pid":[],"affiliation":null},{"fullname":"Tran, H.L.","name":"H. L.","surname":"Tran","rank":35,"pid":[],"affiliation":null},{"fullname":"Zerwas, D.","name":"D.","surname":"Zerwas","rank":36,"pid":[],"affiliation":null},{"fullname":"Zhang, Zhongkai","name":"Zhongkai","surname":"Zhang","rank":37,"pid":[],"affiliation":null},{"fullname":"Zhao, Y.","name":"Y.","surname":"Zhao","rank":38,"pid":[],"affiliation":null},{"fullname":"Rahal, G.","name":"G.","surname":"Rahal","rank":39,"pid":[],"affiliation":null},{"fullname":"Barnovska, Z.","name":"Z.","surname":"Barnovska","rank":40,"pid":[],"affiliation":null},{"fullname":"Berger, N.","name":"N.","surname":"Berger","rank":41,"pid":[],"affiliation":null},{"fullname":"Delmastro, M.","name":"M.","surname":"Delmastro","rank":42,"pid":[],"affiliation":null},{"fullname":"Ciaccio, L. Di","name":"L. 
Di","surname":"Ciaccio","rank":43,"pid":[],"affiliation":null},{"fullname":"Elles, S.","name":"S.","surname":"Elles","rank":44,"pid":[],"affiliation":null},{"fullname":"Hryn’ova, T.","name":"T.","surname":"Hryn’ova","rank":45,"pid":[],"affiliation":null},{"fullname":"Jézéquel, S.","name":"S.","surname":"Jézéquel","rank":46,"pid":[],"affiliation":null},{"fullname":"Koletsou, I.","name":"I.","surname":"Koletsou","rank":47,"pid":[],"affiliation":null},{"fullname":"Lafaye, R.","name":"R.","surname":"Lafaye","rank":48,"pid":[],"affiliation":null},{"fullname":"Leveque, J.","name":"J.","surname":"Leveque","rank":49,"pid":[],"affiliation":null},{"fullname":"Massol, N.","name":"N.","surname":"Massol","rank":50,"pid":[],"affiliation":null},{"fullname":"Sauvage, G.","name":"G.","surname":"Sauvage","rank":51,"pid":[],"affiliation":null},{"fullname":"Sauvan, E.","name":"E.","surname":"Sauvan","rank":52,"pid":[],"affiliation":null},{"fullname":"Simard, O.","name":"O.","surname":"Simard","rank":53,"pid":[],"affiliation":null},{"fullname":"Todorov, T.","name":"T.","surname":"Todorov","rank":54,"pid":[],"affiliation":null},{"fullname":"Wingerter-Seez, I.","name":"I.","surname":"Wingerter-Seez","rank":55,"pid":[],"affiliation":null},{"fullname":"Albrand, S.","name":"S.","surname":"Albrand","rank":56,"pid":[],"affiliation":null},{"fullname":"Brown, J.","name":"J.","surname":"Brown","rank":57,"pid":[],"affiliation":null},{"fullname":"Collot, J.","name":"J.","surname":"Collot","rank":58,"pid":[],"affiliation":null},{"fullname":"Crépé-Renaudin, S.","name":"S.","surname":"Crépé-Renaudin","rank":59,"pid":[],"affiliation":null},{"fullname":"Delsart, P.A.","name":"P. A.","surname":"Delsart","rank":60,"pid":[],"affiliation":null},{"fullname":"Gabaldon, C.","name":"C.","surname":"Gabaldon","rank":61,"pid":[],"affiliation":null},{"fullname":"Genest, M.H.","name":"M. H.","surname":"Genest","rank":62,"pid":[],"affiliation":null},{"fullname":"Hostachy, J.Y.","name":"J. 
Y.","surname":"Hostachy","rank":63,"pid":[],"affiliation":null},{"fullname":"Ledroit-Guillon, F.","name":"F.","surname":"Ledroit-Guillon","rank":64,"pid":[],"affiliation":null},{"fullname":"Lleres, A.","name":"A.","surname":"Lleres","rank":65,"pid":[],"affiliation":null},{"fullname":"Lucotte, A.","name":"A.","surname":"Lucotte","rank":66,"pid":[],"affiliation":null},{"fullname":"Malek, F.","name":"F.","surname":"Malek","rank":67,"pid":[],"affiliation":null},{"fullname":"Monini, C.","name":"C.","surname":"Monini","rank":68,"pid":[],"affiliation":null},{"fullname":"Stark, J.","name":"J.","surname":"Stark","rank":69,"pid":[],"affiliation":null},{"fullname":"Trocmé, B.","name":"B.","surname":"Trocmé","rank":70,"pid":[],"affiliation":null},{"fullname":"Wu, M.","name":"M.","surname":"Wu","rank":71,"pid":[],"affiliation":null},{"fullname":"Alio, L.","name":"L.","surname":"Alio","rank":72,"pid":[],"affiliation":null},{"fullname":"Barbero, M.","name":"M.","surname":"Barbero","rank":73,"pid":[],"affiliation":null},{"fullname":"Coadou, Y.","name":"Y.","surname":"Coadou","rank":74,"pid":[],"affiliation":null},{"fullname":"Diaconu, C.","name":"C.","surname":"Diaconu","rank":75,"pid":[],"affiliation":null},{"fullname":"Diglio, Sara","name":"Sara","surname":"Diglio","rank":76,"pid":[],"affiliation":null},{"fullname":"Djama, F.","name":"F.","surname":"Djama","rank":77,"pid":[],"affiliation":null},{"fullname":"Duccu, O.","name":"O.","surname":"Duccu","rank":78,"pid":[],"affiliation":null},{"fullname":"Feligioni, L.","name":"L.","surname":"Feligioni","rank":79,"pid":[],"affiliation":null},{"fullname":"Gao, J.","name":"J.","surname":"Gao","rank":80,"pid":[],"affiliation":null},{"fullname":"Hallewell, G.D.","name":"G. D.","surname":"Hallewell","rank":81,"pid":[],"affiliation":null},{"fullname":"Hubaut, F.","name":"F.","surname":"Hubaut","rank":82,"pid":[],"affiliation":null},{"fullname":"Kahn, S.J.","name":"S. 
J.","surname":"Kahn","rank":83,"pid":[],"affiliation":null},{"fullname":"Knoops, E. B. F. G.","name":"E. B. F. G.","surname":"Knoops","rank":84,"pid":[],"affiliation":null},{"fullname":"Guirriec, E. Le","name":"E. Le","surname":"Guirriec","rank":85,"pid":[],"affiliation":null},{"fullname":"Liu, J.","name":"J.","surname":"Liu","rank":86,"pid":[],"affiliation":null},{"fullname":"Liu, K.","name":"K.","surname":"Liu","rank":87,"pid":[],"affiliation":null},{"fullname":"Madaffari, D.","name":"D.","surname":"Madaffari","rank":88,"pid":[],"affiliation":null},{"fullname":"Mochizuki, K.","name":"K.","surname":"Mochizuki","rank":89,"pid":[],"affiliation":null},{"fullname":"Monnier, E.","name":"E.","surname":"Monnier","rank":90,"pid":[],"affiliation":null},{"fullname":"Muanza, S.","name":"S.","surname":"Muanza","rank":91,"pid":[],"affiliation":null},{"fullname":"Nagai, Y.","name":"Y.","surname":"Nagai","rank":92,"pid":[],"affiliation":null},{"fullname":"Nagy, E.","name":"E.","surname":"Nagy","rank":93,"pid":[],"affiliation":null},{"fullname":"Pralavorio, P.","name":"P.","surname":"Pralavorio","rank":94,"pid":[],"affiliation":null},{"fullname":"Rozanov, A.","name":"A.","surname":"Rozanov","rank":95,"pid":[],"affiliation":null},{"fullname":"Serre, T.","name":"T.","surname":"Serre","rank":96,"pid":[],"affiliation":null},{"fullname":"Talby, M.","name":"M.","surname":"Talby","rank":97,"pid":[],"affiliation":null},{"fullname":"Torres, R.E. Ticse","name":"R. E. 
Ticse","surname":"Torres","rank":98,"pid":[],"affiliation":null},{"fullname":"Tiouchichine, E.","name":"E.","surname":"Tiouchichine","rank":99,"pid":[],"affiliation":null},{"fullname":"Tisserant, S.","name":"S.","surname":"Tisserant","rank":100,"pid":[],"affiliation":null},{"fullname":"Toth, J.","name":"J.","surname":"Toth","rank":101,"pid":[],"affiliation":null},{"fullname":"Touchard, F.","name":"F.","surname":"Touchard","rank":102,"pid":[],"affiliation":null},{"fullname":"Vacavant, L.","name":"L.","surname":"Vacavant","rank":103,"pid":[],"affiliation":null},{"fullname":"Boumediene, D.","name":"D.","surname":"Boumediene","rank":104,"pid":[],"affiliation":null},{"fullname":"Busato, Emmanuel","name":"Emmanuel","surname":"Busato","rank":105,"pid":[],"affiliation":null},{"fullname":"Calvet, D.","name":"D.","surname":"Calvet","rank":106,"pid":[],"affiliation":null},{"fullname":"Calvet, S.","name":"S.","surname":"Calvet","rank":107,"pid":[],"affiliation":null},{"fullname":"Donini, J.","name":"J.","surname":"Donini","rank":108,"pid":[],"affiliation":null},{"fullname":"Dubreuil, E.","name":"E.","surname":"Dubreuil","rank":109,"pid":[],"affiliation":null},{"fullname":"Gilles, G.","name":"G.","surname":"Gilles","rank":110,"pid":[],"affiliation":null},{"fullname":"Gris, Ph.","name":"Ph","surname":"Gris","rank":111,"pid":[],"affiliation":null},{"fullname":"Liao, H.","name":"H.","surname":"Liao","rank":112,"pid":[],"affiliation":null},{"fullname":"Madar, R.","name":"R.","surname":"Madar","rank":113,"pid":[],"affiliation":null},{"fullname":"Pallin, D.","name":"D.","surname":"Pallin","rank":114,"pid":[],"affiliation":null},{"fullname":"Saez, S.M. Romano","name":"S. M. 
Romano","surname":"Saez","rank":115,"pid":[],"affiliation":null},{"fullname":"Santoni, C.","name":"C.","surname":"Santoni","rank":116,"pid":[],"affiliation":null},{"fullname":"Simon, D.","name":"D.","surname":"Simon","rank":117,"pid":[],"affiliation":null},{"fullname":"Theveneaux-Pelzer, Timothée","name":"Timothée","surname":"Theveneaux-Pelzer","rank":118,"pid":[],"affiliation":null},{"fullname":"Vazeille, F.","name":"F.","surname":"Vazeille","rank":119,"pid":[],"affiliation":null},{"fullname":"Yatsenko, E.","name":"E.","surname":"Yatsenko","rank":120,"pid":[],"affiliation":null}]
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json
deleted file mode 100644
index 600181ba5..000000000
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/oa/merge/publications_with_authors.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{ "journal":{ "dataInfo":null, "conferenceplace":null, "issnPrinted":"0009-9260", "issnOnline":null, "issnLinking":null, "ep":"636", "iss":null, "sp":"632", "vol":"55", "edition":null, "conferencedate":null, "name":"Clinical Radiology" }, "measures":null, "author":[ { "rank":null, "fullname":"KARL TURETSCHEK", "affiliation":null, "pid":null, "surname":"TURETSCHEK", "name":"KARL" }, { "rank":null, "fullname":"WOLFGANG EBNER", "affiliation":null, "pid":null, "surname":"EBNER", "name":"WOLFGANG" }, { "rank":null, "fullname":"DOMINIK FLEISCHMANN", "affiliation":null, "pid":null, "surname":"FLEISCHMANN", "name":"DOMINIK" }, { "rank":null, "fullname":"PATRICK WUNDERBALDINGER", "affiliation":null, "pid":null, "surname":"WUNDERBALDINGER", "name":"PATRICK" }, { "rank":null, "fullname":"LUDWIG ERLACHER", "affiliation":null, "pid":null, "surname":"ERLACHER", "name":"LUDWIG" }, { "rank":null, "fullname":"THOMAS ZONTSICH", "affiliation":null, "pid":null, "surname":"ZONTSICH", "name":"THOMAS" }, { "rank":null, "fullname":"ALEXANDER A. BANKIER", "affiliation":null, "pid":null, "surname":"BANKIER", "name":"ALEXANDER A." 
} ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication"}, "title":[ { "qualifier":{ "classid":"main title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"main title" }, "dataInfo":null, "value":"Early Pulmonary Involvement in Ankylosing Spondylitis: Assessment With Thin-section CT" } ], "relevantdate":[ { "qualifier":{ "classid":"created", "schemeid":"dnet:dataCite_date", "schemename":"dnet:dataCite_date", "classname":"created" }, "dataInfo":null, "value":"2002-09-19T13:54:50Z" } ], "dateofacceptance":{ "dataInfo":null, "value":"2002-09-19T13:54:50Z" }, "publisher":{ "dataInfo":null, "value":"Elsevier BV" }, "embargoenddate":null, "fulltext":null, "contributor":null, "resourcetype":{ "classid":"0001", "schemeid":"dnet:dataCite_resource", "schemename":"dnet:dataCite_resource", "classname":"0001"}, "coverage":null, "bestaccessright":null, "externalReference":null, "format":null, "description":[ ], "source":[ { "dataInfo":null, "value":"Crossref" } ], "subject":[ { "qualifier":{ "classid":"keywords", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"keywords" }, "dataInfo":null, "value":"Radiology Nuclear Medicine and imaging" }, { "qualifier":{ "classid":"keywords", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"keywords" }, "dataInfo":null, "value":"General Medicine" } ], "language":null, "instance":[ { "processingchargecurrency":null, "refereed":null, "instancetype":{ "classid":"0001", "schemeid":"dnet:publication_resource", "schemename":"dnet:publication_resource", "classname":"Article" }, "hostedby":null, "distributionlocation":null, "processingchargeamount":null, "license":{ "dataInfo":null, "value":"https://www.elsevier.com/tdm/userlicense/1.0/" }, "accessright":{ "classid":"RESTRICTED", 
"schemeid":"dnet:access_modes", "schemename":"dnet:access_modes", "classname":"Restricted" }, "dateofacceptance":{ "dataInfo":null, "value":"2002-09-19T13:54:50Z" }, "collectedfrom":{ "dataInfo":null, "value":"Crossref", "key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" }, "url":[ "https://api.elsevier.com/content/article/PII:S0009926000904987?httpAccept=text/xml", "https://api.elsevier.com/content/article/PII:S0009926000904987?httpAccept=text/plain", "http://dx.doi.org/10.1053/crad.2000.0498" ] } ], "context":null, "country":null, "originalId":[ "S0009926000904987", "10.1053/crad.2000.0498" ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi" }, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":"2020-02-06T20:40:22Z", "dateoftransformation":null, "oaiprovenance":null, "extraInfo":null, "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"Crossref", "key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "lastupdatetimestamp":1581021622595 }
-{ "journal":null, "measures":null, "author":[ { "rank":null, "fullname":"Dominik Fleischmann", "affiliation":null, "pid":[ { "qualifier":{ "classid":"ORCID", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"ORCID" }, "dataInfo":{ "trust":"0.91", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:crosswalk:entityregistry", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"Harvested"} }, "value":"0000-0003-0715-0952" } ], "surname":"Fleischmann", "name":"Dominik" } ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication"}, "title":[ ], "relevantdate":[ ], "dateofacceptance":null, "publisher":null, "embargoenddate":null, "fulltext":[ ], "contributor":[ ], "resourcetype":null, "coverage":[ ], "bestaccessright":null, "externalReference":[ ], "format":[ ], "description":null, "source":[ ], "subject":[ ], "language":null, "instance":[ ], "context":[ ], "country":[ ], "originalId":[ ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi"}, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":null, "dateoftransformation":null, "oaiprovenance":null, "extraInfo":[ ], "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"ORCID", "key":"10|openaire____::806360c771262b4d6770e7cdf04b5c5a" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "lastupdatetimestamp":null }
-{ "journal":{ "dataInfo":null, "conferenceplace":null, "issnPrinted":"0009-9260", "issnOnline":null, "issnLinking":null, "ep":"636", "iss":"8", "sp":"632", "vol":"55", "edition":null, "conferencedate":null, "name":"Clinical Radiology" }, "measures":null, "author":[ { "rank":null, "fullname":"T. Zontsich", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/1966908432" } ], "surname":null, "name":null }, { "rank":null, "fullname":"L Erlacher", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/687931320" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Dominik Fleischmann", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2156559961" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Alexander A. 
Bankier", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL"}, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/1107971609" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Patrick Wunderbaldinger", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2422340537" } ], "surname":null, "name":null }, { "rank":null, "fullname":"Wolfgang Ebner", "affiliation":null, "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/2186462571" } ], "surname":null, "name":null }, { "rank":null, "fullname":"K. 
Turetschek", "affiliation":[ { "dataInfo":null, "value":"University of Vienna" } ], "pid":[ { "qualifier":{ "classid":"URL", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"URL" }, "dataInfo":null, "value":"https://academic.microsoft.com/#/detail/321765676" } ], "surname":null, "name":null } ], "resulttype":{ "classid":"publication", "schemeid":"dnet:result_typologies", "schemename":"dnet:result_typologies", "classname":"publication" }, "title":[ { "qualifier":{ "classid":"main title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"main title" }, "dataInfo":null, "value":"early pulmonary involvement in ankylosing spondylitis assessment with thin section ct" }, { "qualifier":{ "classid":"alternative title", "schemeid":"dnet:dataCite_title", "schemename":"dnet:dataCite_title", "classname":"alternative title" }, "dataInfo":null, "value":"Early pulmonary involvement in ankylosing spondylitis: assessment with thin-section CT." } ], "relevantdate":null, "dateofacceptance":{ "dataInfo":null, "value":"2000-08-01" }, "publisher":{ "dataInfo":null, "value":"Elsevier" }, "embargoenddate":null, "fulltext":null, "contributor":null, "resourcetype":null, "coverage":null, "bestaccessright":null, "externalReference":null, "format":null, "description":[ { "dataInfo":null, "value":"Abstract AIM: To determine the frequency and the distribution of early pulmonary lesions in patients with ankylosing spondylitis (AS) and a normal chest X-ray on thin-section CT and to correlate the CT findings with the results of pulmonary function tests and clinical data. MATERIALS AND METHODS: Twenty-five patients with clinically proven AS and no history of smoking underwent clinical examinations, pulmonary function tests (PFT), chest radiography, and thin-section CT. Four of 25 patients (16%), who had obvious signs on plain films suggestive of pre-existing disorders unrelated to AS were excluded. 
RESULTS: Fifteen of 21 patients (71%) had abnormalities on thin-section CT. The most frequent abnormalities were thickening of the interlobular septa in seven of 21 patients (33%), mild bronchial wall thickening in (6/21, 29%), pleural thickening and pleuropulmonary irregularities (both 29%) and linear septal thickening (6/21, 29%). In six patients there were no signs of pleuropulmonary involvement. Eight of 15 patients (53%) with abnormal and four of six patients (67%) with normal CT findings revealed mild restrictive lung function impairment. CONCLUSION: Patients with AS but a normal chest radiograph frequently have abnormalities on thin-section CT. As these abnormalities are usually subtle and their extent does not correlate with functional and clinical data, the overall routine impact of thin-section CT in the diagnosis of AS is limited. Turetschek, K , (2000) Clinical Radiology53, 632–636." } ], "source":[ { "dataInfo":null, "value":null } ], "subject":[ { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Complication" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Chest radiograph" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.580897", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.diagnostic_test" }, { 
"qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.580897", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"In patient" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Radiography" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4582326", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business.industry" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4582326", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", 
"classname":"sysimport:actionset" } }, "value":"business" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Thin section ct" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Respiratory disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49358836", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49358836", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Ankylosing spondylitis" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", 
"classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49937168", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.disease" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.49937168", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Radiology" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4573571", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine.medical_specialty" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.4573571", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, 
"provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Medicine" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.40295774", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business.industry" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":{ "trust":"0.40295774", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset" } }, "value":"business" }, { "qualifier":{ "classid":"MAG", "schemeid":"dnet:subject_classification_typologies", "schemename":"dnet:subject_classification_typologies", "classname":"Microsoft Academic Graph classification" }, "dataInfo":null, "value":"Pulmonary function testing" } ], "language":null, "instance":[ { "processingchargecurrency":null, "refereed":null, "instancetype":null, "hostedby":null, "distributionlocation":null, "processingchargeamount":null, "license":null, "accessright":null, "dateofacceptance":null, "collectedfrom":{ 
"dataInfo":null, "value":"Microsoft Academic Graph", "key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" }, "url":[ "https://www.ncbi.nlm.nih.gov/pubmed/10964736", "https://www.sciencedirect.com/science/article/pii/S0009926000904987", "https://academic.microsoft.com/#/detail/1990704599" ] } ], "context":null, "country":null, "originalId":[ "1990704599", "10.1053/crad.2000.0498" ], "pid":[ { "qualifier":{ "classid":"doi", "schemeid":"dnet:pid_types", "schemename":"dnet:pid_types", "classname":"doi" }, "dataInfo":null, "value":"10.1053/crad.2000.0498" } ], "dateofcollection":null, "dateoftransformation":null, "oaiprovenance":null, "extraInfo":null, "id":"50|doiboost____::994b7e47b9e225ab6d5e14841cb45a7f", "collectedfrom":[ { "dataInfo":null, "value":"Microsoft Academic Graph", "key":"10|openaire____::5f532a3fc4f1ea403f37070f59a7a53a" } ], "dataInfo":{ "trust":"0.9", "invisible":false, "inferred":false, "deletedbyinference":false, "inferenceprovenance":null, "provenanceaction":{ "classid":"sysimport:actionset", "schemeid":"dnet:provenanceActions", "schemename":"dnet:provenanceActions", "classname":"sysimport:actionset"} }, "lastupdatetimestamp":null }
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
index 52e4e126a..c880edb7d 100644
--- a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_2.json
@@ -1 +1,140 @@
-{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
\ No newline at end of file
+{
+ "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
+ "resuttype": {"classid": "dataset"},
+ "pid": [
+ {
+ "qualifier": {"classid": "doi"},
+ "value": "10.1016/j.cmet.2011.03.013"
+ },
+ {
+ "qualifier": {"classid": "urn"},
+ "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+ },
+ {
+ "qualifier": {"classid": "scp-number"},
+ "value": "79953761260"
+ },
+ {
+ "qualifier": {"classid": "pmc"},
+ "value": "21459329"
+ }
+ ],
+ "collectedfrom": [
+ {
+ "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
+ "value": "Repository B"
+ }
+ ],
+ "instance": [
+ {
+ "refereed": {
+ "classid": "0000",
+ "classname": "UNKNOWN",
+ "schemeid": "dnet:review_levels",
+ "schemename": "dnet:review_levels"
+ },
+ "hostedby": {
+ "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+ "value": "Zenodo"
+ },
+ "accessright": {
+ "classid": "OPEN",
+ "classname": "Open Access",
+ "schemeid": "dnet:access_modes",
+ "schemename": "dnet:access_modes"
+ },
+ "processingchargecurrency": {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "value": "EUR"
+ },
+ "pid": [
+ {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "qualifier": {
+ "classid": "doi",
+ "classname": "Digital Object Identifier",
+ "schemeid": "dnet:pid_types",
+ "schemename": "dnet:pid_types"
+ },
+ "value": "10.1371/journal.pone.0085605"
+ }
+ ],
+ "distributionlocation": "",
+ "url": ["https://doi.org/10.1371/journal.pone.0085605"],
+ "alternateIdentifier": [
+ {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "qualifier": {
+ "classid": "pmid",
+ "classname": "PubMed ID",
+ "schemeid": "dnet:pid_types",
+ "schemename": "dnet:pid_types"
+ },
+ "value": "24454899.0"
+ }
+ ],
+ "collectedfrom": {
+ "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
+ "value": "Repository B"
+ },
+ "processingchargeamount": {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "value": "1022.02"
+ },
+ "instancetype": {
+ "classid": "0004",
+ "classname": "Conference object",
+ "schemeid": "dnet:publication_resource",
+ "schemename": "dnet:publication_resource"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json
new file mode 100644
index 000000000..967c1181b
--- /dev/null
+++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/schema/oaf/utils/dataset_delegated.json
@@ -0,0 +1,140 @@
+{
+ "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
+ "resuttype": {"classid": "dataset"},
+ "pid": [
+ {
+ "qualifier": {"classid": "doi"},
+ "value": "10.1016/j.cmet.2011.03.013"
+ },
+ {
+ "qualifier": {"classid": "urn"},
+ "value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
+ },
+ {
+ "qualifier": {"classid": "scp-number"},
+ "value": "79953761260"
+ },
+ {
+ "qualifier": {"classid": "pmc"},
+ "value": "21459329"
+ }
+ ],
+ "collectedfrom": [
+ {
+ "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+ "value": "Zenodo"
+ }
+ ],
+ "instance": [
+ {
+ "refereed": {
+ "classid": "0000",
+ "classname": "UNKNOWN",
+ "schemeid": "dnet:review_levels",
+ "schemename": "dnet:review_levels"
+ },
+ "hostedby": {
+ "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+ "value": "Zenodo"
+ },
+ "accessright": {
+ "classid": "OPEN",
+ "classname": "Open Access",
+ "schemeid": "dnet:access_modes",
+ "schemename": "dnet:access_modes"
+ },
+ "processingchargecurrency": {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "value": "EUR"
+ },
+ "pid": [
+ {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "qualifier": {
+ "classid": "doi",
+ "classname": "Digital Object Identifier",
+ "schemeid": "dnet:pid_types",
+ "schemename": "dnet:pid_types"
+ },
+ "value": "10.1371/journal.pone.0085605"
+ }
+ ],
+ "distributionlocation": "",
+ "url": ["https://doi.org/10.1371/journal.pone.0085605"],
+ "alternateIdentifier": [
+ {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "qualifier": {
+ "classid": "pmid",
+ "classname": "PubMed ID",
+ "schemeid": "dnet:pid_types",
+ "schemename": "dnet:pid_types"
+ },
+ "value": "24454899.0"
+ }
+ ],
+ "collectedfrom": {
+ "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
+ "value": "Zenodo"
+ },
+ "processingchargeamount": {
+ "dataInfo": {
+ "provenanceaction": {
+ "classid": "sysimport:crosswalk:datasetarchive",
+ "classname": "Harvested",
+ "schemeid": "dnet:provenanceActions",
+ "schemename": "dnet:provenanceActions"
+ },
+ "deletedbyinference": false,
+ "inferred": false,
+ "inferenceprovenance": "",
+ "invisible": true,
+ "trust": "0.9"
+ },
+ "value": "1022.02"
+ },
+ "instancetype": {
+ "classid": "0004",
+ "classname": "Conference object",
+ "schemeid": "dnet:publication_resource",
+ "schemename": "dnet:publication_resource"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/dhp-pace-core/pom.xml b/dhp-pace-core/pom.xml
new file mode 100644
index 000000000..fd7f44fc9
--- /dev/null
+++ b/dhp-pace-core/pom.xml
@@ -0,0 +1,110 @@
+
+
+
+ 4.0.0
+
+
+ eu.dnetlib.dhp
+ dhp
+ 1.2.5-SNAPSHOT
+ ../pom.xml
+
+
+ eu.dnetlib.dhp
+ dhp-pace-core
+ 1.2.5-SNAPSHOT
+ jar
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ ${net.alchim31.maven.version}
+
+
+ scala-compile-first
+ initialize
+
+ add-source
+ compile
+
+
+
+ scala-test-compile
+ process-test-resources
+
+ testCompile
+
+
+
+
+ true
+ ${scala.binary.version}
+ ${scala.version}
+
+
+
+
+
+
+
+
+ edu.cmu
+ secondstring
+
+
+ com.google.guava
+ guava
+
+
+ com.google.code.gson
+ gson
+
+
+ org.apache.commons
+ commons-lang3
+
+
+ commons-io
+ commons-io
+
+
+ org.antlr
+ stringtemplate
+
+
+ commons-logging
+ commons-logging
+
+
+ org.reflections
+ reflections
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ org.apache.commons
+ commons-math3
+
+
+ com.jayway.jsonpath
+ json-path
+
+
+ com.ibm.icu
+ icu4j
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+
+
+
+
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
new file mode 100644
index 000000000..e971ec5bb
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java
@@ -0,0 +1,59 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.config.Config;
+/** Base class for clustering functions: keeps the parameter map and implements the shared key pipeline. */
+public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction {
+ // Parameter map received by the constructor; read via getParams(), param() and paramOrDefault().
+ protected Map params;
+ /** Stores the given parameter map as is (no defensive copy is made). */
+ public AbstractClusteringFunction(final Map params) {
+ this.params = params;
+ }
+ /** Produces the keys for one field value; apply() calls it with the normalized, stop-word-filtered value. */
+ protected abstract Collection doApply(Config conf, String s);
+ /** Returns the set (HashSet) of non-blank keys obtained by running doApply over every non-empty field. */
+ @Override
+ public Collection apply(Config conf, List fields) {
+ return fields
+ .stream()
+ .filter(f -> !f.isEmpty())
+ .map(s -> normalize(s))
+ .map(s -> filterAllStopWords(s))
+ .map(s -> doApply(conf, s))
+ .map(c -> filterBlacklisted(c, ngramBlacklist))
+ .flatMap(c -> c.stream())
+ .filter(StringUtils::isNotBlank)
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+ /** Returns the parameter map given at construction time. */
+ public Map getParams() {
+ return params;
+ }
+ /** Returns the named parameter as an Integer, or null when absent; non-Number values are parsed from toString(). */
+ protected Integer param(String name) {
+ Object val = params.get(name);
+ if (val == null)
+ return null;
+ if (val instanceof Number) {
+ return ((Number) val).intValue();
+ }
+ return Integer.parseInt(val.toString());
+ }
+ /** Returns the named integer parameter, or i when param(name) yields null. */
+ protected int paramOrDefault(String name, int i) {
+ Integer res = param(name);
+ if (res == null)
+ res = i;
+ return res;
+ }
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
new file mode 100644
index 000000000..b5db27106
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function that builds acronym-like keys from the i-th character of each token of the input. */
+@ClusteringClass("acronyms")
+public class Acronyms extends AbstractClusteringFunction {
+
+ public Acronyms(Map params) {
+ super(params);
+ }
+ /** Reads "max", "minLen" and "maxLen"; they are unboxed to int, so a missing one fails with a NullPointerException. */
+ @Override
+ protected Collection doApply(Config conf, String s) {
+ return extractAcronyms(s, param("max"), param("minLen"), param("maxLen"));
+ }
+ /** Builds up to maxAcronyms candidates (insertion-ordered, no duplicates); candidate i uses character i of the tokens. */
+ private Set extractAcronyms(final String s, int maxAcronyms, int minLen, int maxLen) {
+
+ final Set acronyms = Sets.newLinkedHashSet();
+
+ for (int i = 0; i < maxAcronyms; i++) {
+ // the input is re-tokenized on whitespace for every character position i
+ final StringTokenizer st = new StringTokenizer(s);
+ final StringBuilder sb = new StringBuilder();
+
+ while (st.hasMoreTokens()) {
+ final String token = st.nextToken();
+ if (sb.length() > maxLen) { // checked before appending, so a candidate can reach maxLen + 1 characters
+ break;
+ }
+ if (token.length() > 1 && i < token.length()) { // one-character tokens never contribute
+ sb.append(token.charAt(i));
+ }
+ }
+ String acronym = sb.toString();
+ if (acronym.length() > minLen) { // strict comparison: a candidate of exactly minLen characters is dropped
+ acronyms.add(acronym);
+ }
+ }
+ return acronyms;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringClass.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringClass.java
new file mode 100644
index 000000000..3bb845b15
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringClass.java
@@ -0,0 +1,14 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+/** Type-level annotation, retained at runtime, that carries the name of a clustering function implementation. */
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface ClusteringClass {
+ /** Name given to the annotated class, e.g. "acronyms" or "ngrams". */
+ public String value();
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
new file mode 100644
index 000000000..269de867d
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java
@@ -0,0 +1,16 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import eu.dnetlib.pace.config.Config;
+/** Contract of a clustering function: turns field values into a collection of clustering keys. */
+public interface ClusteringFunction {
+ /** Computes the clustering keys for the given field values under the given configuration. */
+ public Collection apply(Config config, List fields);
+ /** Returns the parameter map the function was configured with. */
+ public Map getParams();
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
new file mode 100644
index 000000000..cbfcde266
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java
@@ -0,0 +1,28 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function whose doApply returns the value it receives as the only key. */
+@ClusteringClass("immutablefieldvalue")
+public class ImmutableFieldValue extends AbstractClusteringFunction {
+
+ public ImmutableFieldValue(final Map params) {
+ super(params);
+ }
+ /** Wraps s, unchanged, in a new single-element list. */
+ @Override
+ protected Collection doApply(final Config conf, final String s) {
+ final List res = Lists.newArrayList();
+
+ res.add(s);
+
+ return res;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java
new file mode 100644
index 000000000..e00092bd0
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java
@@ -0,0 +1,69 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.Option;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.util.MapDocumentUtil;
+/** Clustering function that builds one key per JSON field value from the JSONPath expressions found in the parameters. */
+@ClusteringClass("jsonlistclustering")
+public class JSONListClustering extends AbstractPaceFunctions implements ClusteringFunction {
+ // Parameter map; every entry whose key contains "jpath" is read as a JSONPath expression.
+ private final Map params;
+
+ public JSONListClustering(Map params) {
+ this.params = params;
+ }
+
+ @Override
+ public Map getParams() {
+ return params;
+ }
+ /** Returns the set (HashSet) of non-blank keys computed by doApply for every non-empty field value. */
+ @Override
+ public Collection apply(Config conf, List fields) {
+ return fields
+ .stream()
+ .filter(f -> !f.isEmpty())
+ .map(s -> doApply(conf, s))
+ .filter(StringUtils::isNotBlank)
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+ /** Joins with single spaces the values extracted from json by each "jpath" parameter; returns "1" when blank. */
+ private String doApply(Config conf, String json) {
+ StringBuilder st = new StringBuilder(); // to build the string used for comparisons basing on the jpath into
+ // parameters
+ final DocumentContext documentContext = JsonPath
+ .using(Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS))
+ .parse(json);
+
+ // for each path in the param list
+ for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) {
+ String path = params.get(key).toString();
+ String value = MapDocumentUtil.getJPathString(path, documentContext);
+ if (value == null || value.isEmpty())
+ value = "";
+ st.append(value);
+ st.append(" ");
+ }
+ if (st.length() > 0) { // drop the trailing separator; the builder is empty when no "jpath" parameter exists
+ st.setLength(st.length() - 1);
+ }
+ if (StringUtils.isBlank(st)) {
+ return "1";
+ }
+ return st.toString();
+ }
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
new file mode 100644
index 000000000..fdd8d1fb1
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java
@@ -0,0 +1,54 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function that emits "keywordCode-cityCode" combinations found in a field value. */
+@ClusteringClass("keywordsclustering")
+public class KeywordsClustering extends AbstractClusteringFunction {
+
+ public KeywordsClustering(Map params) {
+ super(params);
+ }
+ /** Returns at most "max" (default 2) keyword-city combinations, in insertion order; "windowSize" defaults to 4. */
+ @Override
+ protected Collection doApply(final Config conf, String s) {
+
+ // takes city codes and keywords codes without duplicates
+ Set keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4));
+ Set cities = getCities(s, paramOrDefault("windowSize", 4));
+
+ // list of combination to return as result
+ final Collection combinations = new LinkedHashSet();
+ // combinations are only produced when both a keyword code and a city code are available
+ for (String keyword : keywordsToCodes(keywords, conf.translationMap())) {
+ for (String city : citiesToCodes(cities)) {
+ combinations.add(keyword + "-" + city);
+ if (combinations.size() >= paramOrDefault("max", 2)) {
+ return combinations;
+ }
+ }
+ }
+
+ return combinations;
+ }
+ /** Same pipeline shape as the base class apply(), with an extra cleanup step before normalization. */
+ @Override
+ public Collection apply(final Config conf, List fields) {
+ return fields
+ .stream()
+ .filter(f -> !f.isEmpty())
+ .map(KeywordsClustering::cleanup)
+ .map(KeywordsClustering::normalize)
+ .map(s -> filterAllStopWords(s))
+ .map(s -> doApply(conf, s))
+ .map(c -> filterBlacklisted(c, ngramBlacklist))
+ .flatMap(c -> c.stream())
+ .filter(StringUtils::isNotBlank)
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java
new file mode 100644
index 000000000..9692f5762
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java
@@ -0,0 +1,78 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Person;
+/** Clustering function that keys a person name as first initial followed by last name, lower-cased. */
+@ClusteringClass("lnfi")
+public class LastNameFirstInitial extends AbstractClusteringFunction {
+ // Value used for the Person "aggressive" flag when the "aggressive" parameter is not set.
+ private static final boolean DEFAULT_AGGRESSIVE = true;
+
+ public LastNameFirstInitial(final Map params) {
+ super(params);
+ }
+ /** Like the base class apply(), but uses this class's normalize() and skips the stop-word filtering step. */
+ @Override
+ public Collection apply(Config conf, List fields) {
+ return fields
+ .stream()
+ .filter(f -> !f.isEmpty())
+ .map(LastNameFirstInitial::normalize)
+ .map(s -> doApply(conf, s))
+ .map(c -> filterBlacklisted(c, ngramBlacklist))
+ .flatMap(c -> c.stream())
+ .filter(StringUtils::isNotBlank)
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+ /** Strips everything but spaces and word characters, blanks out punctuation, digits and newlines, then trims. */
+ public static String normalize(final String s) {
+ return fixAliases(transliterate(nfd(unicodeNormalization(s))))
+ // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input
+ // strings
+ .replaceAll("[^ \\w]+", "")
+ .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
+ .replaceAll("(\\p{Punct})+", " ")
+ .replaceAll("(\\d)+", " ")
+ .replaceAll("(\\n)+", " ")
+ .trim();
+ }
+ /** Returns one key for an accurate Person; otherwise keys derived from the first and last full-name tokens. */
+ @Override
+ protected Collection doApply(final Config conf, final String s) {
+
+ final List res = Lists.newArrayList();
+ // the "aggressive" parameter, when present, must be a Boolean (it is cast, not parsed)
+ final boolean aggressive = (Boolean) (getParams().containsKey("aggressive") ? getParams().get("aggressive")
+ : DEFAULT_AGGRESSIVE);
+
+ Person p = new Person(s, aggressive);
+
+ if (p.isAccurate()) {
+ String lastName = p.getNormalisedSurname().toLowerCase();
+ String firstInitial = p.getNormalisedFirstName().toLowerCase().substring(0, 1);
+
+ res.add(firstInitial.concat(lastName));
+ } else { // is not accurate, meaning it has no defined name and surname
+ List fullname = Arrays.asList(p.getNormalisedFullname().split(" "));
+ if (fullname.size() == 1) {
+ res.add(p.getNormalisedFullname().toLowerCase());
+ } else if (fullname.size() == 2) {
+ res.add(fullname.get(0).substring(0, 1).concat(fullname.get(1)).toLowerCase());
+ res.add(fullname.get(1).substring(0, 1).concat(fullname.get(0)).toLowerCase());
+ } else {
+ res.add(fullname.get(0).substring(0, 1).concat(fullname.get(fullname.size() - 1)).toLowerCase());
+ res.add(fullname.get(fullname.size() - 1).substring(0, 1).concat(fullname.get(0)).toLowerCase());
+ }
+ }
+ // NOTE(review): substring(0, 1) above assumes non-empty tokens - confirm Person never yields empty ones
+ return res;
+ }
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
new file mode 100644
index 000000000..807f41dd5
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java
@@ -0,0 +1,38 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function that uses the lower-cased, trimmed field value itself as the key. */
+@ClusteringClass("lowercase")
+public class LowercaseClustering extends AbstractClusteringFunction {
+
+ public LowercaseClustering(final Map params) {
+ super(params);
+ }
+ /** Collects the doApply keys of all fields in insertion order, without duplicates; no normalization pipeline is run. */
+ @Override
+ public Collection apply(Config conf, List fields) {
+ Collection c = Sets.newLinkedHashSet();
+ for (String f : fields) {
+ c.addAll(doApply(conf, f));
+ }
+ return c;
+ }
+ /** Returns an empty list for a blank value, otherwise a single-element list with s lower-cased and trimmed. */
+ @Override
+ protected Collection doApply(final Config conf, final String s) {
+ if (StringUtils.isBlank(s)) {
+ return Lists.newArrayList();
+ }
+ return Lists.newArrayList(s.toLowerCase().trim());
+ }
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java
new file mode 100644
index 000000000..6ee80b86e
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java
@@ -0,0 +1,24 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+/** Static helper producing a compact, stop-word-free string; the method name suggests it is meant for ordering. */
+public class NGramUtils extends AbstractPaceFunctions {
+ static private final NGramUtils NGRAMUTILS = new NGramUtils();
+ // Length, in characters, the cleaned string is padded and truncated to before spaces are removed.
+ private static final int SIZE = 100;
+ // Stop words loaded once from the classpath resource below.
+ private static final Set stopwords = AbstractPaceFunctions
+ .loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
+ /** Normalizes s, filters the stop words, keeps the first SIZE characters and removes every space from them. */
+ public static String cleanupForOrdering(String s) {
+ return (NGRAMUTILS.filterStopWords(NGRAMUTILS.normalize(s), stopwords) + StringUtils.repeat(" ", SIZE))
+ .substring(0, SIZE)
+ .replaceAll(" ", "");
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java
new file mode 100644
index 000000000..bcc9667a8
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java
@@ -0,0 +1,41 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function whose keys are concatenations of consecutive n-grams of the input. */
+@ClusteringClass("ngrampairs")
+public class NgramPairs extends Ngrams {
+
+ public NgramPairs(Map params) {
+ super(params, false);
+ }
+
+ public NgramPairs(Map params, boolean sorted) {
+ super(params, sorted);
+ }
+ /** Extracts up to 2 * "max" n-grams (one per token, at least 2 characters long) and pairs them, up to "max" pairs. */
+ @Override
+ protected Collection doApply(Config conf, String s) {
+ return ngramPairs(Lists.newArrayList(getNgrams(s, param("ngramLen"), param("max") * 2, 1, 2)), param("max"));
+ }
+ /** Concatenates each n-gram with the one that follows it in the list, producing at most maxNgrams pairs. */
+ protected Collection ngramPairs(final List ngrams, int maxNgrams) {
+ Collection res = Lists.newArrayList();
+ int j = 0;
+ for (int i = 0; i < ngrams.size() && res.size() < maxNgrams; i++) {
+ if (++j >= ngrams.size()) {
+ break;
+ }
+ res.add(ngrams.get(i) + ngrams.get(j));
+ // j is always i + 1 here: the pair is (current n-gram, next n-gram)
+ }
+ return res;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java
new file mode 100644
index 000000000..7b862c729
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java
@@ -0,0 +1,52 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.*;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function whose keys are fixed-length character n-grams taken from the start of each token. */
+@ClusteringClass("ngrams")
+public class Ngrams extends AbstractClusteringFunction {
+ // When true the n-grams are collected in a TreeSet (natural order) instead of insertion order.
+ private final boolean sorted;
+
+ public Ngrams(Map params) {
+ this(params, false);
+ }
+
+ public Ngrams(Map params, boolean sorted) {
+ super(params);
+ this.sorted = sorted;
+ }
+ /** Reads "ngramLen", "max", "maxPerToken", "minNgramLen"; they are unboxed to int, so a missing one fails with NPE. */
+ @Override
+ protected Collection doApply(Config conf, String s) {
+ return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen"));
+ }
+ /** Returns up to max distinct n-grams of length ngramLen, at most maxPerToken per whitespace-separated token. */
+ protected Collection getNgrams(String s, int ngramLen, int max, int maxPerToken, int minNgramLen) {
+
+ final Collection ngrams = sorted ? new TreeSet<>() : new LinkedHashSet();
+ final StringTokenizer st = new StringTokenizer(s);
+
+ while (st.hasMoreTokens()) {
+ final String token = st.nextToken();
+ if (!token.isEmpty()) {
+ for (int i = 0; i < maxPerToken && ngramLen + i <= token.length(); i++) {
+ String ngram = token.substring(i, Math.min(ngramLen + i, token.length())).trim();
+ // tokens shorter than ngramLen never enter this loop, so they yield no n-gram
+ if (ngram.length() >= minNgramLen) {
+ ngrams.add(ngram);
+
+ if (ngrams.size() >= max) {
+ return ngrams;
+ }
+ }
+ }
+ }
+ }
+ // fewer than max n-grams were found: return what was collected
+ return ngrams;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java
new file mode 100644
index 000000000..f1d1e17b9
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java
@@ -0,0 +1,113 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import com.google.common.base.Splitter;
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+/** Clustering function that prefixes title suffix/prefix chain keys with the (capped) number of authors. */
+@ClusteringClass("numAuthorsTitleSuffixPrefixChain")
+public class NumAuthorsTitleSuffixPrefixChain extends AbstractClusteringFunction {
+
+ public NumAuthorsTitleSuffixPrefixChain(Map params) {
+ super(params);
+ }
+ /** Expects the author count as first field; returns an empty list when it is missing, not a number or not positive. */
+ @Override
+ public Collection apply(Config conf, List fields) {
+ if (fields.isEmpty()) { // no author count to read: fields.get(0) would throw IndexOutOfBoundsException
+ return Collections.emptyList();
+ }
+ try {
+ int num_authors = Math.min(Integer.parseInt(fields.get(0)), 21); // SIZE threshold is 20, +1
+ if (num_authors > 0) {
+ return super.apply(conf, fields.subList(1, fields.size()))
+ .stream()
+ .map(s -> num_authors + "-" + s)
+ .collect(Collectors.toList());
+ }
+ } catch (NumberFormatException ignored) {
+ // missing or null authors array
+ }
+ return Collections.emptyList();
+ }
+ /** Reads "mod"; it is unboxed to int, so a missing parameter fails with a NullPointerException. */
+ @Override
+ protected Collection doApply(Config conf, String s) {
+ return suffixPrefixChain(cleanup(s), param("mod"));
+ }
+ /** Builds the chain keys from the words of s longer than 3 characters, prefixed by (word count / mod) and "-". */
+ private Collection suffixPrefixChain(String s, int mod) {
+ // create the list of words from the string (remove short words)
+ List wordsList = Arrays
+ .stream(s.split(" "))
+ .filter(si -> si.length() > 3)
+ .collect(Collectors.toList());
+
+ final int words = wordsList.size();
+ // only the word count contributes to the prefix; the length of s is not used
+
+ // create the prefix: (number of words / mod) followed by "-"
+ String prefix = words / mod + "-";
+
+ return doSuffixPrefixChain(wordsList, prefix);
+
+ }
+ /** Returns up to two keys mixing 3-character suffixes and prefixes of the first two or three words. */
+ private Collection doSuffixPrefixChain(List wordsList, String prefix) {
+
+ Set set = Sets.newLinkedHashSet();
+ switch (wordsList.size()) {
+ case 0:
+ break;
+ case 1:
+ set.add(wordsList.get(0));
+ break;
+ case 2:
+ set
+ .add(
+ prefix +
+ suffix(wordsList.get(0), 3) +
+ prefix(wordsList.get(1), 3));
+
+ set
+ .add(
+ prefix +
+ prefix(wordsList.get(0), 3) +
+ suffix(wordsList.get(1), 3));
+
+ break;
+ default:
+ set
+ .add(
+ prefix +
+ suffix(wordsList.get(0), 3) +
+ prefix(wordsList.get(1), 3) +
+ suffix(wordsList.get(2), 3));
+
+ set
+ .add(
+ prefix +
+ prefix(wordsList.get(0), 3) +
+ suffix(wordsList.get(1), 3) +
+ prefix(wordsList.get(2), 3));
+ break;
+ }
+ // NOTE(review): the single-word case adds the word without the prefix - confirm this is intended
+ return set;
+
+ }
+ /** Returns the last len characters of s; callers only pass words longer than 3 characters with len 3. */
+ private String suffix(String s, int len) {
+ return s.substring(s.length() - len);
+ }
+ /** Returns the first len characters of s. */
+ private String prefix(String s, int len) {
+ return s.substring(0, len);
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
new file mode 100644
index 000000000..91b51bebb
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java
@@ -0,0 +1,84 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Person;
+/** Clustering function that derives keys for person names from a parsed Person or, failing that, from token pairs. */
+@ClusteringClass("personClustering")
+public class PersonClustering extends AbstractPaceFunctions implements ClusteringFunction {
+
+ private Map params;
+ // Limit passed to tokens() in the fallback branch of apply().
+ private static final int MAX_TOKENS = 5;
+
+ public PersonClustering(final Map params) {
+ this.params = params;
+ }
+ /** Returns the set of keys for all fields: firstLC(first name) + lower-cased surname, or token-pair combinations. */
+ @Override
+ public Collection apply(final Config conf, final List fields) {
+ final Set hashes = Sets.newHashSet();
+
+ for (final String f : fields) {
+ // the Person is always built with the second constructor argument set to false
+ final Person person = new Person(f, false);
+
+ if (StringUtils.isNotBlank(person.getNormalisedFirstName())
+ && StringUtils.isNotBlank(person.getNormalisedSurname())) {
+ hashes.add(firstLC(person.getNormalisedFirstName()) + person.getNormalisedSurname().toLowerCase());
+ } else {
+ for (final String token1 : tokens(f, MAX_TOKENS)) {
+ for (final String token2 : tokens(f, MAX_TOKENS)) {
+ if (!token1.equals(token2)) {
+ hashes.add(firstLC(token1) + token2);
+ }
+ }
+ }
+ }
+ }
+
+ return hashes;
+ }
+ // Disabled earlier variant of apply() that worked on Field / GTAuthor inputs:
+// @Override
+// public Collection apply(final List fields) {
+// final Set hashes = Sets.newHashSet();
+//
+// for (final Field f : fields) {
+//
+// final GTAuthor gta = GTAuthor.fromOafJson(f.stringValue());
+//
+// final Author a = gta.getAuthor();
+//
+// if (StringUtils.isNotBlank(a.getFirstname()) && StringUtils.isNotBlank(a.getSecondnames())) {
+// hashes.add(firstLC(a.getFirstname()) + a.getSecondnames().toLowerCase());
+// } else {
+// for (final String token1 : tokens(f.stringValue(), MAX_TOKENS)) {
+// for (final String token2 : tokens(f.stringValue(), MAX_TOKENS)) {
+// if (!token1.equals(token2)) {
+// hashes.add(firstLC(token1) + token2);
+// }
+// }
+// }
+// }
+// }
+//
+// return hashes;
+// }
+
+ @Override
+ public Map getParams() {
+ return params;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
new file mode 100644
index 000000000..09a112c37
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java
@@ -0,0 +1,34 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.model.Person;
+/** Clustering function whose single key is the hash() of the Person parsed from the field value. */
+@ClusteringClass("personHash")
+public class PersonHash extends AbstractClusteringFunction {
+ // Value used for the Person "aggressive" flag when the "aggressive" parameter is not set.
+ private static final boolean DEFAULT_AGGRESSIVE = false;
+
+ public PersonHash(final Map params) {
+ super(params);
+ }
+ /** Returns a single-element list holding new Person(s, aggressive).hash(). */
+ @Override
+ protected Collection doApply(final Config conf, final String s) {
+ final List res = Lists.newArrayList();
+ // the "aggressive" parameter, when present, must be a Boolean (it is cast, not parsed)
+ final boolean aggressive = (Boolean) (getParams().containsKey("aggressive") ? getParams().get("aggressive")
+ : DEFAULT_AGGRESSIVE);
+
+ res.add(new Person(s, aggressive).hash());
+
+ return res;
+ }
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java
new file mode 100644
index 000000000..3733dfc74
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java
@@ -0,0 +1,34 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Map;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Placeholder clustering function: it carries no {@code @ClusteringClass} annotation and produces no keys.
+ */
+public class RandomClusteringFunction extends AbstractClusteringFunction {
+
+	/**
+	 * @param params configuration map passed on to the parent class
+	 */
+	public RandomClusteringFunction(Map params) {
+		super(params);
+	}
+
+	/**
+	 * Produces no clustering keys for any input.
+	 *
+	 * @param conf the deduplication configuration (not used)
+	 * @param s the value to cluster (not used)
+	 * @return an empty, mutable collection rather than {@code null}, so the result can be iterated safely
+	 */
+	@Override
+	protected Collection doApply(final Config conf, String s) {
+		return new ArrayList<>();
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java
new file mode 100644
index 000000000..ca1b4189b
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Variant of {@link NgramPairs} annotated as "sortedngrampairs": the space-separated tokens of the input are
+ * sorted before the ngram pairs are computed.
+ */
+@ClusteringClass("sortedngrampairs")
+public class SortedNgramPairs extends NgramPairs {
+
+	/**
+	 * @param params configuration map; "ngramLen" and "max" are read when a value is clustered
+	 */
+	public SortedNgramPairs(Map params) {
+		super(params, false);
+	}
+
+	/**
+	 * Sorts the tokens of {@code s}, re-joins them with single spaces and derives ngram pairs from the result.
+	 *
+	 * @param conf the deduplication configuration (not used here)
+	 * @param s the value to cluster
+	 * @return the outcome of {@code ngramPairs} over the ngrams of the sorted text
+	 */
+	@Override
+	protected Collection doApply(Config conf, String s) {
+
+		// split on blanks, dropping empty tokens and surrounding whitespace
+		final Splitter blankSplitter = Splitter.on(" ").omitEmptyStrings().trimResults();
+		final List tokens = Lists.newArrayList(blankSplitter.split(s));
+		Collections.sort(tokens);
+
+		final String sortedText = Joiner.on(" ").join(tokens);
+
+		return ngramPairs(
+			Lists.newArrayList(getNgrams(sortedText, param("ngramLen"), param("max") * 2, 1, 2)),
+			param("max"));
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
new file mode 100644
index 000000000..048380f7e
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java
@@ -0,0 +1,56 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.Lists;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Clustering function annotated as "spacetrimmingfieldvalue": the key is the lower-cased input with every
+ * whitespace run removed. Blank inputs get a random key instead.
+ */
+@ClusteringClass("spacetrimmingfieldvalue")
+public class SpaceTrimmingFieldValue extends AbstractClusteringFunction {
+
+	/** Matches runs of whitespace; compiled once instead of on every call. */
+	private static final Pattern WHITESPACE = Pattern.compile("\\s+");
+
+	/**
+	 * @param params configuration map; "randomLength" is read when a blank value has to be replaced
+	 */
+	public SpaceTrimmingFieldValue(final Map params) {
+		super(params);
+	}
+
+	/**
+	 * Builds the single clustering key for one value.
+	 *
+	 * @param conf the deduplication configuration (not used here)
+	 * @param s the value to cluster
+	 * @return a one-element list: a random string of "randomLength" characters when {@code s} is blank,
+	 *         otherwise {@code s} lower-cased with all whitespace removed
+	 */
+	@Override
+	protected Collection doApply(final Config conf, final String s) {
+		final List res = Lists.newArrayList();
+
+		if (StringUtils.isBlank(s)) {
+			res.add(RandomStringUtils.random(param("randomLength")));
+		} else {
+			// Locale.ROOT keeps the key independent of the JVM default locale (e.g. Turkish dotless i)
+			res.add(WHITESPACE.matcher(s.toLowerCase(Locale.ROOT)).replaceAll(""));
+		}
+
+		return res;
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java
new file mode 100644
index 000000000..b6921e9f1
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java
@@ -0,0 +1,71 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Clustering function annotated as "suffixprefix": around the blanks of the input it joins the characters right
+ * before the blank with the characters right after it, with blanks removed.
+ */
+@ClusteringClass("suffixprefix")
+public class SuffixPrefix extends AbstractClusteringFunction {
+
+	/**
+	 * @param params configuration map; "len" and "max" are read when a value is clustered
+	 */
+	public SuffixPrefix(Map params) {
+		super(params);
+	}
+
+	/**
+	 * @param conf the deduplication configuration (not used here)
+	 * @param s the value to cluster
+	 * @return the keys built from {@code s} with the "len" and "max" parameters
+	 */
+	@Override
+	protected Collection doApply(Config conf, String s) {
+		return suffixPrefix(s, param("len"), param("max"));
+	}
+
+	/**
+	 * Collects, in order of appearance, the keys built around the blanks of {@code s}.
+	 *
+	 * @param s the text to scan
+	 * @param len number of characters taken on each side of a blank
+	 * @param max the scan stops once this many keys have been collected
+	 * @return insertion-ordered set of distinct keys; keys shorter than 4 characters are dropped
+	 */
+	private Collection suffixPrefix(String s, int len, int max) {
+		final Set bigrams = Sets.newLinkedHashSet();
+
+		// Jump from blank to blank. Advancing one character at a time would find the same blank again for every
+		// position in front of it and keep scanning after the last blank, with no effect on the result.
+		int from = 1;
+		while (from < s.length() && bigrams.size() < max) {
+			final int j = s.indexOf(' ', from);
+			if (j < 0) {
+				break;
+			}
+
+			// clamp the right edge of the window to the end of the text
+			final int offset = Math.min(j + len + 1, s.length());
+
+			// only blanks located after index len contribute a key
+			if (j - len > 0) {
+				final String bigram = s.substring(j - len, offset).replace(" ", "").trim();
+				if (bigram.length() >= 4) {
+					bigrams.add(bigram);
+				}
+			}
+			from = j + 1;
+		}
+		return bigrams;
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java
new file mode 100644
index 000000000..34f41085b
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java
@@ -0,0 +1,78 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import eu.dnetlib.pace.common.AbstractPaceFunctions;
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Clustering function annotated as "urlclustering": the keys are the host names of the given URLs.
+ */
+@ClusteringClass("urlclustering")
+public class UrlClustering extends AbstractPaceFunctions implements ClusteringFunction {
+
+	/** Configuration map received at construction time. */
+	protected Map params;
+
+	/**
+	 * @param params configuration map, stored as is
+	 */
+	public UrlClustering(final Map params) {
+		this.params = params;
+	}
+
+	/**
+	 * @return the configuration map held by this function
+	 */
+	@Override
+	public Map getParams() {
+		return params;
+	}
+
+	/**
+	 * Extracts the host of every non-empty field value.
+	 *
+	 * @param conf the deduplication configuration (not used here)
+	 * @param fields the URL strings to cluster
+	 * @return the set of distinct hosts, or an empty set as soon as one value is not a valid URL
+	 */
+	@Override
+	public Collection apply(final Config conf, List fields) {
+		try {
+			return fields
+				.stream()
+				.filter(f -> !f.isEmpty())
+				.map(this::asUrl)
+				.map(URL::getHost)
+				.collect(Collectors.toCollection(HashSet::new));
+		} catch (IllegalStateException e) {
+			// e.g. raised by asUrl for a malformed value: the whole call then yields no key
+			return new HashSet<>();
+		}
+	}
+
+	/**
+	 * Parses a string into a {@link URL}.
+	 *
+	 * @param value the text to parse
+	 * @return the parsed URL
+	 * @throws IllegalStateException if {@code value} is not a well-formed URL; the original
+	 *         {@link MalformedURLException} is attached as the cause
+	 */
+	private URL asUrl(String value) {
+		try {
+			return new URL(value);
+		} catch (MalformedURLException e) {
+			// should not happen as checked by pace typing
+			throw new IllegalStateException("invalid URL: " + value, e);
+		}
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java
new file mode 100644
index 000000000..22351cf8f
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java
@@ -0,0 +1,98 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Clustering function annotated as "wordsStatsSuffixPrefixChain". Keys start with simple statistics of the input
+ * (number of words longer than 3 characters, text length divided by "mod") followed by a chain of 3-character
+ * word prefixes and suffixes.
+ */
+@ClusteringClass("wordsStatsSuffixPrefixChain")
+public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction {
+
+	/**
+	 * @param params configuration map; "mod" is read when a value is clustered
+	 */
+	public WordsStatsSuffixPrefixChain(Map params) {
+		super(params);
+	}
+
+	/**
+	 * @param conf the deduplication configuration (not used here)
+	 * @param s the value to cluster
+	 * @return the keys built from {@code s} with the "mod" parameter
+	 */
+	@Override
+	protected Collection doApply(Config conf, String s) {
+		return suffixPrefixChain(s, param("mod"));
+	}
+
+	/**
+	 * Builds the statistics prefix of the keys and delegates the chaining of the words.
+	 *
+	 * @param s the text to cluster, split on single blanks
+	 * @param mod divisor applied to the length of {@code s}
+	 * @return the keys produced for the words of {@code s} longer than 3 characters
+	 */
+	private Collection suffixPrefixChain(String s, int mod) {
+		// keep only the words longer than 3 characters
+		final List wordsList = Arrays
+			.stream(s.split(" "))
+			.filter(si -> si.length() > 3)
+			.collect(Collectors.toList());
+
+		// statistics part of the key: number of kept words, then text length / mod
+		final String prefix = wordsList.size() + "-" + s.length() / mod + "-";
+
+		return doSuffixPrefixChain(wordsList, prefix);
+	}
+
+	/**
+	 * Chains 3-character suffixes and prefixes of the first two or three words, in two alternating patterns.
+	 *
+	 * @param wordsList the words to chain; each one is expected to be longer than 3 characters
+	 * @param prefix text placed in front of every key
+	 * @return an insertion-ordered set, empty when fewer than two words are given
+	 */
+	private Collection doSuffixPrefixChain(List wordsList, String prefix) {
+		final Set set = Sets.newLinkedHashSet();
+
+		final int count = wordsList.size();
+		if (count < 2) {
+			// fewer than two words: no key at all
+			return set;
+		}
+
+		// both keys always cover the first two words
+		String suffixFirst = prefix + suffix(wordsList.get(0), 3) + prefix(wordsList.get(1), 3);
+		String prefixFirst = prefix + prefix(wordsList.get(0), 3) + suffix(wordsList.get(1), 3);
+
+		if (count > 2) {
+			// a third word extends each chain, continuing the alternation
+			suffixFirst += suffix(wordsList.get(2), 3);
+			prefixFirst += prefix(wordsList.get(2), 3);
+		}
+
+		set.add(suffixFirst);
+		set.add(prefixFirst);
+		return set;
+	}
+
+	/** Returns the last {@code len} characters of {@code s}. */
+	private String suffix(String s, int len) {
+		final int start = s.length() - len;
+		return s.substring(start);
+	}
+
+	/** Returns the first {@code len} characters of {@code s}. */
+	private String prefix(String s, int len) {
+		return s.substring(0, len);
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java
new file mode 100644
index 000000000..f9fef376b
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java
@@ -0,0 +1,97 @@
+
+package eu.dnetlib.pace.clustering;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.Sets;
+
+import eu.dnetlib.pace.config.Config;
+
+/**
+ * Clustering function annotated as "wordssuffixprefix": like a suffix/prefix key built around the blanks of the
+ * input, but with a window that depends on the number of words and with that number put in front of every key.
+ */
+@ClusteringClass("wordssuffixprefix")
+public class WordsSuffixPrefix extends AbstractClusteringFunction {
+
+	/**
+	 * @param params configuration map; "len" and "max" are read when a value is clustered
+	 */
+	public WordsSuffixPrefix(Map params) {
+		super(params);
+	}
+
+	/**
+	 * @param conf the deduplication configuration (not used here)
+	 * @param s the value to cluster
+	 * @return the keys built from {@code s} with the "len" and "max" parameters
+	 */
+	@Override
+	protected Collection doApply(Config conf, String s) {
+		return suffixPrefix(s, param("len"), param("max"));
+	}
+
+	/**
+	 * Chooses the window length from the number of blank-separated words and builds the keys.
+	 *
+	 * @param s the text to cluster
+	 * @param len base number of characters taken on each side of a blank
+	 * @param max the scan stops once this many keys have been collected
+	 * @return the keys for {@code s}; always empty when {@code s} splits into exactly one word
+	 */
+	private Collection suffixPrefix(String s, int len, int max) {
+
+		final int words = s.split(" ").length;
+
+		// adjust the token length according to the number of words
+		switch (words) {
+			case 1:
+				return Sets.newLinkedHashSet();
+			case 2:
+				return doSuffixPrefix(s, len + 2, max, words);
+			case 3:
+				return doSuffixPrefix(s, len + 1, max, words);
+			default:
+				return doSuffixPrefix(s, len, max, words);
+		}
+	}
+
+	/**
+	 * Collects, in order of appearance, the keys built around the blanks of {@code s}.
+	 *
+	 * @param s the text to scan
+	 * @param len number of characters taken on each side of a blank
+	 * @param max the scan stops once this many keys have been collected
+	 * @param words number put in front of every key
+	 * @return insertion-ordered set of distinct keys; windows shorter than 4 characters are dropped
+	 */
+	private Collection doSuffixPrefix(String s, int len, int max, int words) {
+		final Set bigrams = Sets.newLinkedHashSet();
+
+		// Jump from blank to blank. Advancing one character at a time would find the same blank again for every
+		// position in front of it and keep scanning after the last blank, with no effect on the result.
+		int from = 1;
+		while (from < s.length() && bigrams.size() < max) {
+			final int j = s.indexOf(' ', from);
+			if (j < 0) {
+				break;
+			}
+
+			// clamp the right edge of the window to the end of the text
+			final int offset = Math.min(j + len + 1, s.length());
+
+			// only blanks located after index len contribute a key
+			if (j - len > 0) {
+				final String bigram = s.substring(j - len, offset).replace(" ", "").trim();
+				if (bigram.length() >= 4) {
+					bigrams.add(words + bigram);
+				}
+			}
+			from = j + 1;
+		}
+		return bigrams;
+	}
+
+}
diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
new file mode 100644
index 000000000..ba7639ada
--- /dev/null
+++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
@@ -0,0 +1,359 @@
+
+package eu.dnetlib.pace.common;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.text.Normalizer;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
+import com.ibm.icu.text.Transliterator;
+
+import eu.dnetlib.pace.clustering.NGramUtils;
+
+/**
+ * Set of common functions for the framework
+ *
+ * @author claudio
+ */
+public class AbstractPaceFunctions {
+
+ // city map to be used when translating the city names into codes
+ private static Map cityMap = AbstractPaceFunctions
+ .loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv");
+
+ // list of stopwords in different languages
+ protected static Set stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt");
+ protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt");
+ protected static Set stopwords_de = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_de.txt");
+ protected static Set stopwords_es = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_es.txt");
+ protected static Set stopwords_fr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_fr.txt");
+ protected static Set stopwords_it = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_it.txt");
+ protected static Set stopwords_pt = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_pt.txt");
+
+ // transliterator
+ protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
+
+ // blacklist of ngrams: to avoid generic keys
+ protected static Set